From 7578a7c0f296b678259ea852649276e7078064d7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 4 Jan 2023 07:48:37 -0700 Subject: [PATCH 001/543] ARM: renumber bits related to _TIF_WORK_MASK commit 191f8453fc99a537ea78b727acea739782378b0d upstream. We want to ensure that the mask related to calling do_work_pending() is within the first 16 bits. Move bits unrelated to that outside of that range, to avoid spuriously calling do_work_pending() when we don't need to. Cc: stable@vger.kernel.org Fixes: 32d59773da38 ("arm: add support for TIF_NOTIFY_SIGNAL") Reported-and-tested-by: Hui Tang Suggested-by: Russell King (Oracle) Link: https://lore.kernel.org/lkml/7ecb8f3c-2aeb-a905-0d4a-aa768b9649b5@huawei.com/ Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/thread_info.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index aecc403b2880..7f092cb55a41 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -128,15 +128,16 @@ extern int vfp_restore_user_hwstate(struct user_vfp *, #define TIF_NEED_RESCHED 1 /* rescheduling necessary */ #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ #define TIF_UPROBE 3 /* breakpointed or singlestepping */ -#define TIF_SYSCALL_TRACE 4 /* syscall trace active */ -#define TIF_SYSCALL_AUDIT 5 /* syscall auditing active */ -#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */ -#define TIF_SECCOMP 7 /* seccomp syscall filtering active */ -#define TIF_NOTIFY_SIGNAL 8 /* signal notifications exist */ +#define TIF_NOTIFY_SIGNAL 4 /* signal notifications exist */ #define TIF_USING_IWMMXT 17 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ -#define TIF_RESTORE_SIGMASK 20 +#define TIF_RESTORE_SIGMASK 19 +#define TIF_SYSCALL_TRACE 20 /* syscall trace active */ +#define TIF_SYSCALL_AUDIT 21 /* syscall auditing active */ +#define TIF_SYSCALL_TRACEPOINT 22 /* syscall tracepoint instrumentation */ +#define TIF_SECCOMP 23 /* seccomp syscall filtering active */ + #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) From 0af9640736a61f1404e4d64a088491d24cb0bdb3 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 4 Jan 2023 11:14:45 -0500 Subject: [PATCH 002/543] btrfs: replace strncpy() with strscpy() [ Upstream commit 63d5429f68a3d4c4aa27e65a05196c17f86c41d6 ] Using strncpy() on NUL-terminated strings are deprecated. To avoid possible forming of non-terminated string strscpy() should be used. Found by Linux Verification Center (linuxtesting.org) with SVACE. CC: stable@vger.kernel.org # 4.9+ Signed-off-by: Artem Chernyshev Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/ioctl.c | 9 +++------ fs/btrfs/rcu-string.h | 6 +++++- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5ba2e810dc6e..fd1902573cde 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3750,13 +3750,10 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info, di_args->bytes_used = btrfs_device_get_bytes_used(dev); di_args->total_bytes = btrfs_device_get_total_bytes(dev); memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); - if (dev->name) { - strncpy(di_args->path, rcu_str_deref(dev->name), - sizeof(di_args->path) - 1); - di_args->path[sizeof(di_args->path) - 1] = 0; - } else { + if (dev->name) + strscpy(di_args->path, rcu_str_deref(dev->name), sizeof(di_args->path)); + else di_args->path[0] = '\0'; - } out: rcu_read_unlock(); diff --git a/fs/btrfs/rcu-string.h b/fs/btrfs/rcu-string.h index 5c1a617eb25d..5c2b66d155ef 100644 --- a/fs/btrfs/rcu-string.h +++ b/fs/btrfs/rcu-string.h @@ -18,7 +18,11 @@ static inline struct rcu_string *rcu_string_strdup(const char *src, gfp_t mask) (len * sizeof(char)), mask); if (!ret) return ret; - strncpy(ret->str, src, len); + /* Warn if the source got unexpectedly truncated. */ + if (WARN_ON(strscpy(ret->str, src, len) < 0)) { + kfree(ret); + return NULL; + } return ret; } From 76f1ff02e23edf6e4a346743b13a2287cfe5af06 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Thu, 22 Dec 2022 12:54:44 +0000 Subject: [PATCH 003/543] cifs: fix interface count calculation during refresh commit cc7d79d4fad6a4eab3f88c4bb237de72be4478f1 upstream. The last fix to iface_count did fix the overcounting issue. However, during each refresh, we could end up undercounting the iface_count, if a match was found. Fixing this by doing increments and decrements instead of setting it to 0 before each parsing of server interfaces. Fixes: 096bbeec7bd6 ("smb3: interface count displayed incorrectly") Cc: stable@vger.kernel.org # 6.1 Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2ops.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index b24e68b5ccd6..8da98918cf86 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -530,7 +530,6 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, p = buf; spin_lock(&ses->iface_lock); - ses->iface_count = 0; /* * Go through iface_list and do kref_put to remove * any unused ifaces. ifaces in use will be removed @@ -540,6 +539,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, iface_head) { iface->is_active = 0; kref_put(&iface->refcount, release_iface); + ses->iface_count--; } spin_unlock(&ses->iface_lock); @@ -618,6 +618,7 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, /* just get a ref so that it doesn't get picked/freed */ iface->is_active = 1; kref_get(&iface->refcount); + ses->iface_count++; spin_unlock(&ses->iface_lock); goto next_iface; } else if (ret < 0) { From 94d0e33c5f5270ff3615ba3d173bf37dfcf1e64a Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Tue, 27 Dec 2022 11:29:28 +0000 Subject: [PATCH 004/543] cifs: refcount only the selected iface during interface update commit 7246210ecdd0cda97fa3e3bb15c32c6c2d9a23b5 upstream. When the server interface for a channel is not active anymore, we have the logic to select an alternative interface. However this was not breaking out of the loop as soon as a new alternative was found. As a result, some interfaces may get refcounted unintentionally. There was also a bug in checking if we found an alternate iface. Fixed that too. Fixes: b54034a73baf ("cifs: during reconnect, update interface if necessary") Cc: stable@vger.kernel.org # 5.19+ Signed-off-by: Shyam Prasad N Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/sess.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 9e7d9f0baa18..0b842a07e157 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -292,9 +292,10 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) continue; } kref_get(&iface->refcount); + break; } - if (!list_entry_is_head(iface, &ses->iface_list, iface_head)) { + if (list_entry_is_head(iface, &ses->iface_list, iface_head)) { rc = 1; iface = NULL; cifs_dbg(FYI, "unable to find a suitable iface\n"); From acbf70d7f9cd416718ec701dfa945d5da0486fa5 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 8 Dec 2022 16:50:35 -0800 Subject: [PATCH 005/543] usb: dwc3: gadget: Ignore End Transfer delay on teardown commit c4e3ef5685393c5051b52cf1e94b8891d49793ab upstream. If we delay sending End Transfer for Setup TRB to be prepared, we need to check if the End Transfer was in preparation for a driver teardown/soft-disconnect. In those cases, just send the End Transfer command without delay. In the case of soft-disconnect, there's a very small chance the command may not go through immediately. But should it happen, the Setup TRB will be prepared during the polling of the controller halted state, allowing the command to go through then. In the case of disabling endpoint due to reconfiguration (e.g. set_interface(alt-setting) or usb reset), then it's driven by the host. Typically the host wouldn't immediately cancel the control request and send another control transfer to trigger the End Transfer command timeout. Fixes: 4db0fbb60136 ("usb: dwc3: gadget: Don't delay End Transfer on delayed_status") Cc: stable@vger.kernel.org Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/f1617a323e190b9cc408fb8b65456e32b5814113.1670546756.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 6d524fa76443..ed958da0e1c9 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1717,6 +1717,7 @@ static int __dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool int else if (!ret) dep->flags |= DWC3_EP_END_TRANSFER_PENDING; + dep->flags &= ~DWC3_EP_DELAY_STOP; return ret; } @@ -3722,8 +3723,10 @@ void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, if (dep->number <= 1 && dwc->ep0state != EP0_DATA_PHASE) return; + if (interrupt && (dep->flags & DWC3_EP_DELAY_STOP)) + return; + if (!(dep->flags & DWC3_EP_TRANSFER_STARTED) || - (dep->flags & DWC3_EP_DELAY_STOP) || (dep->flags & DWC3_EP_END_TRANSFER_PENDING)) return; From 427a36cf819b38d37756291b77ed9c0c6e74daf6 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 23 Dec 2022 18:28:53 +0000 Subject: [PATCH 006/543] btrfs: fix off-by-one in delalloc search during lseek MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2f2e84ca60660402bd81d0859703567c59556e6a upstream. During lseek, when searching for delalloc in a range that represents a hole and that range has a length of 1 byte, we end up not doing the actual delalloc search in the inode's io tree, resulting in not correctly reporting the offset with data or a hole. This actually only happens when the start offset is 0 because with any other start offset we round it down by sector size. Reproducer: $ mkfs.btrfs -f /dev/sdc $ mount /dev/sdc /mnt/sdc $ xfs_io -f -c "pwrite -q 0 1" /mnt/sdc/foo $ xfs_io -c "seek -d 0" /mnt/sdc/foo Whence Result DATA EOF It should have reported an offset of 0 instead of EOF. Fix this by updating btrfs_find_delalloc_in_range() and count_range_bits() to deal with inclusive ranges properly. These functions are already supposed to work with inclusive end offsets, they just got it wrong in a couple places due to off-by-one mistakes. A test case for fstests will be added later. Reported-by: Joan Bruguera Micó Link: https://lore.kernel.org/linux-btrfs/20221223020509.457113-1-joanbrugueram@gmail.com/ Fixes: b6e833567ea1 ("btrfs: make hole and data seeking a lot more efficient") CC: stable@vger.kernel.org # 6.1 Tested-by: Joan Bruguera Micó Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/extent-io-tree.c | 2 +- fs/btrfs/file.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c index 7b93719a486c..09ae0e73e680 100644 --- a/fs/btrfs/extent-io-tree.c +++ b/fs/btrfs/extent-io-tree.c @@ -1507,7 +1507,7 @@ u64 count_range_bits(struct extent_io_tree *tree, u64 last = 0; int found = 0; - if (WARN_ON(search_end <= cur_start)) + if (WARN_ON(search_end < cur_start)) return 0; spin_lock(&tree->lock); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ed4e1c3705d0..9bef8eaa074a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3671,7 +3671,7 @@ bool btrfs_find_delalloc_in_range(struct btrfs_inode *inode, u64 start, u64 end, u64 prev_delalloc_end = 0; bool ret = false; - while (cur_offset < end) { + while (cur_offset <= end) { u64 delalloc_start; u64 delalloc_end; bool delalloc; From def94d5966f63a12eb2e8ee23d6a5e80ba713fa7 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 22 Dec 2022 07:59:17 +0800 Subject: [PATCH 007/543] btrfs: fix compat_ro checks against remount commit 2ba48b20049b5a76f34a85f853c9496d1b10533a upstream. [BUG] Even with commit 81d5d61454c3 ("btrfs: enhance unsupported compat RO flags handling"), btrfs can still mount a fs with unsupported compat_ro flags read-only, then remount it RW: # btrfs ins dump-super /dev/loop0 | grep compat_ro_flags -A 3 compat_ro_flags 0x403 ( FREE_SPACE_TREE | FREE_SPACE_TREE_VALID | unknown flag: 0x400 ) # mount /dev/loop0 /mnt/btrfs mount: /mnt/btrfs: wrong fs type, bad option, bad superblock on /dev/loop0, missing codepage or helper program, or other error. dmesg(1) may have more information after failed mount system call. ^^^ RW mount failed as expected ^^^ # dmesg -t | tail -n5 loop0: detected capacity change from 0 to 1048576 BTRFS: device fsid cb5b82f5-0fdd-4d81-9b4b-78533c324afa devid 1 transid 7 /dev/loop0 scanned by mount (1146) BTRFS info (device loop0): using crc32c (crc32c-intel) checksum algorithm BTRFS info (device loop0): using free space tree BTRFS error (device loop0): cannot mount read-write because of unknown compat_ro features (0x403) BTRFS error (device loop0): open_ctree failed # mount /dev/loop0 -o ro /mnt/btrfs # mount -o remount,rw /mnt/btrfs ^^^ RW remount succeeded unexpectedly ^^^ [CAUSE] Currently we use btrfs_check_features() to check compat_ro flags against our current mount flags. That function get reused between open_ctree() and btrfs_remount(). But for btrfs_remount(), the super block we passed in still has the old mount flags, thus btrfs_check_features() still believes we're mounting read-only. [FIX] Replace the existing @sb argument with @is_rw_mount. As originally we only use @sb to determine if the mount is RW. Now it's callers' responsibility to determine if the mount is RW, and since there are only two callers, the check is pretty simple: - caller in open_ctree() Just pass !sb_rdonly(). - caller in btrfs_remount() Pass !(*flags & SB_RDONLY), as our check should be against the new flags. Now we can correctly reject the RW remount: # mount /dev/loop0 -o ro /mnt/btrfs # mount -o remount,rw /mnt/btrfs mount: /mnt/btrfs: mount point not mounted or bad option. dmesg(1) may have more information after failed mount system call. # dmesg -t | tail -n 1 BTRFS error (device loop0: state M): cannot mount read-write because of unknown compat_ro features (0x403) Reported-by: Chung-Chiang Cheng Fixes: 81d5d61454c3 ("btrfs: enhance unsupported compat RO flags handling") CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Anand Jain Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 8 +++++--- fs/btrfs/disk-io.h | 2 +- fs/btrfs/super.c | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d99bf7c64611..6538f52262ca 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3296,6 +3296,8 @@ out: /* * Do various sanity and dependency checks of different features. * + * @is_rw_mount: If the mount is read-write. + * * This is the place for less strict checks (like for subpage or artificial * feature dependencies). * @@ -3306,7 +3308,7 @@ out: * (space cache related) can modify on-disk format like free space tree and * screw up certain feature dependencies. */ -int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb) +int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount) { struct btrfs_super_block *disk_super = fs_info->super_copy; u64 incompat = btrfs_super_incompat_flags(disk_super); @@ -3345,7 +3347,7 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb) if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) incompat |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; - if (compat_ro_unsupp && !sb_rdonly(sb)) { + if (compat_ro_unsupp && is_rw_mount) { btrfs_err(fs_info, "cannot mount read-write because of unknown compat_ro features (0x%llx)", compat_ro); @@ -3548,7 +3550,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device goto fail_alloc; } - ret = btrfs_check_features(fs_info, sb); + ret = btrfs_check_features(fs_info, !sb_rdonly(sb)); if (ret < 0) { err = ret; goto fail_alloc; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 9fa923e005a3..7322af63c0cc 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -50,7 +50,7 @@ int __cold open_ctree(struct super_block *sb, void __cold close_ctree(struct btrfs_fs_info *fs_info); int btrfs_validate_super(struct btrfs_fs_info *fs_info, struct btrfs_super_block *sb, int mirror_num); -int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb); +int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount); int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors); struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev); struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 5942b9384088..abfd7c897075 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2014,7 +2014,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) if (ret) goto restore; - ret = btrfs_check_features(fs_info, sb); + ret = btrfs_check_features(fs_info, !(*flags & SB_RDONLY)); if (ret < 0) goto restore; From 8dba3cdb3d5fbe2067baeda87f29e1cb8f25078f Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 1 Nov 2022 22:48:39 +0900 Subject: [PATCH 008/543] perf probe: Use dwarf_attr_integrate as generic DWARF attr accessor [ Upstream commit f828929ab7f0dc3353e4a617f94f297fa8f3dec3 ] Use dwarf_attr_integrate() instead of dwarf_attr() for generic attribute acccessor functions, so that it can find the specified attribute from abstact origin DIE etc. Signed-off-by: Masami Hiramatsu Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: https://lore.kernel.org/r/166731051988.2100653.13595339994343449770.stgit@devnote3 Signed-off-by: Arnaldo Carvalho de Melo Stable-dep-of: a9dfc46c67b5 ("perf probe: Fix to get the DW_AT_decl_file and DW_AT_call_file as unsinged data") Signed-off-by: Sasha Levin --- tools/perf/util/dwarf-aux.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 609ca1671501..a07efbadb775 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -308,7 +308,7 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name, { Dwarf_Attribute attr; - if (dwarf_attr(tp_die, attr_name, &attr) == NULL || + if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL || dwarf_formudata(&attr, result) != 0) return -ENOENT; @@ -321,7 +321,7 @@ static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name, { Dwarf_Attribute attr; - if (dwarf_attr(tp_die, attr_name, &attr) == NULL || + if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL || dwarf_formsdata(&attr, result) != 0) return -ENOENT; From 53be6d939bf220b80d73bbef3b2d241d2031545b Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Sat, 5 Nov 2022 12:01:14 +0900 Subject: [PATCH 009/543] perf probe: Fix to get the DW_AT_decl_file and DW_AT_call_file as unsinged data [ Upstream commit a9dfc46c67b52ad43b8e335e28f4cf8002c67793 ] DWARF version 5 standard Sec 2.14 says that Any debugging information entry representing the declaration of an object, module, subprogram or type may have DW_AT_decl_file, DW_AT_decl_line and DW_AT_decl_column attributes, each of whose value is an unsigned integer constant. So it should be an unsigned integer data. Also, even though the standard doesn't clearly say the DW_AT_call_file is signed or unsigned, the elfutils (eu-readelf) interprets it as unsigned integer data and it is natural to handle it as unsigned integer data as same as DW_AT_decl_file. This changes the DW_AT_call_file as unsigned integer data too. Fixes: 3f4460a28fb2f73d ("perf probe: Filter out redundant inline-instances") Signed-off-by: Masami Hiramatsu Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: stable@vger.kernel.org Cc: Steven Rostedt (VMware) Link: https://lore.kernel.org/r/166761727445.480106.3738447577082071942.stgit@devnote3 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/dwarf-aux.c | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index a07efbadb775..623527edeac1 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -315,19 +315,6 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name, return 0; } -/* Get attribute and translate it as a sdata */ -static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name, - Dwarf_Sword *result) -{ - Dwarf_Attribute attr; - - if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL || - dwarf_formsdata(&attr, result) != 0) - return -ENOENT; - - return 0; -} - /** * die_is_signed_type - Check whether a type DIE is signed or not * @tp_die: a DIE of a type @@ -467,9 +454,9 @@ int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs) /* Get the call file index number in CU DIE */ static int die_get_call_fileno(Dwarf_Die *in_die) { - Dwarf_Sword idx; + Dwarf_Word idx; - if (die_get_attr_sdata(in_die, DW_AT_call_file, &idx) == 0) + if (die_get_attr_udata(in_die, DW_AT_call_file, &idx) == 0) return (int)idx; else return -ENOENT; @@ -478,9 +465,9 @@ static int die_get_call_fileno(Dwarf_Die *in_die) /* Get the declared file index number in CU DIE */ static int die_get_decl_fileno(Dwarf_Die *pdie) { - Dwarf_Sword idx; + Dwarf_Word idx; - if (die_get_attr_sdata(pdie, DW_AT_decl_file, &idx) == 0) + if (die_get_attr_udata(pdie, DW_AT_decl_file, &idx) == 0) return (int)idx; else return -ENOENT; From 667347e338cb0d76806ddb58275b56a37c6c20d9 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 14 Nov 2022 09:13:44 +0100 Subject: [PATCH 010/543] phy: qcom-qmp-combo: fix broken power on [ Upstream commit 7a7d86d14d073dfa3429c550667a8e78b99edbd4 ] The PHY is powered on during phy-init by setting the SW_PWRDN bit in the COM_POWER_DOWN_CTRL register and then setting the same bit in the in the PCS_POWER_DOWN_CONTROL register that belongs to the USB part of the PHY. Currently, whether power on succeeds depends on probe order and having the USB part of the PHY be initialised first. In case the DP part of the PHY is instead initialised first, the intended power on of the USB block results in a corrupted DP_PHY register (e.g. DP_PHY_AUX_CFG8). Add a pointer to the USB part of the PHY to the driver data and use that to power on the PHY also if the DP part of the PHY is initialised first. Fixes: 52e013d0bffa ("phy: qcom-qmp: Add support for DP in USB3+DP combo phy") Cc: stable@vger.kernel.org # 5.10 Reviewed-by: Dmitry Baryshkov Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20221114081346.5116-5-johan+linaro@kernel.org Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c index 91f8ee79000d..adcda7762acf 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c @@ -955,6 +955,7 @@ struct qcom_qmp { struct regulator_bulk_data *vregs; struct qmp_phy **phys; + struct qmp_phy *usb_phy; struct mutex phy_mutex; int init_count; @@ -1978,7 +1979,7 @@ static int qmp_combo_com_init(struct qmp_phy *qphy) { struct qcom_qmp *qmp = qphy->qmp; const struct qmp_phy_cfg *cfg = qphy->cfg; - void __iomem *pcs = qphy->pcs; + struct qmp_phy *usb_phy = qmp->usb_phy; void __iomem *dp_com = qmp->dp_com; int ret; @@ -2031,13 +2032,13 @@ static int qmp_combo_com_init(struct qmp_phy *qphy) qphy_clrbits(dp_com, QPHY_V3_DP_COM_SWI_CTRL, 0x03); qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET); - if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) - qphy_setbits(pcs, - cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], - cfg->pwrdn_ctrl); + if (usb_phy->cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) + qphy_setbits(usb_phy->pcs, + usb_phy->cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], + usb_phy->cfg->pwrdn_ctrl); else - qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, - cfg->pwrdn_ctrl); + qphy_setbits(usb_phy->pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, + usb_phy->cfg->pwrdn_ctrl); mutex_unlock(&qmp->phy_mutex); @@ -2925,6 +2926,8 @@ static int qmp_combo_probe(struct platform_device *pdev) goto err_node_put; } + qmp->usb_phy = qmp->phys[id]; + /* * Register the pipe clock provided by phy. * See function description to see details of this pipe clock. @@ -2940,6 +2943,9 @@ static int qmp_combo_probe(struct platform_device *pdev) id++; } + if (!qmp->usb_phy) + return -EINVAL; + phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); return PTR_ERR_OR_ZERO(phy_provider); From e92536de04e4e356a7f18a824b30bac1cb1d89a0 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sun, 8 Jan 2023 08:24:19 -0500 Subject: [PATCH 011/543] btrfs: fix an error handling path in btrfs_defrag_leaves() [ Upstream commit db0a4a7b8e95f9312a59a67cbd5bc589f090e13d ] All error handling paths end to 'out', except this memory allocation failure. This is spurious. So branch to the error handling path also in this case. It will add a call to: memset(&root->defrag_progress, 0, sizeof(root->defrag_progress)); Fixes: 6702ed490ca0 ("Btrfs: Add run time btree defrag, and an ioctl to force btree defrag") Signed-off-by: Christophe JAILLET Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/tree-defrag.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index b6cf39f4e7e4..072ab9a1374b 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -31,8 +31,10 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, goto out; path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + if (!path) { + ret = -ENOMEM; + goto out; + } level = btrfs_header_level(root->node); From 3e8dee13f3d105a0007f9c3527125c61505accf8 Mon Sep 17 00:00:00 2001 From: minoura makoto Date: Tue, 13 Dec 2022 13:14:31 +0900 Subject: [PATCH 012/543] SUNRPC: ensure the matching upcall is in-flight upon downcall [ Upstream commit b18cba09e374637a0a3759d856a6bca94c133952 ] Commit 9130b8dbc6ac ("SUNRPC: allow for upcalls for the same uid but different gss service") introduced `auth` argument to __gss_find_upcall(), but in gss_pipe_downcall() it was left as NULL since it (and auth->service) was not (yet) determined. When multiple upcalls with the same uid and different service are ongoing, it could happen that __gss_find_upcall(), which returns the first match found in the pipe->in_downcall list, could not find the correct gss_msg corresponding to the downcall we are looking for. Moreover, it might return a msg which is not sent to rpc.gssd yet. We could see mount.nfs process hung in D state with multiple mount.nfs are executed in parallel. The call trace below is of CentOS 7.9 kernel-3.10.0-1160.24.1.el7.x86_64 but we observed the same hang w/ elrepo kernel-ml-6.0.7-1.el7. PID: 71258 TASK: ffff91ebd4be0000 CPU: 36 COMMAND: "mount.nfs" #0 [ffff9203ca3234f8] __schedule at ffffffffa3b8899f #1 [ffff9203ca323580] schedule at ffffffffa3b88eb9 #2 [ffff9203ca323590] gss_cred_init at ffffffffc0355818 [auth_rpcgss] #3 [ffff9203ca323658] rpcauth_lookup_credcache at ffffffffc0421ebc [sunrpc] #4 [ffff9203ca3236d8] gss_lookup_cred at ffffffffc0353633 [auth_rpcgss] #5 [ffff9203ca3236e8] rpcauth_lookupcred at ffffffffc0421581 [sunrpc] #6 [ffff9203ca323740] rpcauth_refreshcred at ffffffffc04223d3 [sunrpc] #7 [ffff9203ca3237a0] call_refresh at ffffffffc04103dc [sunrpc] #8 [ffff9203ca3237b8] __rpc_execute at ffffffffc041e1c9 [sunrpc] #9 [ffff9203ca323820] rpc_execute at ffffffffc0420a48 [sunrpc] The scenario is like this. Let's say there are two upcalls for services A and B, A -> B in pipe->in_downcall, B -> A in pipe->pipe. When rpc.gssd reads pipe to get the upcall msg corresponding to service B from pipe->pipe and then writes the response, in gss_pipe_downcall the msg corresponding to service A will be picked because only uid is used to find the msg and it is before the one for B in pipe->in_downcall. And the process waiting for the msg corresponding to service A will be woken up. Actual scheduing of that process might be after rpc.gssd processes the next msg. In rpc_pipe_generic_upcall it clears msg->errno (for A). The process is scheduled to see gss_msg->ctx == NULL and gss_msg->msg.errno == 0, therefore it cannot break the loop in gss_create_upcall and is never woken up after that. This patch adds a simple check to ensure that a msg which is not sent to rpc.gssd yet is not chosen as the matching upcall upon receiving a downcall. Signed-off-by: minoura makoto Signed-off-by: Hiroshi Shimamoto Tested-by: Hiroshi Shimamoto Cc: Trond Myklebust Fixes: 9130b8dbc6ac ("SUNRPC: allow for upcalls for same uid but different gss service") Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- include/linux/sunrpc/rpc_pipe_fs.h | 5 +++++ net/sunrpc/auth_gss/auth_gss.c | 19 +++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index cd188a527d16..3b35b6f6533a 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -92,6 +92,11 @@ extern ssize_t rpc_pipe_generic_upcall(struct file *, struct rpc_pipe_msg *, char __user *, size_t); extern int rpc_queue_upcall(struct rpc_pipe *, struct rpc_pipe_msg *); +/* returns true if the msg is in-flight, i.e., already eaten by the peer */ +static inline bool rpc_msg_is_inflight(const struct rpc_pipe_msg *msg) { + return (msg->copied != 0 && list_empty(&msg->list)); +} + struct rpc_clnt; extern struct dentry *rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *); extern int rpc_remove_client_dir(struct rpc_clnt *); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 7bb247c51e2f..2d7b1e03110a 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -302,7 +302,7 @@ __gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth list_for_each_entry(pos, &pipe->in_downcall, list) { if (!uid_eq(pos->uid, uid)) continue; - if (auth && pos->auth->service != auth->service) + if (pos->auth->service != auth->service) continue; refcount_inc(&pos->count); return pos; @@ -686,6 +686,21 @@ out: return err; } +static struct gss_upcall_msg * +gss_find_downcall(struct rpc_pipe *pipe, kuid_t uid) +{ + struct gss_upcall_msg *pos; + list_for_each_entry(pos, &pipe->in_downcall, list) { + if (!uid_eq(pos->uid, uid)) + continue; + if (!rpc_msg_is_inflight(&pos->msg)) + continue; + refcount_inc(&pos->count); + return pos; + } + return NULL; +} + #define MSG_BUF_MAXSIZE 1024 static ssize_t @@ -732,7 +747,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) err = -ENOENT; /* Find a matching upcall */ spin_lock(&pipe->lock); - gss_msg = __gss_find_upcall(pipe, uid, NULL); + gss_msg = gss_find_downcall(pipe, uid); if (gss_msg == NULL) { spin_unlock(&pipe->lock); goto err_put_ctx; From 35dab092c295b76d94bd5b47ff60a11190e532ce Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 15 Dec 2022 17:55:42 +0100 Subject: [PATCH 013/543] wifi: ath9k: use proper statements in conditionals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b7dc753fe33a707379e2254317794a4dad6c0fe2 ] A previous cleanup patch accidentally broke some conditional expressions by replacing the safe "do {} while (0)" constructs with empty macros. gcc points this out when extra warnings are enabled: drivers/net/wireless/ath/ath9k/hif_usb.c: In function 'ath9k_skb_queue_complete': drivers/net/wireless/ath/ath9k/hif_usb.c:251:57: error: suggest braces around empty body in an 'else' statement [-Werror=empty-body] 251 | TX_STAT_INC(hif_dev, skb_failed); Make both sets of macros proper expressions again. Fixes: d7fc76039b74 ("ath9k: htc: clean up statistics macros") Signed-off-by: Arnd Bergmann Acked-by: Toke Høiland-Jørgensen Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221215165553.1950307-1-arnd@kernel.org Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath9k/htc.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/htc.h b/drivers/net/wireless/ath/ath9k/htc.h index 30f0765fb9fd..237f4ec2cffd 100644 --- a/drivers/net/wireless/ath/ath9k/htc.h +++ b/drivers/net/wireless/ath/ath9k/htc.h @@ -327,9 +327,9 @@ static inline struct ath9k_htc_tx_ctl *HTC_SKB_CB(struct sk_buff *skb) } #ifdef CONFIG_ATH9K_HTC_DEBUGFS -#define __STAT_SAFE(hif_dev, expr) ((hif_dev)->htc_handle->drv_priv ? (expr) : 0) -#define CAB_STAT_INC(priv) ((priv)->debug.tx_stats.cab_queued++) -#define TX_QSTAT_INC(priv, q) ((priv)->debug.tx_stats.queue_stats[q]++) +#define __STAT_SAFE(hif_dev, expr) do { ((hif_dev)->htc_handle->drv_priv ? (expr) : 0); } while (0) +#define CAB_STAT_INC(priv) do { ((priv)->debug.tx_stats.cab_queued++); } while (0) +#define TX_QSTAT_INC(priv, q) do { ((priv)->debug.tx_stats.queue_stats[q]++); } while (0) #define TX_STAT_INC(hif_dev, c) \ __STAT_SAFE((hif_dev), (hif_dev)->htc_handle->drv_priv->debug.tx_stats.c++) @@ -378,10 +378,10 @@ void ath9k_htc_get_et_stats(struct ieee80211_hw *hw, struct ethtool_stats *stats, u64 *data); #else -#define TX_STAT_INC(hif_dev, c) -#define TX_STAT_ADD(hif_dev, c, a) -#define RX_STAT_INC(hif_dev, c) -#define RX_STAT_ADD(hif_dev, c, a) +#define TX_STAT_INC(hif_dev, c) do { } while (0) +#define TX_STAT_ADD(hif_dev, c, a) do { } while (0) +#define RX_STAT_INC(hif_dev, c) do { } while (0) +#define RX_STAT_ADD(hif_dev, c, a) do { } while (0) #define CAB_STAT_INC(priv) #define TX_QSTAT_INC(priv, c) From 321635c70352d635832a2cb067520cbc0852bc7b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 19 Dec 2022 16:47:00 -0800 Subject: [PATCH 014/543] bpf: pull before calling skb_postpull_rcsum() [ Upstream commit 54c3f1a81421f85e60ae2eaae7be3727a09916ee ] Anand hit a BUG() when pulling off headers on egress to a SW tunnel. We get to skb_checksum_help() with an invalid checksum offset (commit d7ea0d9df2a6 ("net: remove two BUG() from skb_checksum_help()") converted those BUGs to WARN_ONs()). He points out oddness in how skb_postpull_rcsum() gets used. Indeed looks like we should pull before "postpull", otherwise the CHECKSUM_PARTIAL fixup from skb_postpull_rcsum() will not be able to do its job: if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_start_offset(skb) < 0) skb->ip_summed = CHECKSUM_NONE; Reported-by: Anand Parthasarathy Fixes: 6578171a7ff0 ("bpf: add bpf_skb_change_proto helper") Signed-off-by: Jakub Kicinski Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20221220004701.402165-1-kuba@kernel.org Signed-off-by: Martin KaFai Lau Signed-off-by: Sasha Levin --- net/core/filter.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index a368edd9057c..0c2666e041d3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3184,15 +3184,18 @@ static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len) { + void *old_data; + /* skb_ensure_writable() is not needed here, as we're * already working on an uncloned skb. */ if (unlikely(!pskb_may_pull(skb, off + len))) return -ENOMEM; - skb_postpull_rcsum(skb, skb->data + off, len); - memmove(skb->data + len, skb->data, off); + old_data = skb->data; __skb_pull(skb, len); + skb_postpull_rcsum(skb, old_data + off, len); + memmove(skb->data, old_data, off); return 0; } From ba3d2c2380e7129b525a787489c0b7e819a3b898 Mon Sep 17 00:00:00 2001 From: Steven Price Date: Mon, 19 Dec 2022 14:01:30 +0000 Subject: [PATCH 015/543] drm/panfrost: Fix GEM handle creation ref-counting [ Upstream commit 4217c6ac817451d5116687f3cc6286220dc43d49 ] panfrost_gem_create_with_handle() previously returned a BO but with the only reference being from the handle, which user space could in theory guess and release, causing a use-after-free. Additionally if the call to panfrost_gem_mapping_get() in panfrost_ioctl_create_bo() failed then a(nother) reference on the BO was dropped. The _create_with_handle() is a problematic pattern, so ditch it and instead create the handle in panfrost_ioctl_create_bo(). If the call to panfrost_gem_mapping_get() fails then this means that user space has indeed gone behind our back and freed the handle. In which case just return an error code. Reported-by: Rob Clark Fixes: f3ba91228e8e ("drm/panfrost: Add initial panfrost driver") Signed-off-by: Steven Price Reviewed-by: Rob Clark Link: https://patchwork.freedesktop.org/patch/msgid/20221219140130.410578-1-steven.price@arm.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/panfrost/panfrost_drv.c | 27 ++++++++++++++++--------- drivers/gpu/drm/panfrost/panfrost_gem.c | 16 +-------------- drivers/gpu/drm/panfrost/panfrost_gem.h | 5 +---- 3 files changed, 20 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index 2fa5afe21288..919e6cc04982 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -82,6 +82,7 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, struct panfrost_gem_object *bo; struct drm_panfrost_create_bo *args = data; struct panfrost_gem_mapping *mapping; + int ret; if (!args->size || args->pad || (args->flags & ~(PANFROST_BO_NOEXEC | PANFROST_BO_HEAP))) @@ -92,21 +93,29 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, !(args->flags & PANFROST_BO_NOEXEC)) return -EINVAL; - bo = panfrost_gem_create_with_handle(file, dev, args->size, args->flags, - &args->handle); + bo = panfrost_gem_create(dev, args->size, args->flags); if (IS_ERR(bo)) return PTR_ERR(bo); + ret = drm_gem_handle_create(file, &bo->base.base, &args->handle); + if (ret) + goto out; + mapping = panfrost_gem_mapping_get(bo, priv); - if (!mapping) { - drm_gem_object_put(&bo->base.base); - return -EINVAL; + if (mapping) { + args->offset = mapping->mmnode.start << PAGE_SHIFT; + panfrost_gem_mapping_put(mapping); + } else { + /* This can only happen if the handle from + * drm_gem_handle_create() has already been guessed and freed + * by user space + */ + ret = -EINVAL; } - args->offset = mapping->mmnode.start << PAGE_SHIFT; - panfrost_gem_mapping_put(mapping); - - return 0; +out: + drm_gem_object_put(&bo->base.base); + return ret; } /** diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c index 293e799e2fe8..3c812fbd126f 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem.c @@ -235,12 +235,8 @@ struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t } struct panfrost_gem_object * -panfrost_gem_create_with_handle(struct drm_file *file_priv, - struct drm_device *dev, size_t size, - u32 flags, - uint32_t *handle) +panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags) { - int ret; struct drm_gem_shmem_object *shmem; struct panfrost_gem_object *bo; @@ -256,16 +252,6 @@ panfrost_gem_create_with_handle(struct drm_file *file_priv, bo->noexec = !!(flags & PANFROST_BO_NOEXEC); bo->is_heap = !!(flags & PANFROST_BO_HEAP); - /* - * Allocate an id of idr table where the obj is registered - * and handle has the id what user can see. - */ - ret = drm_gem_handle_create(file_priv, &shmem->base, handle); - /* drop reference from allocate - handle holds it now. */ - drm_gem_object_put(&shmem->base); - if (ret) - return ERR_PTR(ret); - return bo; } diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h index 8088d5fd8480..ad2877eeeccd 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem.h +++ b/drivers/gpu/drm/panfrost/panfrost_gem.h @@ -69,10 +69,7 @@ panfrost_gem_prime_import_sg_table(struct drm_device *dev, struct sg_table *sgt); struct panfrost_gem_object * -panfrost_gem_create_with_handle(struct drm_file *file_priv, - struct drm_device *dev, size_t size, - u32 flags, - uint32_t *handle); +panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags); int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_priv); void panfrost_gem_close(struct drm_gem_object *obj, From 4fba3958a9b9c7ab24cfb16bdeaa7ba3f749691b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 19 Dec 2022 20:07:52 +0100 Subject: [PATCH 016/543] netfilter: nf_tables: consolidate set description [ Upstream commit bed4a63ea4ae77cfe5aae004ef87379f0655260a ] Add the following fields to the set description: - key type - data type - object type - policy - gc_int: garbage collection interval) - timeout: element timeout This prepares for stricter set type checks on updates in a follow up patch. Signed-off-by: Pablo Neira Ayuso Stable-dep-of: f6594c372afd ("netfilter: nf_tables: perform type checking for existing sets") Signed-off-by: Sasha Levin --- include/net/netfilter/nf_tables.h | 12 +++++++ net/netfilter/nf_tables_api.c | 58 +++++++++++++++---------------- 2 files changed, 40 insertions(+), 30 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index cdb7db9b0e25..ddcdde230747 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -311,17 +311,29 @@ struct nft_set_iter { /** * struct nft_set_desc - description of set elements * + * @ktype: key type * @klen: key length + * @dtype: data type * @dlen: data length + * @objtype: object type + * @flags: flags * @size: number of set elements + * @policy: set policy + * @gc_int: garbage collector interval * @field_len: length of each field in concatenation, bytes * @field_count: number of concatenated fields in element * @expr: set must support for expressions */ struct nft_set_desc { + u32 ktype; unsigned int klen; + u32 dtype; unsigned int dlen; + u32 objtype; unsigned int size; + u32 policy; + u32 gc_int; + u64 timeout; u8 field_len[NFT_REG32_COUNT]; u8 field_count; bool expr; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7a09421f19e1..a31f8dc40646 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3732,8 +3732,7 @@ static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags) static const struct nft_set_ops * nft_select_set_ops(const struct nft_ctx *ctx, const struct nlattr * const nla[], - const struct nft_set_desc *desc, - enum nft_set_policies policy) + const struct nft_set_desc *desc) { struct nftables_pernet *nft_net = nft_pernet(ctx->net); const struct nft_set_ops *ops, *bops; @@ -3762,7 +3761,7 @@ nft_select_set_ops(const struct nft_ctx *ctx, if (!ops->estimate(desc, flags, &est)) continue; - switch (policy) { + switch (desc->policy) { case NFT_SET_POL_PERFORMANCE: if (est.lookup < best.lookup) break; @@ -4344,7 +4343,6 @@ static int nf_tables_set_desc_parse(struct nft_set_desc *desc, static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - u32 ktype, dtype, flags, policy, gc_int, objtype; struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); u8 family = info->nfmsg->nfgen_family; @@ -4357,10 +4355,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, struct nft_set *set; struct nft_ctx ctx; size_t alloc_size; - u64 timeout; char *name; int err, i; u16 udlen; + u32 flags; u64 size; if (nla[NFTA_SET_TABLE] == NULL || @@ -4371,10 +4369,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, memset(&desc, 0, sizeof(desc)); - ktype = NFT_DATA_VALUE; + desc.ktype = NFT_DATA_VALUE; if (nla[NFTA_SET_KEY_TYPE] != NULL) { - ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE])); - if ((ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK) + desc.ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE])); + if ((desc.ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK) return -EINVAL; } @@ -4399,17 +4397,17 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, return -EOPNOTSUPP; } - dtype = 0; + desc.dtype = 0; if (nla[NFTA_SET_DATA_TYPE] != NULL) { if (!(flags & NFT_SET_MAP)) return -EINVAL; - dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE])); - if ((dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK && - dtype != NFT_DATA_VERDICT) + desc.dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE])); + if ((desc.dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK && + desc.dtype != NFT_DATA_VERDICT) return -EINVAL; - if (dtype != NFT_DATA_VERDICT) { + if (desc.dtype != NFT_DATA_VERDICT) { if (nla[NFTA_SET_DATA_LEN] == NULL) return -EINVAL; desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN])); @@ -4424,34 +4422,34 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (!(flags & NFT_SET_OBJECT)) return -EINVAL; - objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE])); - if (objtype == NFT_OBJECT_UNSPEC || - objtype > NFT_OBJECT_MAX) + desc.objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE])); + if (desc.objtype == NFT_OBJECT_UNSPEC || + desc.objtype > NFT_OBJECT_MAX) return -EOPNOTSUPP; } else if (flags & NFT_SET_OBJECT) return -EINVAL; else - objtype = NFT_OBJECT_UNSPEC; + desc.objtype = NFT_OBJECT_UNSPEC; - timeout = 0; + desc.timeout = 0; if (nla[NFTA_SET_TIMEOUT] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; - err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &timeout); + err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout); if (err) return err; } - gc_int = 0; + desc.gc_int = 0; if (nla[NFTA_SET_GC_INTERVAL] != NULL) { if (!(flags & NFT_SET_TIMEOUT)) return -EINVAL; - gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); + desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); } - policy = NFT_SET_POL_PERFORMANCE; + desc.policy = NFT_SET_POL_PERFORMANCE; if (nla[NFTA_SET_POLICY] != NULL) - policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY])); + desc.policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY])); if (nla[NFTA_SET_DESC] != NULL) { err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]); @@ -4496,7 +4494,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) return -ENOENT; - ops = nft_select_set_ops(&ctx, nla, &desc, policy); + ops = nft_select_set_ops(&ctx, nla, &desc); if (IS_ERR(ops)) return PTR_ERR(ops); @@ -4536,18 +4534,18 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, set->table = table; write_pnet(&set->net, net); set->ops = ops; - set->ktype = ktype; + set->ktype = desc.ktype; set->klen = desc.klen; - set->dtype = dtype; - set->objtype = objtype; + set->dtype = desc.dtype; + set->objtype = desc.objtype; set->dlen = desc.dlen; set->flags = flags; set->size = desc.size; - set->policy = policy; + set->policy = desc.policy; set->udlen = udlen; set->udata = udata; - set->timeout = timeout; - set->gc_int = gc_int; + set->timeout = desc.timeout; + set->gc_int = desc.gc_int; set->field_count = desc.field_count; for (i = 0; i < desc.field_count; i++) From 59c8f9a2361429254a43945b199dd5ab20e736a5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 19 Dec 2022 18:00:10 +0100 Subject: [PATCH 017/543] netfilter: nf_tables: add function to create set stateful expressions [ Upstream commit a8fe4154fa5a1bae590b243ed60f871e5a5e1378 ] Add a helper function to allocate and initialize the stateful expressions that are defined in a set. This patch allows to reuse this code from the set update path, to check that type of the update matches the existing set in the kernel. Signed-off-by: Pablo Neira Ayuso Stable-dep-of: f6594c372afd ("netfilter: nf_tables: perform type checking for existing sets") Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 106 ++++++++++++++++++++++------------ 1 file changed, 68 insertions(+), 38 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index a31f8dc40646..9f35a249c2c3 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4340,6 +4340,59 @@ static int nf_tables_set_desc_parse(struct nft_set_desc *desc, return err; } +static int nft_set_expr_alloc(struct nft_ctx *ctx, struct nft_set *set, + const struct nlattr * const *nla, + struct nft_expr **exprs, int *num_exprs, + u32 flags) +{ + struct nft_expr *expr; + int err, i; + + if (nla[NFTA_SET_EXPR]) { + expr = nft_set_elem_expr_alloc(ctx, set, nla[NFTA_SET_EXPR]); + if (IS_ERR(expr)) { + err = PTR_ERR(expr); + goto err_set_expr_alloc; + } + exprs[0] = expr; + (*num_exprs)++; + } else if (nla[NFTA_SET_EXPRESSIONS]) { + struct nlattr *tmp; + int left; + + if (!(flags & NFT_SET_EXPR)) { + err = -EINVAL; + goto err_set_expr_alloc; + } + i = 0; + nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) { + if (i == NFT_SET_EXPR_MAX) { + err = -E2BIG; + goto err_set_expr_alloc; + } + if (nla_type(tmp) != NFTA_LIST_ELEM) { + err = -EINVAL; + goto err_set_expr_alloc; + } + expr = nft_set_elem_expr_alloc(ctx, set, tmp); + if (IS_ERR(expr)) { + err = PTR_ERR(expr); + goto err_set_expr_alloc; + } + exprs[i++] = expr; + (*num_exprs)++; + } + } + + return 0; + +err_set_expr_alloc: + for (i = 0; i < *num_exprs; i++) + nft_expr_destroy(ctx, exprs[i]); + + return err; +} + static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { @@ -4347,7 +4400,6 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, u8 genmask = nft_genmask_next(info->net); u8 family = info->nfmsg->nfgen_family; const struct nft_set_ops *ops; - struct nft_expr *expr = NULL; struct net *net = info->net; struct nft_set_desc desc; struct nft_table *table; @@ -4355,6 +4407,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, struct nft_set *set; struct nft_ctx ctx; size_t alloc_size; + int num_exprs = 0; char *name; int err, i; u16 udlen; @@ -4481,6 +4534,8 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, return PTR_ERR(set); } } else { + struct nft_expr *exprs[NFT_SET_EXPR_MAX] = {}; + if (info->nlh->nlmsg_flags & NLM_F_EXCL) { NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]); return -EEXIST; @@ -4488,6 +4543,13 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (info->nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; + err = nft_set_expr_alloc(&ctx, set, nla, exprs, &num_exprs, flags); + if (err < 0) + return err; + + for (i = 0; i < num_exprs; i++) + nft_expr_destroy(&ctx, exprs[i]); + return 0; } @@ -4555,43 +4617,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (err < 0) goto err_set_init; - if (nla[NFTA_SET_EXPR]) { - expr = nft_set_elem_expr_alloc(&ctx, set, nla[NFTA_SET_EXPR]); - if (IS_ERR(expr)) { - err = PTR_ERR(expr); - goto err_set_expr_alloc; - } - set->exprs[0] = expr; - set->num_exprs++; - } else if (nla[NFTA_SET_EXPRESSIONS]) { - struct nft_expr *expr; - struct nlattr *tmp; - int left; - - if (!(flags & NFT_SET_EXPR)) { - err = -EINVAL; - goto err_set_expr_alloc; - } - i = 0; - nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) { - if (i == NFT_SET_EXPR_MAX) { - err = -E2BIG; - goto err_set_expr_alloc; - } - if (nla_type(tmp) != NFTA_LIST_ELEM) { - err = -EINVAL; - goto err_set_expr_alloc; - } - expr = nft_set_elem_expr_alloc(&ctx, set, tmp); - if (IS_ERR(expr)) { - err = PTR_ERR(expr); - goto err_set_expr_alloc; - } - set->exprs[i++] = expr; - set->num_exprs++; - } - } + err = nft_set_expr_alloc(&ctx, set, nla, set->exprs, &num_exprs, flags); + if (err < 0) + goto err_set_destroy; + set->num_exprs = num_exprs; set->handle = nf_tables_alloc_handle(table); err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); @@ -4605,7 +4635,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, err_set_expr_alloc: for (i = 0; i < set->num_exprs; i++) nft_expr_destroy(&ctx, set->exprs[i]); - +err_set_destroy: ops->destroy(set); err_set_init: kfree(set->name); From 4052919880dba9966d8155d16bc375675e479597 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 19 Dec 2022 20:09:00 +0100 Subject: [PATCH 018/543] netfilter: nf_tables: perform type checking for existing sets [ Upstream commit f6594c372afd5cec8b1e9ee9ea8f8819d59c6fb1 ] If a ruleset declares a set name that matches an existing set in the kernel, then validate that this declaration really refers to the same set, otherwise bail out with EEXIST. Currently, the kernel reports success when adding a set that already exists in the kernel. This usually results in EINVAL errors at a later stage, when the user adds elements to the set, if the set declaration mismatches the existing set representation in the kernel. Add a new function to check that the set declaration really refers to the same existing set in the kernel. Fixes: 96518518cc41 ("netfilter: add nftables") Reported-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_tables_api.c | 36 ++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9f35a249c2c3..6e68cab474c2 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4393,6 +4393,34 @@ err_set_expr_alloc: return err; } +static bool nft_set_is_same(const struct nft_set *set, + const struct nft_set_desc *desc, + struct nft_expr *exprs[], u32 num_exprs, u32 flags) +{ + int i; + + if (set->ktype != desc->ktype || + set->dtype != desc->dtype || + set->flags != flags || + set->klen != desc->klen || + set->dlen != desc->dlen || + set->field_count != desc->field_count || + set->num_exprs != num_exprs) + return false; + + for (i = 0; i < desc->field_count; i++) { + if (set->field_len[i] != desc->field_len[i]) + return false; + } + + for (i = 0; i < num_exprs; i++) { + if (set->exprs[i]->ops != exprs[i]->ops) + return false; + } + + return true; +} + static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { @@ -4547,10 +4575,16 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, if (err < 0) return err; + err = 0; + if (!nft_set_is_same(set, &desc, exprs, num_exprs, flags)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]); + err = -EEXIST; + } + for (i = 0; i < num_exprs; i++) nft_expr_destroy(&ctx, exprs[i]); - return 0; + return err; } if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) From 3dfffe6214facea00ad968a679712effce36192b Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Tue, 20 Dec 2022 09:54:48 -0800 Subject: [PATCH 019/543] ice: xsk: do not use xdp_return_frame() on tx_buf->raw_buf [ Upstream commit 53fc61be273a1e76dd5e356f91805dce00ff2d2c ] Previously ice XDP xmit routine was changed in a way that it avoids xdp_buff->xdp_frame conversion as it is simply not needed for handling XDP_TX action and what is more it saves us CPU cycles. This routine is re-used on ZC driver to handle XDP_TX action. Although for XDP_TX on Rx ZC xdp_buff that comes from xsk_buff_pool is converted to xdp_frame, xdp_frame itself is not stored inside ice_tx_buf, we only store raw data pointer. Casting this pointer to xdp_frame and calling against it xdp_return_frame in ice_clean_xdp_tx_buf() results in undefined behavior. To fix this, simply call page_frag_free() on tx_buf->raw_buf. Later intention is to remove the buff->frame conversion in order to simplify the codebase and improve XDP_TX performance on ZC. Fixes: 126cdfe1007a ("ice: xsk: Improve AF_XDP ZC Tx and use batching API") Reported-and-tested-by: Robin Cowley Signed-off-by: Maciej Fijalkowski Tested-by: Chandan Kumar Rout (A Contingent Worker at Intel) Signed-off-by: Tony Nguyen Reviewed-by: Piotr Raczynski Link: https://lore.kernel.org/r/20221220175448.693999-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_xsk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 056c904b83cc..79fa65d1cf20 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -772,7 +772,7 @@ construct_skb: static void ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf) { - xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf); + page_frag_free(tx_buf->raw_buf); xdp_ring->xdp_tx_active--; dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma), dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); From acce57c9b6a25853fdd8b10c3bcf91085f9c20fc Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 20 Dec 2022 18:18:25 +0100 Subject: [PATCH 020/543] net: vrf: determine the dst using the original ifindex for multicast [ Upstream commit f2575c8f404911da83f25b688e12afcf4273e640 ] Multicast packets received on an interface bound to a VRF are marked as belonging to the VRF and the skb device is updated to point to the VRF device itself. This was fine even when a route was associated to a device as when performing a fib table lookup 'oif' in fib6_table_lookup (coming from 'skb->dev->ifindex' in ip6_route_input) was set to 0 when FLOWI_FLAG_SKIP_NH_OIF was set. With commit 40867d74c374 ("net: Add l3mdev index to flow struct and avoid oif reset for port devices") this is not longer true and multicast traffic is not received on the original interface. Instead of adding back a similar check in fib6_table_lookup determine the dst using the original ifindex for multicast VRF traffic. To make things consistent across the function do the above for all strict packets, which was the logic before commit 6f12fa775530 ("vrf: mark skb for multicast or link-local as enslaved to VRF"). Note that reverting to this behavior should be fine as the change was about marking packets belonging to the VRF, not about their dst. Fixes: 40867d74c374 ("net: Add l3mdev index to flow struct and avoid oif reset for port devices") Reported-by: Jianlin Shi Signed-off-by: Antoine Tenart Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20221220171825.1172237-1-atenart@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/vrf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index badf6f09ae51..f6dcec66f0a4 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1385,8 +1385,8 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, /* loopback, multicast & non-ND link-local traffic; do not push through * packet taps again. Reset pkt_type for upper layers to process skb. - * For strict packets with a source LLA, determine the dst using the - * original ifindex. + * For non-loopback strict packets, determine the dst using the original + * ifindex. */ if (skb->pkt_type == PACKET_LOOPBACK || (need_strict && !is_ndisc)) { skb->dev = vrf_dev; @@ -1395,7 +1395,7 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, if (skb->pkt_type == PACKET_LOOPBACK) skb->pkt_type = PACKET_HOST; - else if (ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL) + else vrf_ip6_input_dst(skb, vrf_dev, orig_iif); goto out; From c312bcd282545147506480865e7ad13b8c25a636 Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Tue, 20 Dec 2022 12:25:55 -0800 Subject: [PATCH 021/543] vmxnet3: correctly report csum_level for encapsulated packet [ Upstream commit 3d8f2c4269d08f8793e946279dbdf5e972cc4911 ] Commit dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload support") added support for encapsulation offload. However, the pathc did not report correctly the csum_level for encapsulated packet. This patch fixes this issue by reporting correct csum level for the encapsulated packet. Fixes: dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload support") Signed-off-by: Ronak Doshi Acked-by: Peng Li Link: https://lore.kernel.org/r/20221220202556.24421-1-doshir@vmware.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/vmxnet3/vmxnet3_drv.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 6f1e560fb15c..56267c327f0b 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1288,6 +1288,10 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter, (le32_to_cpu(gdesc->dword[3]) & VMXNET3_RCD_CSUM_OK) == VMXNET3_RCD_CSUM_OK) { skb->ip_summed = CHECKSUM_UNNECESSARY; + if ((le32_to_cpu(gdesc->dword[0]) & + (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))) { + skb->csum_level = 1; + } WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) && !(le32_to_cpu(gdesc->dword[0]) & (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))); @@ -1297,6 +1301,10 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter, } else if (gdesc->rcd.v6 && (le32_to_cpu(gdesc->dword[3]) & (1 << VMXNET3_RCD_TUC_SHIFT))) { skb->ip_summed = CHECKSUM_UNNECESSARY; + if ((le32_to_cpu(gdesc->dword[0]) & + (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))) { + skb->csum_level = 1; + } WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) && !(le32_to_cpu(gdesc->dword[0]) & (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))); From dfe522e4cdfb1b275f14e5bebe740b110f60262a Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 20 Dec 2022 11:52:14 -0800 Subject: [PATCH 022/543] mptcp: fix deadlock in fastopen error path [ Upstream commit 7d803344fdc3e38079fabcf38b1e4cb6f8faa655 ] MatM reported a deadlock at fastopening time: INFO: task syz-executor.0:11454 blocked for more than 143 seconds. Tainted: G S 6.1.0-rc5-03226-gdb0157db5153 #1 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:syz-executor.0 state:D stack:25104 pid:11454 ppid:424 flags:0x00004006 Call Trace: context_switch kernel/sched/core.c:5191 [inline] __schedule+0x5c2/0x1550 kernel/sched/core.c:6503 schedule+0xe8/0x1c0 kernel/sched/core.c:6579 __lock_sock+0x142/0x260 net/core/sock.c:2896 lock_sock_nested+0xdb/0x100 net/core/sock.c:3466 __mptcp_close_ssk+0x1a3/0x790 net/mptcp/protocol.c:2328 mptcp_destroy_common+0x16a/0x650 net/mptcp/protocol.c:3171 mptcp_disconnect+0xb8/0x450 net/mptcp/protocol.c:3019 __inet_stream_connect+0x897/0xa40 net/ipv4/af_inet.c:720 tcp_sendmsg_fastopen+0x3dd/0x740 net/ipv4/tcp.c:1200 mptcp_sendmsg_fastopen net/mptcp/protocol.c:1682 [inline] mptcp_sendmsg+0x128a/0x1a50 net/mptcp/protocol.c:1721 inet6_sendmsg+0x11f/0x150 net/ipv6/af_inet6.c:663 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xf7/0x190 net/socket.c:734 ____sys_sendmsg+0x336/0x970 net/socket.c:2476 ___sys_sendmsg+0x122/0x1c0 net/socket.c:2530 __sys_sendmmsg+0x18d/0x460 net/socket.c:2616 __do_sys_sendmmsg net/socket.c:2645 [inline] __se_sys_sendmmsg net/socket.c:2642 [inline] __x64_sys_sendmmsg+0x9d/0x110 net/socket.c:2642 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x38/0x90 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f5920a75e7d RSP: 002b:00007f59201e8028 EFLAGS: 00000246 ORIG_RAX: 0000000000000133 RAX: ffffffffffffffda RBX: 00007f5920bb4f80 RCX: 00007f5920a75e7d RDX: 0000000000000001 RSI: 0000000020002940 RDI: 0000000000000005 RBP: 00007f5920ae7593 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000020004050 R11: 0000000000000246 R12: 0000000000000000 R13: 000000000000000b R14: 00007f5920bb4f80 R15: 00007f59201c8000 In the error path, tcp_sendmsg_fastopen() ends-up calling mptcp_disconnect(), and the latter tries to close each subflow, acquiring the socket lock on each of them. At fastopen time, we have a single subflow, and such subflow socket lock is already held by the called, causing the deadlock. We already track the 'fastopen in progress' status inside the msk socket. Use it to address the issue, making mptcp_disconnect() a no op when invoked from the fastopen (error) path and doing the relevant cleanup after releasing the subflow socket lock. While at the above, rename the fastopen status bit to something more meaningful. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/321 Fixes: fa9e57468aa1 ("mptcp: fix abba deadlock on fastopen") Reported-by: Mat Martineau Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/mptcp/protocol.c | 18 +++++++++++++++--- net/mptcp/protocol.h | 2 +- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 1dbc62537259..e64ea2ca1c7a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1673,6 +1673,8 @@ static void mptcp_set_nospace(struct sock *sk) set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags); } +static int mptcp_disconnect(struct sock *sk, int flags); + static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg, size_t len, int *copied_syn) { @@ -1683,9 +1685,9 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msgh lock_sock(ssk); msg->msg_flags |= MSG_DONTWAIT; msk->connect_flags = O_NONBLOCK; - msk->is_sendmsg = 1; + msk->fastopening = 1; ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL); - msk->is_sendmsg = 0; + msk->fastopening = 0; msg->msg_flags = saved_flags; release_sock(ssk); @@ -1699,6 +1701,8 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msgh */ if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR) *copied_syn = 0; + } else if (ret && ret != -EINPROGRESS) { + mptcp_disconnect(sk, 0); } return ret; @@ -3000,6 +3004,14 @@ static int mptcp_disconnect(struct sock *sk, int flags) { struct mptcp_sock *msk = mptcp_sk(sk); + /* We are on the fastopen error path. We can't call straight into the + * subflows cleanup code due to lock nesting (we are already under + * msk->firstsocket lock). Do nothing and leave the cleanup to the + * caller. + */ + if (msk->fastopening) + return 0; + inet_sk_state_store(sk, TCP_CLOSE); mptcp_stop_timer(sk); @@ -3566,7 +3578,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* if reaching here via the fastopen/sendmsg path, the caller already * acquired the subflow socket lock, too. */ - if (msk->is_sendmsg) + if (msk->fastopening) err = __inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags, 1); else err = inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 6a09ab99a12d..380bddbc52d4 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -286,7 +286,7 @@ struct mptcp_sock { u8 recvmsg_inq:1, cork:1, nodelay:1, - is_sendmsg:1; + fastopening:1; int connect_flags; struct work_struct work; struct sk_buff *ooo_last_skb; From dda2d04863b59ca13b35952d99f193423962bf64 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 20 Dec 2022 11:52:15 -0800 Subject: [PATCH 023/543] mptcp: fix lockdep false positive [ Upstream commit fec3adfd754ccc99a7230e8ab9f105b65fb07bcc ] MattB reported a lockdep splat in the mptcp listener code cleanup: WARNING: possible circular locking dependency detected packetdrill/14278 is trying to acquire lock: ffff888017d868f0 ((work_completion)(&msk->work)){+.+.}-{0:0}, at: __flush_work (kernel/workqueue.c:3069) but task is already holding lock: ffff888017d84130 (sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_close (net/mptcp/protocol.c:2973) which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (sk_lock-AF_INET){+.+.}-{0:0}: __lock_acquire (kernel/locking/lockdep.c:5055) lock_acquire (kernel/locking/lockdep.c:466) lock_sock_nested (net/core/sock.c:3463) mptcp_worker (net/mptcp/protocol.c:2614) process_one_work (kernel/workqueue.c:2294) worker_thread (include/linux/list.h:292) kthread (kernel/kthread.c:376) ret_from_fork (arch/x86/entry/entry_64.S:312) -> #0 ((work_completion)(&msk->work)){+.+.}-{0:0}: check_prev_add (kernel/locking/lockdep.c:3098) validate_chain (kernel/locking/lockdep.c:3217) __lock_acquire (kernel/locking/lockdep.c:5055) lock_acquire (kernel/locking/lockdep.c:466) __flush_work (kernel/workqueue.c:3070) __cancel_work_timer (kernel/workqueue.c:3160) mptcp_cancel_work (net/mptcp/protocol.c:2758) mptcp_subflow_queue_clean (net/mptcp/subflow.c:1817) __mptcp_close_ssk (net/mptcp/protocol.c:2363) mptcp_destroy_common (net/mptcp/protocol.c:3170) mptcp_destroy (include/net/sock.h:1495) __mptcp_destroy_sock (net/mptcp/protocol.c:2886) __mptcp_close (net/mptcp/protocol.c:2959) mptcp_close (net/mptcp/protocol.c:2974) inet_release (net/ipv4/af_inet.c:432) __sock_release (net/socket.c:651) sock_close (net/socket.c:1367) __fput (fs/file_table.c:320) task_work_run (kernel/task_work.c:181 (discriminator 1)) exit_to_user_mode_prepare (include/linux/resume_user_mode.h:49) syscall_exit_to_user_mode (kernel/entry/common.c:130) do_syscall_64 (arch/x86/entry/common.c:87) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(sk_lock-AF_INET); lock((work_completion)(&msk->work)); lock(sk_lock-AF_INET); lock((work_completion)(&msk->work)); *** DEADLOCK *** The report is actually a false positive, since the only existing lock nesting is the msk socket lock acquired by the mptcp work. cancel_work_sync() is invoked without the relevant socket lock being held, but under a different (the msk listener) socket lock. We could silence the splat adding a per workqueue dynamic lockdep key, but that looks overkill. Instead just tell lockdep the msk socket lock is not held around cancel_work_sync(). Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/322 Fixes: 30e51b923e43 ("mptcp: fix unreleased socket in accept queue") Reported-by: Matthieu Baerts Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/mptcp/protocol.c | 2 +- net/mptcp/protocol.h | 2 +- net/mptcp/subflow.c | 19 +++++++++++++++++-- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index e64ea2ca1c7a..e97465f0c667 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2371,7 +2371,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, /* otherwise tcp will dispose of the ssk and subflow ctx */ if (ssk->sk_state == TCP_LISTEN) { tcp_set_state(ssk, TCP_CLOSE); - mptcp_subflow_queue_clean(ssk); + mptcp_subflow_queue_clean(sk, ssk); inet_csk_listen_stop(ssk); } __tcp_close(ssk, 0); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 380bddbc52d4..62e9ff237b6e 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -614,7 +614,7 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow); void __mptcp_subflow_send_ack(struct sock *ssk); void mptcp_subflow_reset(struct sock *ssk); -void mptcp_subflow_queue_clean(struct sock *ssk); +void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); bool __mptcp_close(struct sock *sk, long timeout); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 613f515fedf0..9d3701fdb293 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1733,7 +1733,7 @@ static void subflow_state_change(struct sock *sk) } } -void mptcp_subflow_queue_clean(struct sock *listener_ssk) +void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk) { struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue; struct mptcp_sock *msk, *next, *head = NULL; @@ -1782,8 +1782,23 @@ void mptcp_subflow_queue_clean(struct sock *listener_ssk) do_cancel_work = __mptcp_close(sk, 0); release_sock(sk); - if (do_cancel_work) + if (do_cancel_work) { + /* lockdep will report a false positive ABBA deadlock + * between cancel_work_sync and the listener socket. + * The involved locks belong to different sockets WRT + * the existing AB chain. + * Using a per socket key is problematic as key + * deregistration requires process context and must be + * performed at socket disposal time, in atomic + * context. + * Just tell lockdep to consider the listener socket + * released here. + */ + mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_); mptcp_cancel_work(sk); + mutex_acquire(&listener_sk->sk_lock.dep_map, + SINGLE_DEPTH_NESTING, 0, _RET_IP_); + } sock_put(sk); } From f03ecaeca49925b5fe99d60e367f2434ebc17ee5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 19 Dec 2022 20:10:12 +0100 Subject: [PATCH 024/543] netfilter: nf_tables: honor set timeout and garbage collection updates [ Upstream commit 123b99619cca94bdca0bf7bde9abe28f0a0dfe06 ] Set timeout and garbage collection interval updates are ignored on updates. Add transaction to update global set element timeout and garbage collection interval. Fixes: 96518518cc41 ("netfilter: add nftables") Suggested-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/net/netfilter/nf_tables.h | 13 ++++++- net/netfilter/nf_tables_api.c | 63 ++++++++++++++++++++++--------- 2 files changed, 57 insertions(+), 19 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index ddcdde230747..1daededfa75e 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -592,7 +592,9 @@ void *nft_set_catchall_gc(const struct nft_set *set); static inline unsigned long nft_set_gc_interval(const struct nft_set *set) { - return set->gc_int ? msecs_to_jiffies(set->gc_int) : HZ; + u32 gc_int = READ_ONCE(set->gc_int); + + return gc_int ? msecs_to_jiffies(gc_int) : HZ; } /** @@ -1563,6 +1565,9 @@ struct nft_trans_rule { struct nft_trans_set { struct nft_set *set; u32 set_id; + u32 gc_int; + u64 timeout; + bool update; bool bound; }; @@ -1572,6 +1577,12 @@ struct nft_trans_set { (((struct nft_trans_set *)trans->data)->set_id) #define nft_trans_set_bound(trans) \ (((struct nft_trans_set *)trans->data)->bound) +#define nft_trans_set_update(trans) \ + (((struct nft_trans_set *)trans->data)->update) +#define nft_trans_set_timeout(trans) \ + (((struct nft_trans_set *)trans->data)->timeout) +#define nft_trans_set_gc_int(trans) \ + (((struct nft_trans_set *)trans->data)->gc_int) struct nft_trans_chain { bool update; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 6e68cab474c2..3ba8c291fcaa 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -465,8 +465,9 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx) return 0; } -static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, - struct nft_set *set) +static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, + struct nft_set *set, + const struct nft_set_desc *desc) { struct nft_trans *trans; @@ -474,17 +475,28 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, if (trans == NULL) return -ENOMEM; - if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) { + if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) { nft_trans_set_id(trans) = ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID])); nft_activate_next(ctx->net, set); } nft_trans_set(trans) = set; + if (desc) { + nft_trans_set_update(trans) = true; + nft_trans_set_gc_int(trans) = desc->gc_int; + nft_trans_set_timeout(trans) = desc->timeout; + } nft_trans_commit_list_add_tail(ctx->net, trans); return 0; } +static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, + struct nft_set *set) +{ + return __nft_trans_set_add(ctx, msg_type, set, NULL); +} + static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set) { int err; @@ -3996,8 +4008,10 @@ static int nf_tables_fill_set_concat(struct sk_buff *skb, static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, const struct nft_set *set, u16 event, u16 flags) { - struct nlmsghdr *nlh; + u64 timeout = READ_ONCE(set->timeout); + u32 gc_int = READ_ONCE(set->gc_int); u32 portid = ctx->portid; + struct nlmsghdr *nlh; struct nlattr *nest; u32 seq = ctx->seq; int i; @@ -4033,13 +4047,13 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, nla_put_be32(skb, NFTA_SET_OBJ_TYPE, htonl(set->objtype))) goto nla_put_failure; - if (set->timeout && + if (timeout && nla_put_be64(skb, NFTA_SET_TIMEOUT, - nf_jiffies64_to_msecs(set->timeout), + nf_jiffies64_to_msecs(timeout), NFTA_SET_PAD)) goto nla_put_failure; - if (set->gc_int && - nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int))) + if (gc_int && + nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(gc_int))) goto nla_put_failure; if (set->policy != NFT_SET_POL_PERFORMANCE) { @@ -4584,7 +4598,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, for (i = 0; i < num_exprs; i++) nft_expr_destroy(&ctx, exprs[i]); - return err; + if (err < 0) + return err; + + return __nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set, &desc); } if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) @@ -6022,7 +6039,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, return err; } else if (set->flags & NFT_SET_TIMEOUT && !(flags & NFT_SET_ELEM_INTERVAL_END)) { - timeout = set->timeout; + timeout = READ_ONCE(set->timeout); } expiration = 0; @@ -6123,7 +6140,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (err < 0) goto err_parse_key_end; - if (timeout != set->timeout) { + if (timeout != READ_ONCE(set->timeout)) { err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); if (err < 0) goto err_parse_key_end; @@ -9039,14 +9056,20 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_NEWSET: - nft_clear(net, nft_trans_set(trans)); - /* This avoids hitting -EBUSY when deleting the table - * from the transaction. - */ - if (nft_set_is_anonymous(nft_trans_set(trans)) && - !list_empty(&nft_trans_set(trans)->bindings)) - trans->ctx.table->use--; + if (nft_trans_set_update(trans)) { + struct nft_set *set = nft_trans_set(trans); + WRITE_ONCE(set->timeout, nft_trans_set_timeout(trans)); + WRITE_ONCE(set->gc_int, nft_trans_set_gc_int(trans)); + } else { + nft_clear(net, nft_trans_set(trans)); + /* This avoids hitting -EBUSY when deleting the table + * from the transaction. + */ + if (nft_set_is_anonymous(nft_trans_set(trans)) && + !list_empty(&nft_trans_set(trans)->bindings)) + trans->ctx.table->use--; + } nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), NFT_MSG_NEWSET, GFP_KERNEL); nft_trans_destroy(trans); @@ -9268,6 +9291,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; case NFT_MSG_NEWSET: + if (nft_trans_set_update(trans)) { + nft_trans_destroy(trans); + break; + } trans->ctx.table->use--; if (nft_trans_set_bound(trans)) { nft_trans_destroy(trans); From a96c5c6e67f8f2c631e0c6a0dc6581e7102ee068 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Dec 2022 13:08:31 +0000 Subject: [PATCH 025/543] bonding: fix lockdep splat in bond_miimon_commit() [ Upstream commit 42c7ded0eeacd2ba5db599205c71c279dc715de7 ] bond_miimon_commit() is run while RTNL is held, not RCU. WARNING: suspicious RCU usage 6.1.0-syzkaller-09671-g89529367293c #0 Not tainted ----------------------------- drivers/net/bonding/bond_main.c:2704 suspicious rcu_dereference_check() usage! Fixes: e95cc44763a4 ("bonding: do failover when high prio link up") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Hangbin Liu Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Link: https://lore.kernel.org/r/20221220130831.1480888-1-edumazet@google.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/bonding/bond_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b108f2f4adc2..fce9301c8ebb 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2653,10 +2653,12 @@ static void bond_miimon_link_change(struct bonding *bond, static void bond_miimon_commit(struct bonding *bond) { - struct slave *slave, *primary; + struct slave *slave, *primary, *active; bool do_failover = false; struct list_head *iter; + ASSERT_RTNL(); + bond_for_each_slave(bond, slave, iter) { switch (slave->link_new_state) { case BOND_LINK_NOCHANGE: @@ -2699,8 +2701,8 @@ static void bond_miimon_commit(struct bonding *bond) bond_miimon_link_change(bond, slave, BOND_LINK_UP); - if (!rcu_access_pointer(bond->curr_active_slave) || slave == primary || - slave->prio > rcu_dereference(bond->curr_active_slave)->prio) + active = rtnl_dereference(bond->curr_active_slave); + if (!active || slave == primary || slave->prio > active->prio) do_failover = true; continue; From 62c84d0abe6f3ad449f7f035eb7f9e490b593b6d Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Wed, 21 Dec 2022 10:33:15 +0100 Subject: [PATCH 026/543] net: lan966x: Fix configuration of the PCS [ Upstream commit d717f9474e3fb7e6bd3e43ca16e131f04320ed6f ] When the PCS was taken out of reset, we were changing by mistake also the speed to 100 Mbit. But in case the link was going down, the link up routine was setting correctly the link speed. If the link was not getting down then the speed was forced to run at 100 even if the speed was something else. On lan966x, to set the speed link to 1G or 2.5G a value of 1 needs to be written in DEV_CLOCK_CFG_LINK_SPEED. This is similar to the procedure in lan966x_port_init. The issue was reproduced using 1000base-x sfp module using the commands: ip link set dev eth2 up ip link addr add 10.97.10.2/24 dev eth2 ethtool -s eth2 speed 1000 autoneg off Fixes: d28d6d2e37d1 ("net: lan966x: add port module support") Signed-off-by: Horatiu Vultur Reviewed-by: Piotr Raczynski Link: https://lore.kernel.org/r/20221221093315.939133-1-horatiu.vultur@microchip.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/microchip/lan966x/lan966x_port.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c index 1a61c6cdb077..0050fcb988b7 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_port.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_port.c @@ -381,7 +381,7 @@ int lan966x_port_pcs_set(struct lan966x_port *port, } /* Take PCS out of reset */ - lan_rmw(DEV_CLOCK_CFG_LINK_SPEED_SET(2) | + lan_rmw(DEV_CLOCK_CFG_LINK_SPEED_SET(LAN966X_SPEED_1000) | DEV_CLOCK_CFG_PCS_RX_RST_SET(0) | DEV_CLOCK_CFG_PCS_TX_RST_SET(0), DEV_CLOCK_CFG_LINK_SPEED | From 6003a74bd473a8735d11198ee668fcae9eea5ff7 Mon Sep 17 00:00:00 2001 From: Shawn Bohrer Date: Tue, 20 Dec 2022 12:59:03 -0600 Subject: [PATCH 027/543] veth: Fix race with AF_XDP exposing old or uninitialized descriptors [ Upstream commit fa349e396e4886d742fd6501c599ec627ef1353b ] When AF_XDP is used on on a veth interface the RX ring is updated in two steps. veth_xdp_rcv() removes packet descriptors from the FILL ring fills them and places them in the RX ring updating the cached_prod pointer. Later xdp_do_flush() syncs the RX ring prod pointer with the cached_prod pointer allowing user-space to see the recently filled in descriptors. The rings are intended to be SPSC, however the existing order in veth_poll allows the xdp_do_flush() to run concurrently with another CPU creating a race condition that allows user-space to see old or uninitialized descriptors in the RX ring. This bug has been observed in production systems. To summarize, we are expecting this ordering: CPU 0 __xsk_rcv_zc() CPU 0 __xsk_map_flush() CPU 2 __xsk_rcv_zc() CPU 2 __xsk_map_flush() But we are seeing this order: CPU 0 __xsk_rcv_zc() CPU 2 __xsk_rcv_zc() CPU 0 __xsk_map_flush() CPU 2 __xsk_map_flush() This occurs because we rely on NAPI to ensure that only one napi_poll handler is running at a time for the given veth receive queue. napi_schedule_prep() will prevent multiple instances from getting scheduled. However calling napi_complete_done() signals that this napi_poll is complete and allows subsequent calls to napi_schedule_prep() and __napi_schedule() to succeed in scheduling a concurrent napi_poll before the xdp_do_flush() has been called. For the veth driver a concurrent call to napi_schedule_prep() and __napi_schedule() can occur on a different CPU because the veth xmit path can additionally schedule a napi_poll creating the race. The fix as suggested by Magnus Karlsson, is to simply move the xdp_do_flush() call before napi_complete_done(). This syncs the producer ring pointers before another instance of napi_poll can be scheduled on another CPU. It will also slightly improve performance by moving the flush closer to when the descriptors were placed in the RX ring. Fixes: d1396004dd86 ("veth: Add XDP TX and REDIRECT") Suggested-by: Magnus Karlsson Signed-off-by: Shawn Bohrer Link: https://lore.kernel.org/r/20221220185903.1105011-1-sbohrer@cloudflare.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/veth.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 09682ea3354e..bd385ccd0d18 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -974,6 +974,9 @@ static int veth_poll(struct napi_struct *napi, int budget) xdp_set_return_frame_no_direct(); done = veth_xdp_rcv(rq, budget, &bq, &stats); + if (stats.xdp_redirect > 0) + xdp_do_flush(); + if (done < budget && napi_complete_done(napi, done)) { /* Write rx_notify_masked before reading ptr_ring */ smp_store_mb(rq->rx_notify_masked, false); @@ -987,8 +990,6 @@ static int veth_poll(struct napi_struct *napi, int budget) if (stats.xdp_tx > 0) veth_xdp_flush(rq, &bq); - if (stats.xdp_redirect > 0) - xdp_do_flush(); xdp_clear_return_frame_no_direct(); return done; From d91be3d42c4a7d46ebfa90af565813fdb092d240 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 22 Dec 2022 09:51:30 -0500 Subject: [PATCH 028/543] nfsd: shut down the NFSv4 state objects before the filecache [ Upstream commit 789e1e10f214c00ca18fc6610824c5b9876ba5f2 ] Currently, we shut down the filecache before trying to clean up the stateids that depend on it. This leads to the kernel trying to free an nfsd_file twice, and a refcount overput on the nf_mark. Change the shutdown procedure to tear down all of the stateids prior to shutting down the filecache. Reported-and-tested-by: Wang Yugui Signed-off-by: Jeff Layton Fixes: 5e113224c17e ("nfsd: nfsd_file cache entries should be per net namespace") Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/nfssvc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index bfbd9f672f59..8b1afde19211 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -447,8 +447,8 @@ static void nfsd_shutdown_net(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - nfsd_file_cache_shutdown_net(net); nfs4_state_shutdown_net(net); + nfsd_file_cache_shutdown_net(net); if (nn->lockd_up) { lockd_down(net); nn->lockd_up = false; From 55d10ed4b50728efe31973a50b99e0e08d9b3812 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Thu, 22 Dec 2022 14:43:41 +0800 Subject: [PATCH 029/543] net: hns3: add interrupts re-initialization while doing VF FLR [ Upstream commit 09e6b30eeb254f1818a008cace3547159e908dfd ] Currently keep alive message between PF and VF may be lost and the VF is unalive in PF. So the VF will not do reset during PF FLR reset process. This would make the allocated interrupt resources of VF invalid and VF would't receive or respond to PF any more. So this patch adds VF interrupts re-initialization during VF FLR for VF recovery in above cases. Fixes: 862d969a3a4d ("net: hns3: do VF's pci re-initialization while PF doing FLR") Signed-off-by: Jie Wang Signed-off-by: Hao Lan Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index db6f7cdba958..081bd2c3f289 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2767,7 +2767,8 @@ static int hclgevf_pci_reset(struct hclgevf_dev *hdev) struct pci_dev *pdev = hdev->pdev; int ret = 0; - if (hdev->reset_type == HNAE3_VF_FULL_RESET && + if ((hdev->reset_type == HNAE3_VF_FULL_RESET || + hdev->reset_type == HNAE3_FLR_RESET) && test_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state)) { hclgevf_misc_irq_uninit(hdev); hclgevf_uninit_msi(hdev); From fc7f6dca10b2685f9a6d657518d504262a1d8480 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Thu, 22 Dec 2022 14:43:42 +0800 Subject: [PATCH 030/543] net: hns3: fix miss L3E checking for rx packet [ Upstream commit 7d89b53cea1a702f97117fb4361523519bb1e52c ] For device supports RXD advanced layout, the driver will return directly if the hardware finish the checksum calculate. It cause missing L3E checking for ip packets. Fixes it. Fixes: 1ddc028ac849 ("net: hns3: refactor out RX completion checksum") Signed-off-by: Jian Shen Signed-off-by: Hao Lan Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 028577943ec5..248f15dac86b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -3855,18 +3855,16 @@ static int hns3_gro_complete(struct sk_buff *skb, u32 l234info) return 0; } -static bool hns3_checksum_complete(struct hns3_enet_ring *ring, +static void hns3_checksum_complete(struct hns3_enet_ring *ring, struct sk_buff *skb, u32 ptype, u16 csum) { if (ptype == HNS3_INVALID_PTYPE || hns3_rx_ptype_tbl[ptype].ip_summed != CHECKSUM_COMPLETE) - return false; + return; hns3_ring_stats_update(ring, csum_complete); skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)csum); - - return true; } static void hns3_rx_handle_csum(struct sk_buff *skb, u32 l234info, @@ -3926,8 +3924,7 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb, ptype = hnae3_get_field(ol_info, HNS3_RXD_PTYPE_M, HNS3_RXD_PTYPE_S); - if (hns3_checksum_complete(ring, skb, ptype, csum)) - return; + hns3_checksum_complete(ring, skb, ptype, csum); /* check if hardware has done checksum */ if (!(bd_base_info & BIT(HNS3_RXD_L3L4P_B))) @@ -3936,6 +3933,7 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb, if (unlikely(l234info & (BIT(HNS3_RXD_L3E_B) | BIT(HNS3_RXD_L4E_B) | BIT(HNS3_RXD_OL3E_B) | BIT(HNS3_RXD_OL4E_B)))) { + skb->ip_summed = CHECKSUM_NONE; hns3_ring_stats_update(ring, l3l4_csum_err); return; From e0b6178773cf0aadff636b7cc678169c05ba825c Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Thu, 22 Dec 2022 14:43:43 +0800 Subject: [PATCH 031/543] net: hns3: fix VF promisc mode not update when mac table full [ Upstream commit 8ee57c7b8406c7aa8ca31e014440c87c6383f429 ] Currently, it missed set HCLGE_VPORT_STATE_PROMISC_CHANGE flag for VF when vport->overflow_promisc_flags changed. So the VF won't check whether to update promisc mode in this case. So add it. Fixes: 1e6e76101fd9 ("net: hns3: configure promisc mode for VF asynchronously") Signed-off-by: Jian Shen Signed-off-by: Hao Lan Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../hisilicon/hns3/hns3pf/hclge_main.c | 83 +++++++++++-------- 1 file changed, 47 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 4e54f91f7a6c..6c2742f59c77 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -12754,60 +12754,71 @@ static int hclge_gro_en(struct hnae3_handle *handle, bool enable) return ret; } -static void hclge_sync_promisc_mode(struct hclge_dev *hdev) +static int hclge_sync_vport_promisc_mode(struct hclge_vport *vport) { - struct hclge_vport *vport = &hdev->vport[0]; struct hnae3_handle *handle = &vport->nic; + struct hclge_dev *hdev = vport->back; + bool uc_en = false; + bool mc_en = false; u8 tmp_flags; + bool bc_en; int ret; - u16 i; if (vport->last_promisc_flags != vport->overflow_promisc_flags) { set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state); vport->last_promisc_flags = vport->overflow_promisc_flags; } - if (test_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state)) { + if (!test_and_clear_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, + &vport->state)) + return 0; + + /* for PF */ + if (!vport->vport_id) { tmp_flags = handle->netdev_flags | vport->last_promisc_flags; ret = hclge_set_promisc_mode(handle, tmp_flags & HNAE3_UPE, tmp_flags & HNAE3_MPE); - if (!ret) { - clear_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, - &vport->state); + if (!ret) set_bit(HCLGE_VPORT_STATE_VLAN_FLTR_CHANGE, &vport->state); - } - } - - for (i = 1; i < hdev->num_alloc_vport; i++) { - bool uc_en = false; - bool mc_en = false; - bool bc_en; - - vport = &hdev->vport[i]; - - if (!test_and_clear_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, - &vport->state)) - continue; - - if (vport->vf_info.trusted) { - uc_en = vport->vf_info.request_uc_en > 0 || - vport->overflow_promisc_flags & - HNAE3_OVERFLOW_UPE; - mc_en = vport->vf_info.request_mc_en > 0 || - vport->overflow_promisc_flags & - HNAE3_OVERFLOW_MPE; - } - bc_en = vport->vf_info.request_bc_en > 0; - - ret = hclge_cmd_set_promisc_mode(hdev, vport->vport_id, uc_en, - mc_en, bc_en); - if (ret) { + else set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state); + return ret; + } + + /* for VF */ + if (vport->vf_info.trusted) { + uc_en = vport->vf_info.request_uc_en > 0 || + vport->overflow_promisc_flags & HNAE3_OVERFLOW_UPE; + mc_en = vport->vf_info.request_mc_en > 0 || + vport->overflow_promisc_flags & HNAE3_OVERFLOW_MPE; + } + bc_en = vport->vf_info.request_bc_en > 0; + + ret = hclge_cmd_set_promisc_mode(hdev, vport->vport_id, uc_en, + mc_en, bc_en); + if (ret) { + set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state); + return ret; + } + hclge_set_vport_vlan_fltr_change(vport); + + return 0; +} + +static void hclge_sync_promisc_mode(struct hclge_dev *hdev) +{ + struct hclge_vport *vport; + int ret; + u16 i; + + for (i = 0; i < hdev->num_alloc_vport; i++) { + vport = &hdev->vport[i]; + + ret = hclge_sync_vport_promisc_mode(vport); + if (ret) return; - } - hclge_set_vport_vlan_fltr_change(vport); } } From facc4405e8b7407e03216207b1d1d640127de0c8 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Thu, 22 Dec 2022 11:51:19 +0800 Subject: [PATCH 032/543] net: sched: fix memory leak in tcindex_set_parms [ Upstream commit 399ab7fe0fa0d846881685fd4e57e9a8ef7559f7 ] Syzkaller reports a memory leak as follows: ==================================== BUG: memory leak unreferenced object 0xffff88810c287f00 (size 256): comm "syz-executor105", pid 3600, jiffies 4294943292 (age 12.990s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmalloc_trace+0x20/0x90 mm/slab_common.c:1046 [] kmalloc include/linux/slab.h:576 [inline] [] kmalloc_array include/linux/slab.h:627 [inline] [] kcalloc include/linux/slab.h:659 [inline] [] tcf_exts_init include/net/pkt_cls.h:250 [inline] [] tcindex_set_parms+0xa7/0xbe0 net/sched/cls_tcindex.c:342 [] tcindex_change+0xdf/0x120 net/sched/cls_tcindex.c:553 [] tc_new_tfilter+0x4f2/0x1100 net/sched/cls_api.c:2147 [] rtnetlink_rcv_msg+0x4dc/0x5d0 net/core/rtnetlink.c:6082 [] netlink_rcv_skb+0x87/0x1d0 net/netlink/af_netlink.c:2540 [] netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] [] netlink_unicast+0x397/0x4c0 net/netlink/af_netlink.c:1345 [] netlink_sendmsg+0x396/0x710 net/netlink/af_netlink.c:1921 [] sock_sendmsg_nosec net/socket.c:714 [inline] [] sock_sendmsg+0x56/0x80 net/socket.c:734 [] ____sys_sendmsg+0x178/0x410 net/socket.c:2482 [] ___sys_sendmsg+0xa8/0x110 net/socket.c:2536 [] __sys_sendmmsg+0x105/0x330 net/socket.c:2622 [] __do_sys_sendmmsg net/socket.c:2651 [inline] [] __se_sys_sendmmsg net/socket.c:2648 [inline] [] __x64_sys_sendmmsg+0x24/0x30 net/socket.c:2648 [] do_syscall_x64 arch/x86/entry/common.c:50 [inline] [] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 [] entry_SYSCALL_64_after_hwframe+0x63/0xcd ==================================== Kernel uses tcindex_change() to change an existing filter properties. Yet the problem is that, during the process of changing, if `old_r` is retrieved from `p->perfect`, then kernel uses tcindex_alloc_perfect_hash() to newly allocate filter results, uses tcindex_filter_result_init() to clear the old filter result, without destroying its tcf_exts structure, which triggers the above memory leak. To be more specific, there are only two source for the `old_r`, according to the tcindex_lookup(). `old_r` is retrieved from `p->perfect`, or `old_r` is retrieved from `p->h`. * If `old_r` is retrieved from `p->perfect`, kernel uses tcindex_alloc_perfect_hash() to newly allocate the filter results. Then `r` is assigned with `cp->perfect + handle`, which is newly allocated. So condition `old_r && old_r != r` is true in this situation, and kernel uses tcindex_filter_result_init() to clear the old filter result, without destroying its tcf_exts structure * If `old_r` is retrieved from `p->h`, then `p->perfect` is NULL according to the tcindex_lookup(). Considering that `cp->h` is directly copied from `p->h` and `p->perfect` is NULL, `r` is assigned with `tcindex_lookup(cp, handle)`, whose value should be the same as `old_r`, so condition `old_r && old_r != r` is false in this situation, kernel ignores using tcindex_filter_result_init() to clear the old filter result. So only when `old_r` is retrieved from `p->perfect` does kernel use tcindex_filter_result_init() to clear the old filter result, which triggers the above memory leak. Considering that there already exists a tc_filter_wq workqueue to destroy the old tcindex_data by tcindex_partial_destroy_work() at the end of tcindex_set_parms(), this patch solves this memory leak bug by removing this old filter result clearing part and delegating it to the tc_filter_wq workqueue. Note that this patch doesn't introduce any other issues. If `old_r` is retrieved from `p->perfect`, this patch just delegates old filter result clearing part to the tc_filter_wq workqueue; If `old_r` is retrieved from `p->h`, kernel doesn't reach the old filter result clearing part, so removing this part has no effect. [Thanks to the suggestion from Jakub Kicinski, Cong Wang, Paolo Abeni and Dmitry Vyukov] Fixes: b9a24bb76bf6 ("net_sched: properly handle failure case of tcf_exts_init()") Link: https://lore.kernel.org/all/0000000000001de5c505ebc9ec59@google.com/ Reported-by: syzbot+232ebdbd36706c965ebf@syzkaller.appspotmail.com Tested-by: syzbot+232ebdbd36706c965ebf@syzkaller.appspotmail.com Cc: Cong Wang Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Dmitry Vyukov Acked-by: Paolo Abeni Signed-off-by: Hawkins Jiawei Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sched/cls_tcindex.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 1c9eeb98d826..4bdcbee4bec5 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -332,7 +332,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tcindex_filter_result *r, struct nlattr **tb, struct nlattr *est, u32 flags, struct netlink_ext_ack *extack) { - struct tcindex_filter_result new_filter_result, *old_r = r; + struct tcindex_filter_result new_filter_result; struct tcindex_data *cp = NULL, *oldp; struct tcindex_filter *f = NULL; /* make gcc behave */ struct tcf_result cr = {}; @@ -401,7 +401,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, err = tcindex_filter_result_init(&new_filter_result, cp, net); if (err < 0) goto errout_alloc; - if (old_r) + if (r) cr = r->res; err = -EBUSY; @@ -478,14 +478,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, tcf_bind_filter(tp, &cr, base); } - if (old_r && old_r != r) { - err = tcindex_filter_result_init(old_r, cp, net); - if (err < 0) { - kfree(f); - goto errout_alloc; - } - } - oldp = p; r->res = cr; tcf_exts_change(&r->exts, &e); From a2a694e6edbdb3efb34e1613a31fdcf6cf444a55 Mon Sep 17 00:00:00 2001 From: Daniil Tatianin Date: Thu, 22 Dec 2022 14:52:28 +0300 Subject: [PATCH 033/543] qlcnic: prevent ->dcb use-after-free on qlcnic_dcb_enable() failure [ Upstream commit 13a7c8964afcd8ca43c0b6001ebb0127baa95362 ] adapter->dcb would get silently freed inside qlcnic_dcb_enable() in case qlcnic_dcb_attach() would return an error, which always happens under OOM conditions. This would lead to use-after-free because both of the existing callers invoke qlcnic_dcb_get_info() on the obtained pointer, which is potentially freed at that point. Propagate errors from qlcnic_dcb_enable(), and instead free the dcb pointer at callsite using qlcnic_dcb_free(). This also removes the now unused qlcnic_clear_dcb_ops() helper, which was a simple wrapper around kfree() also causing memory leaks for partially initialized dcb. Found by Linux Verification Center (linuxtesting.org) with the SVACE static analysis tool. Fixes: 3c44bba1d270 ("qlcnic: Disable DCB operations from SR-IOV VFs") Reviewed-by: Michal Swiatkowski Signed-off-by: Daniil Tatianin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c | 8 +++++++- drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h | 10 ++-------- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 8 +++++++- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c index dbb800769cb6..c95d56e56c59 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c @@ -2505,7 +2505,13 @@ int qlcnic_83xx_init(struct qlcnic_adapter *adapter) goto disable_mbx_intr; qlcnic_83xx_clear_function_resources(adapter); - qlcnic_dcb_enable(adapter->dcb); + + err = qlcnic_dcb_enable(adapter->dcb); + if (err) { + qlcnic_dcb_free(adapter->dcb); + goto disable_mbx_intr; + } + qlcnic_83xx_initialize_nic(adapter, 1); qlcnic_dcb_get_info(adapter->dcb); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h index 7519773eaca6..22afa2be85fd 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h @@ -41,11 +41,6 @@ struct qlcnic_dcb { unsigned long state; }; -static inline void qlcnic_clear_dcb_ops(struct qlcnic_dcb *dcb) -{ - kfree(dcb); -} - static inline int qlcnic_dcb_get_hw_capability(struct qlcnic_dcb *dcb) { if (dcb && dcb->ops->get_hw_capability) @@ -112,9 +107,8 @@ static inline void qlcnic_dcb_init_dcbnl_ops(struct qlcnic_dcb *dcb) dcb->ops->init_dcbnl_ops(dcb); } -static inline void qlcnic_dcb_enable(struct qlcnic_dcb *dcb) +static inline int qlcnic_dcb_enable(struct qlcnic_dcb *dcb) { - if (dcb && qlcnic_dcb_attach(dcb)) - qlcnic_clear_dcb_ops(dcb); + return dcb ? qlcnic_dcb_attach(dcb) : 0; } #endif diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 28476b982bab..44dac3c0908e 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -2599,7 +2599,13 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) "Device does not support MSI interrupts\n"); if (qlcnic_82xx_check(adapter)) { - qlcnic_dcb_enable(adapter->dcb); + err = qlcnic_dcb_enable(adapter->dcb); + if (err) { + qlcnic_dcb_free(adapter->dcb); + dev_err(&pdev->dev, "Failed to enable DCB\n"); + goto err_out_free_hw; + } + qlcnic_dcb_get_info(adapter->dcb); err = qlcnic_setup_intr(adapter); From bb50c43f7789a1158263ee02b2f50cf341e32578 Mon Sep 17 00:00:00 2001 From: "Johnny S. Lee" Date: Thu, 22 Dec 2022 22:34:05 +0800 Subject: [PATCH 034/543] net: dsa: mv88e6xxx: depend on PTP conditionally [ Upstream commit 30e725537546248bddc12eaac2fe0a258917f190 ] PTP hardware timestamping related objects are not linked when PTP support for MV88E6xxx (NET_DSA_MV88E6XXX_PTP) is disabled, therefore NET_DSA_MV88E6XXX should not depend on PTP_1588_CLOCK_OPTIONAL regardless of NET_DSA_MV88E6XXX_PTP. Instead, condition more strictly on how NET_DSA_MV88E6XXX_PTP's dependencies are met, making sure that it cannot be enabled when NET_DSA_MV88E6XXX=y and PTP_1588_CLOCK=m. In other words, this commit allows NET_DSA_MV88E6XXX to be built-in while PTP_1588_CLOCK is a module, as long as NET_DSA_MV88E6XXX_PTP is prevented from being enabled. Fixes: e5f31552674e ("ethernet: fix PTP_1588_CLOCK dependencies") Signed-off-by: Johnny S. Lee Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/mv88e6xxx/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig index 7a2445a34eb7..e3181d5471df 100644 --- a/drivers/net/dsa/mv88e6xxx/Kconfig +++ b/drivers/net/dsa/mv88e6xxx/Kconfig @@ -2,7 +2,6 @@ config NET_DSA_MV88E6XXX tristate "Marvell 88E6xxx Ethernet switch fabric support" depends on NET_DSA - depends on PTP_1588_CLOCK_OPTIONAL select IRQ_DOMAIN select NET_DSA_TAG_EDSA select NET_DSA_TAG_DSA @@ -13,7 +12,8 @@ config NET_DSA_MV88E6XXX config NET_DSA_MV88E6XXX_PTP bool "PTP support for Marvell 88E6xxx" default n - depends on NET_DSA_MV88E6XXX && PTP_1588_CLOCK + depends on (NET_DSA_MV88E6XXX = y && PTP_1588_CLOCK = y) || \ + (NET_DSA_MV88E6XXX = m && PTP_1588_CLOCK) help Say Y to enable PTP hardware timestamping on Marvell 88E6xxx switch chips that support it. From b32f6bef248562bb5191ada527717ea50b319466 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Fri, 23 Dec 2022 11:37:18 +0400 Subject: [PATCH 035/543] nfc: Fix potential resource leaks [ Upstream commit df49908f3c52d211aea5e2a14a93bbe67a2cb3af ] nfc_get_device() take reference for the device, add missing nfc_put_device() to release it when not need anymore. Also fix the style warnning by use error EOPNOTSUPP instead of ENOTSUPP. Fixes: 5ce3f32b5264 ("NFC: netlink: SE API implementation") Fixes: 29e76924cf08 ("nfc: netlink: Add capability to reply to vendor_cmd with data") Signed-off-by: Miaoqian Lin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/nfc/netlink.c | 52 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 9d91087b9399..1fc339084d89 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1497,6 +1497,7 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info) u32 dev_idx, se_idx; u8 *apdu; size_t apdu_len; + int rc; if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_SE_INDEX] || @@ -1510,25 +1511,37 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info) if (!dev) return -ENODEV; - if (!dev->ops || !dev->ops->se_io) - return -ENOTSUPP; + if (!dev->ops || !dev->ops->se_io) { + rc = -EOPNOTSUPP; + goto put_dev; + } apdu_len = nla_len(info->attrs[NFC_ATTR_SE_APDU]); - if (apdu_len == 0) - return -EINVAL; + if (apdu_len == 0) { + rc = -EINVAL; + goto put_dev; + } apdu = nla_data(info->attrs[NFC_ATTR_SE_APDU]); - if (!apdu) - return -EINVAL; + if (!apdu) { + rc = -EINVAL; + goto put_dev; + } ctx = kzalloc(sizeof(struct se_io_ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; + if (!ctx) { + rc = -ENOMEM; + goto put_dev; + } ctx->dev_idx = dev_idx; ctx->se_idx = se_idx; - return nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx); + rc = nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx); + +put_dev: + nfc_put_device(dev); + return rc; } static int nfc_genl_vendor_cmd(struct sk_buff *skb, @@ -1551,14 +1564,21 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb, subcmd = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_SUBCMD]); dev = nfc_get_device(dev_idx); - if (!dev || !dev->vendor_cmds || !dev->n_vendor_cmds) + if (!dev) return -ENODEV; + if (!dev->vendor_cmds || !dev->n_vendor_cmds) { + err = -ENODEV; + goto put_dev; + } + if (info->attrs[NFC_ATTR_VENDOR_DATA]) { data = nla_data(info->attrs[NFC_ATTR_VENDOR_DATA]); data_len = nla_len(info->attrs[NFC_ATTR_VENDOR_DATA]); - if (data_len == 0) - return -EINVAL; + if (data_len == 0) { + err = -EINVAL; + goto put_dev; + } } else { data = NULL; data_len = 0; @@ -1573,10 +1593,14 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb, dev->cur_cmd_info = info; err = cmd->doit(dev, data, data_len); dev->cur_cmd_info = NULL; - return err; + goto put_dev; } - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + +put_dev: + nfc_put_device(dev); + return err; } /* message building helper */ From 9bf7939f91aff1ae99362fa9a3d0c851b827b148 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 26 Dec 2022 22:19:37 -0500 Subject: [PATCH 036/543] bnxt_en: Simplify bnxt_xdp_buff_init() [ Upstream commit bbfc17e50ba2ed18dfef46b1c433d50a58566bf1 ] bnxt_xdp_buff_init() does not modify the data_ptr or the len parameters, so no need to pass in the addresses of these parameters. Fixes: b231c3f3414c ("bnxt: refactor bnxt_rx_xdp to separate xdp_init_buff/xdp_prepare_buff") Reviewed-by: Andy Gospodarek Reviewed-by: Somnath Kotur Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 6 +++--- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 9f8a6ce4b356..0166c99cb7c6 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1922,7 +1922,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, dma_addr = rx_buf->mapping; if (bnxt_xdp_attached(bp, rxr)) { - bnxt_xdp_buff_init(bp, rxr, cons, &data_ptr, &len, &xdp); + bnxt_xdp_buff_init(bp, rxr, cons, data_ptr, len, &xdp); if (agg_bufs) { u32 frag_len = bnxt_rx_agg_pages_xdp(bp, cpr, &xdp, cp_cons, agg_bufs, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index c3065ec0a479..1847f191577d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -177,7 +177,7 @@ bool bnxt_xdp_attached(struct bnxt *bp, struct bnxt_rx_ring_info *rxr) } void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, - u16 cons, u8 **data_ptr, unsigned int *len, + u16 cons, u8 *data_ptr, unsigned int len, struct xdp_buff *xdp) { struct bnxt_sw_rx_bd *rx_buf; @@ -191,13 +191,13 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, offset = bp->rx_offset; mapping = rx_buf->mapping - bp->rx_dma_offset; - dma_sync_single_for_cpu(&pdev->dev, mapping + offset, *len, bp->rx_dir); + dma_sync_single_for_cpu(&pdev->dev, mapping + offset, len, bp->rx_dir); if (bp->xdp_has_frags) buflen = BNXT_PAGE_MODE_BUF_SIZE + offset; xdp_init_buff(xdp, buflen, &rxr->xdp_rxq); - xdp_prepare_buff(xdp, *data_ptr - offset, offset, *len, false); + xdp_prepare_buff(xdp, data_ptr - offset, offset, len, false); } void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h index 505911ae095d..2bbdb8e7c506 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h @@ -27,7 +27,7 @@ int bnxt_xdp_xmit(struct net_device *dev, int num_frames, bool bnxt_xdp_attached(struct bnxt *bp, struct bnxt_rx_ring_info *rxr); void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, - u16 cons, u8 **data_ptr, unsigned int *len, + u16 cons, u8 *data_ptr, unsigned int len, struct xdp_buff *xdp); void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr, struct xdp_buff *xdp); From 57e2176bac593dc5a73b715679d2adf9c7a12c5f Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 26 Dec 2022 22:19:38 -0500 Subject: [PATCH 037/543] bnxt_en: Fix XDP RX path [ Upstream commit 9b3e607871ea5ee90f10f5be3965fc07f2aa3ef7 ] The XDP program can change the starting address of the RX data buffer and this information needs to be passed back from bnxt_rx_xdp() to bnxt_rx_pkt() for the XDP_PASS case so that the SKB can point correctly to the modified buffer address. Add back the data_ptr parameter to bnxt_rx_xdp() to make this work. Fixes: b231c3f3414c ("bnxt: refactor bnxt_rx_xdp to separate xdp_init_buff/xdp_prepare_buff") Reviewed-by: Andy Gospodarek Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 7 +++++-- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h | 4 ++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 0166c99cb7c6..a83d534a096a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -1937,7 +1937,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, } if (xdp_active) { - if (bnxt_rx_xdp(bp, rxr, cons, xdp, data, &len, event)) { + if (bnxt_rx_xdp(bp, rxr, cons, xdp, data, &data_ptr, &len, event)) { rc = 1; goto next_rx; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 1847f191577d..2ceeaa818c1c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -222,7 +222,8 @@ void bnxt_xdp_buff_frags_free(struct bnxt_rx_ring_info *rxr, * false - packet should be passed to the stack. */ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, - struct xdp_buff xdp, struct page *page, unsigned int *len, u8 *event) + struct xdp_buff xdp, struct page *page, u8 **data_ptr, + unsigned int *len, u8 *event) { struct bpf_prog *xdp_prog = READ_ONCE(rxr->xdp_prog); struct bnxt_tx_ring_info *txr; @@ -255,8 +256,10 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, *event &= ~BNXT_RX_EVENT; *len = xdp.data_end - xdp.data; - if (orig_data != xdp.data) + if (orig_data != xdp.data) { offset = xdp.data - xdp.data_hard_start; + *data_ptr = xdp.data_hard_start + offset; + } switch (act) { case XDP_PASS: diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h index 2bbdb8e7c506..ea430d6961df 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h @@ -18,8 +18,8 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp, struct xdp_buff *xdp); void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts); bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, - struct xdp_buff xdp, struct page *page, unsigned int *len, - u8 *event); + struct xdp_buff xdp, struct page *page, u8 **data_ptr, + unsigned int *len, u8 *event); int bnxt_xdp(struct net_device *dev, struct netdev_bpf *xdp); int bnxt_xdp_xmit(struct net_device *dev, int num_frames, struct xdp_frame **frames, u32 flags); From c9542e0d3ec1e883dd09fcb7d2247b71443b6871 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 26 Dec 2022 22:19:39 -0500 Subject: [PATCH 038/543] bnxt_en: Fix first buffer size calculations for XDP multi-buffer [ Upstream commit 1abeacc1979fa4a756695f5030791d8f0fa934b9 ] The size of the first buffer is always page size, and the useable space is the page size minus the offset and the skb_shared_info size. Make sure SKB and XDP buf sizes match so that the skb_shared_info is at the same offset seen from the SKB and XDP_BUF. build_skb() should be passed PAGE_SIZE. xdp_init_buff() should be passed PAGE_SIZE as well. xdp_get_shared_info_from_buff() will automatically deduct the skb_shared_info size if the XDP buffer has frags. There is no need to keep bp->xdp_has_frags. Change BNXT_PAGE_MODE_BUF_SIZE to BNXT_MAX_PAGE_MODE_MTU_SBUF since this constant is really the MTU with ethernet header size subtracted. Also fix the BNXT_MAX_PAGE_MODE_MTU macro with proper parentheses. Fixes: 32861236190b ("bnxt: change receive ring space parameters") Reviewed-by: Somnath Kotur Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 9 +++++---- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 15 +++++++++++---- drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 7 +------ 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index a83d534a096a..b0c9c9813d23 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -988,8 +988,7 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp, dma_addr -= bp->rx_dma_offset; dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir, DMA_ATTR_WEAK_ORDERING); - skb = build_skb(page_address(page), BNXT_PAGE_MODE_BUF_SIZE + - bp->rx_dma_offset); + skb = build_skb(page_address(page), PAGE_SIZE); if (!skb) { __free_page(page); return NULL; @@ -3966,8 +3965,10 @@ void bnxt_set_ring_params(struct bnxt *bp) bp->rx_agg_ring_mask = (bp->rx_agg_nr_pages * RX_DESC_CNT) - 1; if (BNXT_RX_PAGE_MODE(bp)) { - rx_space = BNXT_PAGE_MODE_BUF_SIZE; - rx_size = BNXT_MAX_PAGE_MODE_MTU; + rx_space = PAGE_SIZE; + rx_size = PAGE_SIZE - + ALIGN(max(NET_SKB_PAD, XDP_PACKET_HEADROOM), 8) - + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); } else { rx_size = SKB_DATA_ALIGN(BNXT_RX_COPY_THRESH + NET_IP_ALIGN); rx_space = rx_size + NET_SKB_PAD + diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index d5fa43cfe524..02741d499bf4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -591,12 +591,20 @@ struct nqe_cn { #define BNXT_RX_PAGE_SIZE (1 << BNXT_RX_PAGE_SHIFT) #define BNXT_MAX_MTU 9500 -#define BNXT_PAGE_MODE_BUF_SIZE \ + +/* First RX buffer page in XDP multi-buf mode + * + * +-------------------------------------------------------------------------+ + * | XDP_PACKET_HEADROOM | bp->rx_buf_use_size | skb_shared_info| + * | (bp->rx_dma_offset) | | | + * +-------------------------------------------------------------------------+ + */ +#define BNXT_MAX_PAGE_MODE_MTU_SBUF \ ((unsigned int)PAGE_SIZE - VLAN_ETH_HLEN - NET_IP_ALIGN - \ XDP_PACKET_HEADROOM) #define BNXT_MAX_PAGE_MODE_MTU \ - BNXT_PAGE_MODE_BUF_SIZE - \ - SKB_DATA_ALIGN((unsigned int)sizeof(struct skb_shared_info)) + (BNXT_MAX_PAGE_MODE_MTU_SBUF - \ + SKB_DATA_ALIGN((unsigned int)sizeof(struct skb_shared_info))) #define BNXT_MIN_PKT_SIZE 52 @@ -2131,7 +2139,6 @@ struct bnxt { #define BNXT_DUMP_CRASH 1 struct bpf_prog *xdp_prog; - u8 xdp_has_frags; struct bnxt_ptp_cfg *ptp_cfg; u8 ptp_all_rx_tstamp; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 2ceeaa818c1c..36d5202c0aee 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -193,9 +193,6 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, mapping = rx_buf->mapping - bp->rx_dma_offset; dma_sync_single_for_cpu(&pdev->dev, mapping + offset, len, bp->rx_dir); - if (bp->xdp_has_frags) - buflen = BNXT_PAGE_MODE_BUF_SIZE + offset; - xdp_init_buff(xdp, buflen, &rxr->xdp_rxq); xdp_prepare_buff(xdp, data_ptr - offset, offset, len, false); } @@ -404,10 +401,8 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) netdev_warn(dev, "ethtool rx/tx channels must be combined to support XDP.\n"); return -EOPNOTSUPP; } - if (prog) { + if (prog) tx_xdp = bp->rx_nr_rings; - bp->xdp_has_frags = prog->aux->xdp_has_frags; - } tc = netdev_get_num_tc(dev); if (!tc) From 179e6d7213f12679eec4f514155591ec416fbb8a Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 26 Dec 2022 22:19:40 -0500 Subject: [PATCH 039/543] bnxt_en: Fix HDS and jumbo thresholds for RX packets [ Upstream commit a056ebcc30e2f78451d66f615d2f6bdada3e6438 ] The recent XDP multi-buffer feature has introduced regressions in the setting of HDS and jumbo thresholds. HDS was accidentally disabled in the nornmal mode without XDP. This patch restores jumbo HDS placement when not in XDP mode. In XDP multi-buffer mode, HDS should be disabled and the jumbo threshold should be set to the usable page size in the first page buffer. Fixes: 32861236190b ("bnxt: change receive ring space parameters") Reviewed-by: Mohammad Shuab Siddique Reviewed-by: Ajit Khaparde Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b0c9c9813d23..f5a8bae8d79a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5371,15 +5371,16 @@ static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, u16 vnic_id) req->flags = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_JUMBO_PLACEMENT); req->enables = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_ENABLES_JUMBO_THRESH_VALID); - if (BNXT_RX_PAGE_MODE(bp) && !BNXT_RX_JUMBO_MODE(bp)) { + if (BNXT_RX_PAGE_MODE(bp)) { + req->jumbo_thresh = cpu_to_le16(bp->rx_buf_use_size); + } else { req->flags |= cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV4 | VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV6); req->enables |= cpu_to_le32(VNIC_PLCMODES_CFG_REQ_ENABLES_HDS_THRESHOLD_VALID); + req->jumbo_thresh = cpu_to_le16(bp->rx_copy_thresh); + req->hds_threshold = cpu_to_le16(bp->rx_copy_thresh); } - /* thresholds not implemented in firmware yet */ - req->jumbo_thresh = cpu_to_le16(bp->rx_copy_thresh); - req->hds_threshold = cpu_to_le16(bp->rx_copy_thresh); req->vnic_id = cpu_to_le32(vnic->fw_vnic_id); return hwrm_req_send(bp, req); } From 392814d4cf9a57d2a19af83fcca57234bde9cc11 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Nov 2022 15:17:52 +0200 Subject: [PATCH 040/543] vdpa/mlx5: Fix rule forwarding VLAN to TIR [ Upstream commit a6ce72c0fb6041f9871f880b2d02b294f7f49cb4 ] Set the VLAN id to the header values field instead of overwriting the headers criteria field. Before this fix, VLAN filtering would not really work and tagged packets would be forwarded unfiltered to the TIR. Fixes: baf2ad3f6a98 ("vdpa/mlx5: Add RX MAC VLAN filter support") Acked-by: Jason Wang Signed-off-by: Eli Cohen Message-Id: <20221114131759.57883-2-elic@nvidia.com> Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 90913365def4..3fb06dcee943 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1468,11 +1468,13 @@ static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac, dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16); eth_broadcast_addr(dmac_c); ether_addr_copy(dmac_v, mac); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); + if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid); + } if (tagged) { MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1); - MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid); - MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, vid); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid); } flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; From adc6cd16b41815090efee59395cf101fa23aa403 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Nov 2022 15:17:54 +0200 Subject: [PATCH 041/543] vdpa/mlx5: Fix wrong mac address deletion [ Upstream commit 1ab53760d322c82fb4cb5e81b5817065801e3ec4 ] Delete the old MAC from the table and not the new one which is not there yet. Fixes: baf2ad3f6a98 ("vdpa/mlx5: Add RX MAC VLAN filter support") Acked-by: Jason Wang Signed-off-by: Eli Cohen Message-Id: <20221114131759.57883-4-elic@nvidia.com> Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 3fb06dcee943..444d6572b2d0 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1686,7 +1686,7 @@ static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd) /* Need recreate the flow table entry, so that the packet could forward back */ - mac_vlan_del(ndev, ndev->config.mac, 0, false); + mac_vlan_del(ndev, mac_back, 0, false); if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) { mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n"); From 337c24d817e28dd454ca22f1063dfad20822426e Mon Sep 17 00:00:00 2001 From: ruanjinjie Date: Thu, 10 Nov 2022 16:23:48 +0800 Subject: [PATCH 042/543] vdpa_sim: fix possible memory leak in vdpasim_net_init() and vdpasim_blk_init() [ Upstream commit aeca7ff254843d49a8739f07f7dab1341450111d ] Inject fault while probing module, if device_register() fails in vdpasim_net_init() or vdpasim_blk_init(), but the refcount of kobject is not decreased to 0, the name allocated in dev_set_name() is leaked. Fix this by calling put_device(), so that name can be freed in callback function kobject_cleanup(). (vdpa_sim_net) unreferenced object 0xffff88807eebc370 (size 16): comm "modprobe", pid 3848, jiffies 4362982860 (age 18.153s) hex dump (first 16 bytes): 76 64 70 61 73 69 6d 5f 6e 65 74 00 6b 6b 6b a5 vdpasim_net.kkk. backtrace: [] __kmalloc_node_track_caller+0x4e/0x150 [] kstrdup+0x33/0x60 [] kobject_set_name_vargs+0x41/0x110 [] dev_set_name+0xab/0xe0 [] device_add+0xe3/0x1a80 [] 0xffffffffa0270013 [] do_one_initcall+0x87/0x2e0 [] do_init_module+0x1ab/0x640 [] load_module+0x5d00/0x77f0 [] __do_sys_finit_module+0x110/0x1b0 [] do_syscall_64+0x35/0x80 [] entry_SYSCALL_64_after_hwframe+0x46/0xb0 (vdpa_sim_blk) unreferenced object 0xffff8881070c1250 (size 16): comm "modprobe", pid 6844, jiffies 4364069319 (age 17.572s) hex dump (first 16 bytes): 76 64 70 61 73 69 6d 5f 62 6c 6b 00 6b 6b 6b a5 vdpasim_blk.kkk. backtrace: [] __kmalloc_node_track_caller+0x4e/0x150 [] kstrdup+0x33/0x60 [] kobject_set_name_vargs+0x41/0x110 [] dev_set_name+0xab/0xe0 [] device_add+0xe3/0x1a80 [] 0xffffffffa0220013 [] do_one_initcall+0x87/0x2e0 [] do_init_module+0x1ab/0x640 [] load_module+0x5d00/0x77f0 [] __do_sys_finit_module+0x110/0x1b0 [] do_syscall_64+0x35/0x80 [] entry_SYSCALL_64_after_hwframe+0x46/0xb0 Fixes: 899c4d187f6a ("vdpa_sim_blk: add support for vdpa management tool") Fixes: a3c06ae158dd ("vdpa_sim_net: Add support for user supported devices") Signed-off-by: ruanjinjie Reviewed-by: Stefano Garzarella Message-Id: <20221110082348.4105476-1-ruanjinjie@huawei.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: Sasha Levin --- drivers/vdpa/vdpa_sim/vdpa_sim_blk.c | 4 +++- drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c index c6db1a1baf76..f745926237a8 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_blk.c @@ -427,8 +427,10 @@ static int __init vdpasim_blk_init(void) int ret; ret = device_register(&vdpasim_blk_mgmtdev); - if (ret) + if (ret) { + put_device(&vdpasim_blk_mgmtdev); return ret; + } ret = vdpa_mgmtdev_register(&mgmt_dev); if (ret) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c index c3cb225ea469..11f5a121df24 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c @@ -305,8 +305,10 @@ static int __init vdpasim_net_init(void) int ret; ret = device_register(&vdpasim_net_mgmtdev); - if (ret) + if (ret) { + put_device(&vdpasim_net_mgmtdev); return ret; + } ret = vdpa_mgmtdev_register(&mgmt_dev); if (ret) From 9bc360aa816f7694b0f97371cecc850148ff3295 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Tue, 8 Nov 2022 10:17:05 +0000 Subject: [PATCH 043/543] vhost/vsock: Fix error handling in vhost_vsock_init() [ Upstream commit 7a4efe182ca61fb3e5307e69b261c57cbf434cd4 ] A problem about modprobe vhost_vsock failed is triggered with the following log given: modprobe: ERROR: could not insert 'vhost_vsock': Device or resource busy The reason is that vhost_vsock_init() returns misc_register() directly without checking its return value, if misc_register() failed, it returns without calling vsock_core_unregister() on vhost_transport, resulting the vhost_vsock can never be installed later. A simple call graph is shown as below: vhost_vsock_init() vsock_core_register() # register vhost_transport misc_register() device_create_with_groups() device_create_groups_vargs() dev = kzalloc(...) # OOM happened # return without unregister vhost_transport Fix by calling vsock_core_unregister() when misc_register() returns error. Fixes: 433fc58e6bf2 ("VSOCK: Introduce vhost_vsock.ko") Signed-off-by: Yuan Can Message-Id: <20221108101705.45981-1-yuancan@huawei.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella Acked-by: Jason Wang Signed-off-by: Sasha Levin --- drivers/vhost/vsock.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 5703775af129..10a7d23731fe 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -959,7 +959,14 @@ static int __init vhost_vsock_init(void) VSOCK_TRANSPORT_F_H2G); if (ret < 0) return ret; - return misc_register(&vhost_vsock_misc); + + ret = misc_register(&vhost_vsock_misc); + if (ret) { + vsock_core_unregister(&vhost_transport.transport); + return ret; + } + + return 0; }; static void __exit vhost_vsock_exit(void) From 909fb8fbd46b3e16b21880ea862a0263dfc17c7a Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Wed, 9 Nov 2022 11:25:02 +0100 Subject: [PATCH 044/543] vringh: fix range used in iotlb_translate() [ Upstream commit f85efa9b0f5381874f727bd98f56787840313f0b ] vhost_iotlb_itree_first() requires `start` and `last` parameters to search for a mapping that overlaps the range. In iotlb_translate() we cyclically call vhost_iotlb_itree_first(), incrementing `addr` by the amount already translated, so rightly we move the `start` parameter passed to vhost_iotlb_itree_first(), but we should hold the `last` parameter constant. Let's fix it by saving the `last` parameter value before incrementing `addr` in the loop. Fixes: 9ad9c49cfe97 ("vringh: IOTLB support") Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Message-Id: <20221109102503.18816-2-sgarzare@redhat.com> Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/vhost/vringh.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index 11f59dd06a74..828c29306565 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -1102,7 +1102,7 @@ static int iotlb_translate(const struct vringh *vrh, struct vhost_iotlb_map *map; struct vhost_iotlb *iotlb = vrh->iotlb; int ret = 0; - u64 s = 0; + u64 s = 0, last = addr + len - 1; spin_lock(vrh->iotlb_lock); @@ -1114,8 +1114,7 @@ static int iotlb_translate(const struct vringh *vrh, break; } - map = vhost_iotlb_itree_first(iotlb, addr, - addr + len - 1); + map = vhost_iotlb_itree_first(iotlb, addr, last); if (!map || map->start > addr) { ret = -EINVAL; break; From 8b31aa969a2ddfa2a7d82750d5154620718bc418 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Wed, 9 Nov 2022 11:25:03 +0100 Subject: [PATCH 045/543] vhost: fix range used in translate_desc() [ Upstream commit 98047313cdb46828093894d0ac8b1183b8b317f9 ] vhost_iotlb_itree_first() requires `start` and `last` parameters to search for a mapping that overlaps the range. In translate_desc() we cyclically call vhost_iotlb_itree_first(), incrementing `addr` by the amount already translated, so rightly we move the `start` parameter passed to vhost_iotlb_itree_first(), but we should hold the `last` parameter constant. Let's fix it by saving the `last` parameter value before incrementing `addr` in the loop. Fixes: a9709d6874d5 ("vhost: convert pre sorted vhost memory array to interval tree") Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Message-Id: <20221109102503.18816-3-sgarzare@redhat.com> Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/vhost/vhost.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 40097826cff0..3c2359570df9 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2053,7 +2053,7 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, struct vhost_dev *dev = vq->dev; struct vhost_iotlb *umem = dev->iotlb ? dev->iotlb : dev->umem; struct iovec *_iov; - u64 s = 0; + u64 s = 0, last = addr + len - 1; int ret = 0; while ((u64)len > s) { @@ -2063,7 +2063,7 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, break; } - map = vhost_iotlb_itree_first(umem, addr, addr + len - 1); + map = vhost_iotlb_itree_first(umem, addr, last); if (map == NULL || map->start > addr) { if (umem != dev->iotlb) { ret = -EFAULT; From a2907867e2c86067accd2f011d6f23ee5533aa6c Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Wed, 9 Nov 2022 16:42:13 +0100 Subject: [PATCH 046/543] vhost-vdpa: fix an iotlb memory leak [ Upstream commit c070c1912a83432530cbb4271d5b9b11fa36b67a ] Before commit 3d5698793897 ("vhost-vdpa: introduce asid based IOTLB") we called vhost_vdpa_iotlb_unmap(v, iotlb, 0ULL, 0ULL - 1) during release to free all the resources allocated when processing user IOTLB messages through vhost_vdpa_process_iotlb_update(). That commit changed the handling of IOTLB a bit, and we accidentally removed some code called during the release. We partially fixed this with commit 037d4305569a ("vhost-vdpa: call vhost_vdpa_cleanup during the release") but a potential memory leak is still there as showed by kmemleak if the application does not send VHOST_IOTLB_INVALIDATE or crashes: unreferenced object 0xffff888007fbaa30 (size 16): comm "blkio-bench", pid 914, jiffies 4294993521 (age 885.500s) hex dump (first 16 bytes): 40 73 41 07 80 88 ff ff 00 00 00 00 00 00 00 00 @sA............. backtrace: [<0000000087736d2a>] kmem_cache_alloc_trace+0x142/0x1c0 [<0000000060740f50>] vhost_vdpa_process_iotlb_msg+0x68c/0x901 [vhost_vdpa] [<0000000083e8e205>] vhost_chr_write_iter+0xc0/0x4a0 [vhost] [<000000008f2f414a>] vhost_vdpa_chr_write_iter+0x18/0x20 [vhost_vdpa] [<00000000de1cd4a0>] vfs_write+0x216/0x4b0 [<00000000a2850200>] ksys_write+0x71/0xf0 [<00000000de8e720b>] __x64_sys_write+0x19/0x20 [<0000000018b12cbb>] do_syscall_64+0x3f/0x90 [<00000000986ec465>] entry_SYSCALL_64_after_hwframe+0x63/0xcd Let's fix this calling vhost_vdpa_iotlb_unmap() on the whole range in vhost_vdpa_remove_as(). We move that call before vhost_dev_cleanup() since we need a valid v->vdev.mm in vhost_vdpa_pa_unmap(). vhost_iotlb_reset() call can be removed, since vhost_vdpa_iotlb_unmap() on the whole range removes all the entries. The kmemleak log reported was observed with a vDPA device that has `use_va` set to true (e.g. VDUSE). This patch has been tested with both types of devices. Fixes: 037d4305569a ("vhost-vdpa: call vhost_vdpa_cleanup during the release") Fixes: 3d5698793897 ("vhost-vdpa: introduce asid based IOTLB") Signed-off-by: Stefano Garzarella Message-Id: <20221109154213.146789-1-sgarzare@redhat.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: Sasha Levin --- drivers/vhost/vdpa.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 166044642fd5..b08e07fc7d1f 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -65,6 +65,10 @@ static DEFINE_IDA(vhost_vdpa_ida); static dev_t vhost_vdpa_major; +static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, + struct vhost_iotlb *iotlb, + u64 start, u64 last); + static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb) { struct vhost_vdpa_as *as = container_of(iotlb, struct @@ -135,7 +139,7 @@ static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid) return -EINVAL; hlist_del(&as->hash_link); - vhost_iotlb_reset(&as->iotlb); + vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1); kfree(as); return 0; @@ -1162,14 +1166,14 @@ static void vhost_vdpa_cleanup(struct vhost_vdpa *v) struct vhost_vdpa_as *as; u32 asid; - vhost_dev_cleanup(&v->vdev); - kfree(v->vdev.vqs); - for (asid = 0; asid < v->vdpa->nas; asid++) { as = asid_to_as(v, asid); if (as) vhost_vdpa_remove_as(v, asid); } + + vhost_dev_cleanup(&v->vdev); + kfree(v->vdev.vqs); } static int vhost_vdpa_open(struct inode *inode, struct file *filep) From c72123cd79dd3b230014e209db368e4160e72dfb Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Thu, 10 Nov 2022 15:13:35 +0100 Subject: [PATCH 047/543] vdpa_sim: fix vringh initialization in vdpasim_queue_ready() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 794ec498c9fa79e6bfd71b931410d5897a9c00d4 ] When we initialize vringh, we should pass the features and the number of elements in the virtqueue negotiated with the driver, otherwise operations with vringh may fail. This was discovered in a case where the driver sets a number of elements in the virtqueue different from the value returned by .get_vq_num_max(). In vdpasim_vq_reset() is safe to initialize the vringh with default values, since the virtqueue will not be used until vdpasim_queue_ready() is called again. Fixes: 2c53d0f64c06 ("vdpasim: vDPA device simulator") Signed-off-by: Stefano Garzarella Message-Id: <20221110141335.62171-1-sgarzare@redhat.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Acked-by: Eugenio Pérez Signed-off-by: Sasha Levin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index b071f0d842fb..b20689f8fe89 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -67,8 +67,7 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx) { struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; - vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features, - VDPASIM_QUEUE_MAX, false, + vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false, (struct vring_desc *)(uintptr_t)vq->desc_addr, (struct vring_avail *) (uintptr_t)vq->driver_addr, From 0871df190fe6723464efe0f493d476411616f553 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 14 Nov 2022 11:07:40 +0000 Subject: [PATCH 048/543] virtio-crypto: fix memory leak in virtio_crypto_alg_skcipher_close_session() [ Upstream commit b1d65f717cd6305a396a8738e022c6f7c65cfbe8 ] 'vc_ctrl_req' is alloced in virtio_crypto_alg_skcipher_close_session(), and should be freed in the invalid ctrl_status->status error handling case. Otherwise there is a memory leak. Fixes: 0756ad15b1fe ("virtio-crypto: use private buffer for control request") Signed-off-by: Wei Yongjun Message-Id: <20221114110740.537276-1-weiyongjun@huaweicloud.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Gonglei Acked-by: zhenwei pi Acked-by: Jason Wang Signed-off-by: Sasha Levin --- drivers/crypto/virtio/virtio_crypto_skcipher_algs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c index e553ccadbcbc..e5876286828b 100644 --- a/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c @@ -239,7 +239,8 @@ static int virtio_crypto_alg_skcipher_close_session( pr_err("virtio_crypto: Close session failed status: %u, session_id: 0x%llx\n", ctrl_status->status, destroy_session->session_id); - return -EINVAL; + err = -EINVAL; + goto out; } err = 0; From 6ccc891f36d0c20ee220551caabdcd3886ec584b Mon Sep 17 00:00:00 2001 From: Rong Wang Date: Wed, 7 Dec 2022 20:08:13 +0800 Subject: [PATCH 049/543] vdpa/vp_vdpa: fix kfree a wrong pointer in vp_vdpa_remove [ Upstream commit ed843d6ed7310a27cf7c8ee0a82a482eed0cb4a6 ] In vp_vdpa_remove(), the code kfree(&vp_vdpa_mgtdev->mgtdev.id_table) uses a reference of pointer as the argument of kfree, which is the wrong pointer and then may hit crash like this: Unable to handle kernel paging request at virtual address 00ffff003363e30c Internal error: Oops: 96000004 [#1] SMP Call trace: rb_next+0x20/0x5c ext4_readdir+0x494/0x5c4 [ext4] iterate_dir+0x168/0x1b4 __se_sys_getdents64+0x68/0x170 __arm64_sys_getdents64+0x24/0x30 el0_svc_common.constprop.0+0x7c/0x1bc do_el0_svc+0x2c/0x94 el0_svc+0x20/0x30 el0_sync_handler+0xb0/0xb4 el0_sync+0x160/0x180 Code: 54000220 f9400441 b4000161 aa0103e0 (f9400821) SMP: stopping secondary CPUs Starting crashdump kernel... Fixes: ffbda8e9df10 ("vdpa/vp_vdpa : add vdpa tool support in vp_vdpa") Signed-off-by: Rong Wang Signed-off-by: Nanyong Sun Message-Id: <20221207120813.2837529-1-sunnanyong@huawei.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Cindy Lu Acked-by: Jason Wang Signed-off-by: Sasha Levin --- drivers/vdpa/virtio_pci/vp_vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c index d448db0c4de3..8fe267ca3e76 100644 --- a/drivers/vdpa/virtio_pci/vp_vdpa.c +++ b/drivers/vdpa/virtio_pci/vp_vdpa.c @@ -647,7 +647,7 @@ static void vp_vdpa_remove(struct pci_dev *pdev) mdev = vp_vdpa_mgtdev->mdev; vp_modern_remove(mdev); vdpa_mgmtdev_unregister(&vp_vdpa_mgtdev->mgtdev); - kfree(&vp_vdpa_mgtdev->mgtdev.id_table); + kfree(vp_vdpa_mgtdev->mgtdev.id_table); kfree(mdev); kfree(vp_vdpa_mgtdev); } From 16b22e27fba6fd816d0dcb98f42cc71f0836c27e Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 13 Dec 2022 17:07:17 +0800 Subject: [PATCH 050/543] vdpasim: fix memory leak when freeing IOTLBs [ Upstream commit 0b7a04a30eef20e6b24926a45c0ce7906ae85bd6 ] After commit bda324fd037a ("vdpasim: control virtqueue support"), vdpasim->iommu became an array of IOTLB, so we should clean the mappings of each free one by one instead of just deleting the ranges in the first IOTLB which may leak maps. Fixes: bda324fd037a ("vdpasim: control virtqueue support") Cc: Gautam Dawar Signed-off-by: Jason Wang Message-Id: <20221213090717.61529-1-jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Gautam Dawar Signed-off-by: Sasha Levin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index b20689f8fe89..cb88891b44a8 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -689,7 +689,9 @@ static void vdpasim_free(struct vdpa_device *vdpa) } kvfree(vdpasim->buffer); - vhost_iotlb_free(vdpasim->iommu); + for (i = 0; i < vdpasim->dev_attr.nas; i++) + vhost_iotlb_reset(&vdpasim->iommu[i]); + kfree(vdpasim->iommu); kfree(vdpasim->vqs); kfree(vdpasim->config); } From 8aa897595bab34555245156abe3a5d0b9561484d Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 12 Dec 2022 10:42:15 +0200 Subject: [PATCH 051/543] net/mlx5: E-Switch, properly handle ingress tagged packets on VST [ Upstream commit 1f0ae22ab470946143485a02cc1cd7e05c0f9120 ] Fix SRIOV VST mode behavior to insert cvlan when a guest tag is already present in the frame. Previous VST mode behavior was to drop packets or override existing tag, depending on the device version. In this patch we fix this behavior by correctly building the HW steering rule with a push vlan action, or for older devices we ask the FW to stack the vlan when a vlan is already present. Fixes: 07bab9502641 ("net/mlx5: E-Switch, Refactor eswitch ingress acl codes") Fixes: dfcb1ed3c331 ("net/mlx5: E-Switch, Vport ingress/egress ACLs rules for VST mode") Signed-off-by: Moshe Shemesh Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../mellanox/mlx5/core/esw/acl/egress_lgcy.c | 7 +++- .../mellanox/mlx5/core/esw/acl/ingress_lgcy.c | 33 ++++++++++++++++--- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 30 ++++++++++++----- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 6 ++++ include/linux/mlx5/device.h | 5 +++ include/linux/mlx5/mlx5_ifc.h | 3 +- 6 files changed, 68 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c index 60a73990017c..6b4c9ffad95b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c @@ -67,6 +67,7 @@ static void esw_acl_egress_lgcy_groups_destroy(struct mlx5_vport *vport) int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { + bool vst_mode_steering = esw_vst_mode_is_steering(esw); struct mlx5_flow_destination drop_ctr_dst = {}; struct mlx5_flow_destination *dst = NULL; struct mlx5_fc *drop_counter = NULL; @@ -77,6 +78,7 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, */ int table_size = 2; int dest_num = 0; + int actions_flag; int err = 0; if (vport->egress.legacy.drop_counter) { @@ -119,8 +121,11 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, vport->vport, vport->info.vlan, vport->info.qos); /* Allowed vlan rule */ + actions_flag = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + if (vst_mode_steering) + actions_flag |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; err = esw_egress_acl_vlan_create(esw, vport, NULL, vport->info.vlan, - MLX5_FLOW_CONTEXT_ACTION_ALLOW); + actions_flag); if (err) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c index b1a5199260f6..093ed86a0acd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c @@ -139,11 +139,14 @@ static void esw_acl_ingress_lgcy_groups_destroy(struct mlx5_vport *vport) int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { + bool vst_mode_steering = esw_vst_mode_is_steering(esw); struct mlx5_flow_destination drop_ctr_dst = {}; struct mlx5_flow_destination *dst = NULL; struct mlx5_flow_act flow_act = {}; struct mlx5_flow_spec *spec = NULL; struct mlx5_fc *counter = NULL; + bool vst_check_cvlan = false; + bool vst_push_cvlan = false; /* The ingress acl table contains 4 groups * (2 active rules at the same time - * 1 allow rule from one of the first 3 groups. @@ -203,7 +206,26 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, goto out; } - if (vport->info.vlan || vport->info.qos) + if ((vport->info.vlan || vport->info.qos)) { + if (vst_mode_steering) + vst_push_cvlan = true; + else if (!MLX5_CAP_ESW(esw->dev, vport_cvlan_insert_always)) + vst_check_cvlan = true; + } + + if (vst_check_cvlan || vport->info.spoofchk) + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + + /* Create ingress allow rule */ + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; + if (vst_push_cvlan) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + flow_act.vlan[0].prio = vport->info.qos; + flow_act.vlan[0].vid = vport->info.vlan; + flow_act.vlan[0].ethtype = ETH_P_8021Q; + } + + if (vst_check_cvlan) MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag); @@ -218,9 +240,6 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, ether_addr_copy(smac_v, vport->info.mac); } - /* Create ingress allow rule */ - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec, &flow_act, NULL, 0); if (IS_ERR(vport->ingress.allow_rule)) { @@ -232,6 +251,9 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, goto out; } + if (!vst_check_cvlan && !vport->info.spoofchk) + goto out; + memset(&flow_act, 0, sizeof(flow_act)); flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; /* Attach drop flow counter */ @@ -257,7 +279,8 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, return 0; out: - esw_acl_ingress_lgcy_cleanup(esw, vport); + if (err) + esw_acl_ingress_lgcy_cleanup(esw, vport); kvfree(spec); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 374e3fbdc2cf..788a6ab5c463 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -161,10 +161,17 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport, esw_vport_context.vport_cvlan_strip, 1); if (set_flags & SET_VLAN_INSERT) { - /* insert only if no vlan in packet */ - MLX5_SET(modify_esw_vport_context_in, in, - esw_vport_context.vport_cvlan_insert, 1); - + if (MLX5_CAP_ESW(dev, vport_cvlan_insert_always)) { + /* insert either if vlan exist in packet or not */ + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.vport_cvlan_insert, + MLX5_VPORT_CVLAN_INSERT_ALWAYS); + } else { + /* insert only if no vlan in packet */ + MLX5_SET(modify_esw_vport_context_in, in, + esw_vport_context.vport_cvlan_insert, + MLX5_VPORT_CVLAN_INSERT_WHEN_NO_CVLAN); + } MLX5_SET(modify_esw_vport_context_in, in, esw_vport_context.cvlan_pcp, qos); MLX5_SET(modify_esw_vport_context_in, in, @@ -774,6 +781,7 @@ static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw, static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { + bool vst_mode_steering = esw_vst_mode_is_steering(esw); u16 vport_num = vport->vport; int flags; int err; @@ -800,8 +808,9 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) flags = (vport->info.vlan || vport->info.qos) ? SET_VLAN_STRIP | SET_VLAN_INSERT : 0; - modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, - vport->info.qos, flags); + if (esw->mode == MLX5_ESWITCH_OFFLOADS || !vst_mode_steering) + modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, + vport->info.qos, flags); return 0; } @@ -1805,6 +1814,7 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, u16 vport, u16 vlan, u8 qos, u8 set_flags) { struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); + bool vst_mode_steering = esw_vst_mode_is_steering(esw); int err = 0; if (IS_ERR(evport)) @@ -1812,9 +1822,11 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, if (vlan > 4095 || qos > 7) return -EINVAL; - err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags); - if (err) - return err; + if (esw->mode == MLX5_ESWITCH_OFFLOADS || !vst_mode_steering) { + err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags); + if (err) + return err; + } evport->info.vlan = vlan; evport->info.qos = qos; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 3029bc1c0dd0..5db76af35d3f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -518,6 +518,12 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, u16 vport, u16 vlan, u8 qos, u8 set_flags); +static inline bool esw_vst_mode_is_steering(struct mlx5_eswitch *esw) +{ + return (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, pop_vlan) && + MLX5_CAP_ESW_INGRESS_ACL(esw->dev, push_vlan)); +} + static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev, u8 vlan_depth) { diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 1ff91cb79ded..2383076a7306 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1085,6 +1085,11 @@ enum { MLX5_VPORT_ADMIN_STATE_AUTO = 0x2, }; +enum { + MLX5_VPORT_CVLAN_INSERT_WHEN_NO_CVLAN = 0x1, + MLX5_VPORT_CVLAN_INSERT_ALWAYS = 0x3, +}; + enum { MLX5_L3_PROT_TYPE_IPV4 = 0, MLX5_L3_PROT_TYPE_IPV6 = 1, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5a4e914e2a6f..e45bdec73baf 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -907,7 +907,8 @@ struct mlx5_ifc_e_switch_cap_bits { u8 vport_svlan_insert[0x1]; u8 vport_cvlan_insert_if_not_exist[0x1]; u8 vport_cvlan_insert_overwrite[0x1]; - u8 reserved_at_5[0x2]; + u8 reserved_at_5[0x1]; + u8 vport_cvlan_insert_always[0x1]; u8 esw_shared_ingress_acl[0x1]; u8 esw_uplink_ingress_acl[0x1]; u8 root_ft_on_other_esw[0x1]; From 3c29ad4f2cc510f5730e5ffbcbb059c5393a0b27 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 18 Oct 2022 12:51:52 +0200 Subject: [PATCH 052/543] net/mlx5: Add forgotten cleanup calls into mlx5_init_once() error path [ Upstream commit 2a35b2c2e6a252eda2134aae6a756861d9299531 ] There are two cleanup calls missing in mlx5_init_once() error path. Add them making the error path flow to be the same as mlx5_cleanup_once(). Fixes: 52ec462eca9b ("net/mlx5: Add reserved-gids support") Fixes: 7c39afb394c7 ("net/mlx5: PTP code migration to driver core section") Signed-off-by: Jiri Pirko Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index e58775a7d955..6776bf5b8d55 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1051,6 +1051,8 @@ err_rl_cleanup: err_tables_cleanup: mlx5_geneve_destroy(dev->geneve); mlx5_vxlan_destroy(dev->vxlan); + mlx5_cleanup_clock(dev); + mlx5_cleanup_reserved_gids(dev); mlx5_cq_debugfs_cleanup(dev); mlx5_fw_reset_cleanup(dev); err_events_cleanup: From 95a327eaebe2e8b0e0cdab121c0d798919ec3244 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sun, 18 Dec 2022 12:42:14 +0200 Subject: [PATCH 053/543] net/mlx5: Fix io_eq_size and event_eq_size params validation [ Upstream commit 44aee8ea15ac205490a41b00cbafcccbf9f7f82b ] io_eq_size and event_eq_size params are of param type DEVLINK_PARAM_TYPE_U32. But, the validation callback is addressing them as DEVLINK_PARAM_TYPE_U16. This cause mismatch in validation in big-endian systems, in which values in range were rejected while 268500991 was accepted. Fix it by checking the U32 value in the validation callback. Fixes: 0844fa5f7b89 ("net/mlx5: Let user configure io_eq_size param") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 66c6a7017695..9e4e8d551884 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -563,7 +563,7 @@ static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id, union devlink_param_value val, struct netlink_ext_ack *extack) { - return (val.vu16 >= 64 && val.vu16 <= 4096) ? 0 : -EINVAL; + return (val.vu32 >= 64 && val.vu32 <= 4096) ? 0 : -EINVAL; } static const struct devlink_param mlx5_devlink_params[] = { From e639bc695f05335c11a5e387b6ad44249b35d673 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Thu, 24 Nov 2022 13:34:12 +0200 Subject: [PATCH 054/543] net/mlx5: Avoid recovery in probe flows [ Upstream commit 9078e843efec530f279a155f262793c58b0746bd ] Currently, recovery is done without considering whether the device is still in probe flow. This may lead to recovery before device have finished probed successfully. e.g.: while mlx5_init_one() is running. Recovery flow is using functionality that is loaded only by mlx5_init_one(), and there is no point in running recovery without mlx5_init_one() finished successfully. Fix it by waiting for probe flow to finish and checking whether the device is probed before trying to perform recovery. Fixes: 51d138c2610a ("net/mlx5: Fix health error state handling") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 86ed87d704f7..96417c5feed7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -674,6 +674,12 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) dev = container_of(priv, struct mlx5_core_dev, priv); devlink = priv_to_devlink(dev); + mutex_lock(&dev->intf_state_mutex); + if (test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) { + mlx5_core_err(dev, "health works are not permitted at this stage\n"); + return; + } + mutex_unlock(&dev->intf_state_mutex); enter_error_state(dev, false); if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) { devl_lock(devlink); From 2799cbdedb6048603d98b9d651ad4a48e66b8b24 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 9 Nov 2022 14:42:59 +0200 Subject: [PATCH 055/543] net/mlx5: Fix RoCE setting at HCA level [ Upstream commit c4ad5f2bdad56265b23d3635494ecdb205431807 ] mlx5 PF can disable RoCE for its VFs and SFs. In such case RoCE is marked as unsupported on those VFs/SFs. The cited patch added an option for disable (and enable) RoCE at HCA level. However, that commit didn't check whether RoCE is supported on the HCA and enabled user to try and set RoCE to on. Fix it by checking whether the HCA supports RoCE. Fixes: fbfa97b4d79f ("net/mlx5: Disable roce at HCA level") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 9e4e8d551884..97e9ec44a759 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -468,7 +468,7 @@ static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id, bool new_state = val.vbool; if (new_state && !MLX5_CAP_GEN(dev, roce) && - !MLX5_CAP_GEN(dev, roce_rw_supported)) { + !(MLX5_CAP_GEN(dev, roce_rw_supported) && MLX5_CAP_GEN_MAX(dev, roce))) { NL_SET_ERR_MSG_MOD(extack, "Device doesn't support RoCE"); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 6776bf5b8d55..00758312df06 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -614,7 +614,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) MLX5_SET(cmd_hca_cap, set_hca_cap, num_total_dynamic_vf_msix, MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix)); - if (MLX5_CAP_GEN(dev, roce_rw_supported)) + if (MLX5_CAP_GEN(dev, roce_rw_supported) && MLX5_CAP_GEN_MAX(dev, roce)) MLX5_SET(cmd_hca_cap, set_hca_cap, roce, mlx5_is_roce_on(dev)); From 53d051e2a235f441c946c7dd0d9d68bd548f2944 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 28 Nov 2022 15:24:21 +0200 Subject: [PATCH 056/543] net/mlx5e: IPoIB, Don't allow CQE compression to be turned on by default [ Upstream commit b12d581e83e3ae1080c32ab83f123005bd89a840 ] mlx5e_build_nic_params will turn CQE compression on if the hardware capability is enabled and the slow_pci_heuristic condition is detected. As IPoIB doesn't support CQE compression, make sure to disable the feature in the IPoIB profile init. Please note that the feature is not exposed to the user for IPoIB interfaces, so it can't be subsequently turned on. Fixes: b797a684b0dd ("net/mlx5e: Enable CQE compression when PCI is slower than link") Signed-off-by: Dragos Tatulea Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 4e3a75496dd9..84f5352b0ce1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -71,6 +71,10 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev, params->packet_merge.type = MLX5E_PACKET_MERGE_NONE; params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN; params->tunneled_offload_en = false; + + /* CQE compression is not supported for IPoIB */ + params->rx_cqe_compress_def = false; + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def); } /* Called directly after IPoIB netdevice was created to initialize SW structs */ From 38e1a340f741720c3c23d2d47553f601ae5b0d89 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 27 Nov 2022 09:21:28 +0200 Subject: [PATCH 057/543] net/mlx5e: Fix RX reporter for XSK RQs [ Upstream commit f8c18a5749cf917096f75dd59885b7a0fe9298ba ] RX reporter mistakenly reads from the regular (inactive) RQ when XSK RQ is active. Fix it here. Fixes: 3db4c85cde7a ("net/mlx5e: xsk: Use queue indices starting from 0 for XSK queues") Signed-off-by: Tariq Toukan Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 5f6f95ad6888..1ae15b8536a8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -459,7 +459,11 @@ static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter, goto unlock; for (i = 0; i < priv->channels.num; i++) { - struct mlx5e_rq *rq = &priv->channels.c[i]->rq; + struct mlx5e_channel *c = priv->channels.c[i]; + struct mlx5e_rq *rq; + + rq = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state) ? + &c->xskrq : &c->rq; err = mlx5e_rx_reporter_build_diagnose_output(rq, fmsg); if (err) From 7d88b67d6bec4ea4f79de4f4e8928eea7c52bfe9 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Mon, 28 Nov 2022 13:54:29 +0800 Subject: [PATCH 058/543] net/mlx5e: CT: Fix ct debugfs folder name [ Upstream commit 849190e3e4ccf452fbe2240eace30a9ca83fb8d2 ] Need to use sprintf to build a string instead of sscanf. Otherwise dirname is null and both "ct_nic" and "ct_fdb" won't be created. But its redundant anyway as driver could be in switchdev mode but still add nic rules. So use "ct" as folder name. Fixes: 77422a8f6f61 ("net/mlx5e: CT: Add ct driver counters") Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 864ce0c393e6..f01f7dfdbcf8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -2080,14 +2080,9 @@ out_err: static void mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv) { - bool is_fdb = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB; struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs; - char dirname[16] = {}; - if (sscanf(dirname, "ct_%s", is_fdb ? "fdb" : "nic") < 0) - return; - - ct_dbgfs->root = debugfs_create_dir(dirname, mlx5_debugfs_get_dev_root(ct_priv->dev)); + ct_dbgfs->root = debugfs_create_dir("ct", mlx5_debugfs_get_dev_root(ct_priv->dev)); debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root, &ct_dbgfs->stats.offloaded); debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root, From c6e6fb4240319b804cdd03fdb7d8787271330b35 Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Mon, 5 Dec 2022 09:22:50 +0800 Subject: [PATCH 059/543] net/mlx5e: Always clear dest encap in neigh-update-del [ Upstream commit 2951b2e142ecf6e0115df785ba91e91b6da74602 ] The cited commit introduced a bug for multiple encapsulations flow. If one dest encap becomes invalid, the flow is set slow path flag. But when other dests encap become invalid, they are not cleared due to slow path flag of the flow. When neigh-update-add is running, it will use invalid encap. Fix it by checking slow path flag after clearing dest encap. Fixes: 9a5f9cc794e1 ("net/mlx5e: Fix possible use-after-free deleting fdb rule") Signed-off-by: Chris Mi Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index ff73d25bc6eb..2aaf8ab857b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -222,7 +222,7 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, int err; list_for_each_entry(flow, flow_list, tmp_list) { - if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW)) + if (!mlx5e_is_offloaded_flow(flow)) continue; attr = mlx5e_tc_get_encap_attr(flow); @@ -231,6 +231,13 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL; + /* Clear pkt_reformat before checking slow path flag. Because + * in next iteration, the same flow is already set slow path + * flag, but still need to clear the pkt_reformat. + */ + if (flow_flag_test(flow, SLOW)) + continue; + /* update from encap rule to slow path rule */ spec = &flow->attr->parse_attr->spec; rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); From cacda6f9757f13ed129f93de31e40af870e19c44 Mon Sep 17 00:00:00 2001 From: Adham Faris Date: Wed, 14 Dec 2022 16:02:57 +0200 Subject: [PATCH 060/543] net/mlx5e: Fix hw mtu initializing at XDP SQ allocation [ Upstream commit 1e267ab88dc44c48f556218f7b7f14c76f7aa066 ] Current xdp xmit functions logic (mlx5e_xmit_xdp_frame_mpwqe or mlx5e_xmit_xdp_frame), validates xdp packet length by comparing it to hw mtu (configured at xdp sq allocation) before xmiting it. This check does not account for ethernet fcs length (calculated and filled by the nic). Hence, when we try sending packets with length > (hw-mtu - ethernet-fcs-size), the device port drops it and tx_errors_phy is incremented. Desired behavior is to catch these packets and drop them by the driver. Fix this behavior in XDP SQ allocation function (mlx5e_alloc_xdpsq) by subtracting ethernet FCS header size (4 Bytes) from current hw mtu value, since ethernet FCS is calculated and written to ethernet frames by the nic. Fixes: d8bec2b29a82 ("net/mlx5e: Support bpf_xdp_adjust_head()") Signed-off-by: Adham Faris Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 5e41dfdf79c8..951ede433813 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1298,7 +1298,7 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, sq->channel = c; sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; - sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN; sq->xsk_pool = xsk_pool; sq->stats = sq->xsk_pool ? From c1103a60713450ff7e0d00cfb12970609b9e79e4 Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Sun, 1 Aug 2021 14:45:17 +0300 Subject: [PATCH 061/543] net/mlx5e: Set geneve_tlv_option_0_exist when matching on geneve option [ Upstream commit e54638a8380bd9c146a883035fffd0a821813682 ] The cited patch added support of matching on geneve option by setting geneve_tlv_option_0_data mask and key but didn't set geneve_tlv_option_0_exist bit which is required on some HWs when matching geneve_tlv_option_0_data parameter, this may cause in some cases for packets to wrongly match on rules with different geneve option. Example of such case is packet with geneve_tlv_object class=789 and data=456 will wrongly match on rule with match geneve_tlv_object class=123 and data=456. Fix it by setting geneve_tlv_option_0_exist bit when supported by the HW when matching on geneve_tlv_option_0_data parameter. Fixes: 9272e3df3023 ("net/mlx5e: Geneve, Add support for encap/decap flows offload") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c index f5b26f5a7de4..054d80c4e65c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c @@ -273,6 +273,11 @@ static int mlx5e_tc_tun_parse_geneve_options(struct mlx5e_priv *priv, geneve_tlv_option_0_data, be32_to_cpu(opt_data_key)); MLX5_SET(fte_match_set_misc3, misc_3_c, geneve_tlv_option_0_data, be32_to_cpu(opt_data_mask)); + if (MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, + ft_field_support.geneve_tlv_option_0_exist)) { + MLX5_SET_TO_ONES(fte_match_set_misc, misc_c, geneve_tlv_option_0_exist); + MLX5_SET_TO_ONES(fte_match_set_misc, misc_v, geneve_tlv_option_0_exist); + } spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3; From 8f1b8b3133504bf9125ee507ddcc3a8fb41a41f0 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 15 Dec 2022 14:28:34 +0200 Subject: [PATCH 062/543] net/mlx5: Lag, fix failure to cancel delayed bond work [ Upstream commit 4d1c1379d71777ddeda3e54f8fc26e9ecbfd1009 ] Commit 0d4e8ed139d8 ("net/mlx5: Lag, avoid lockdep warnings") accidentally removed a call to cancel delayed bond work thus it may cause queued delay to expire and fall on an already destroyed work queue. Fix by restoring the call cancel_delayed_work_sync() before destroying the workqueue. This prevents call trace such as this: [ 329.230417] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 329.231444] #PF: supervisor write access in kernel mode [ 329.232233] #PF: error_code(0x0002) - not-present page [ 329.233007] PGD 0 P4D 0 [ 329.233476] Oops: 0002 [#1] SMP [ 329.234012] CPU: 5 PID: 145 Comm: kworker/u20:4 Tainted: G OE 6.0.0-rc5_mlnx #1 [ 329.235282] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 329.236868] Workqueue: mlx5_cmd_0000:08:00.1 cmd_work_handler [mlx5_core] [ 329.237886] RIP: 0010:_raw_spin_lock+0xc/0x20 [ 329.238585] Code: f0 0f b1 17 75 02 f3 c3 89 c6 e9 6f 3c 5f ff 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 0f 1f 44 00 00 31 c0 ba 01 00 00 00 0f b1 17 75 02 f3 c3 89 c6 e9 45 3c 5f ff 0f 1f 44 00 00 0f 1f [ 329.241156] RSP: 0018:ffffc900001b0e98 EFLAGS: 00010046 [ 329.241940] RAX: 0000000000000000 RBX: ffffffff82374ae0 RCX: 0000000000000000 [ 329.242954] RDX: 0000000000000001 RSI: 0000000000000014 RDI: 0000000000000000 [ 329.243974] RBP: ffff888106ccf000 R08: ffff8881004000c8 R09: ffff888100400000 [ 329.244990] R10: 0000000000000000 R11: ffffffff826669f8 R12: 0000000000002000 [ 329.246009] R13: 0000000000000005 R14: ffff888100aa7ce0 R15: ffff88852ca80000 [ 329.247030] FS: 0000000000000000(0000) GS:ffff88852ca80000(0000) knlGS:0000000000000000 [ 329.248260] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 329.249111] CR2: 0000000000000000 CR3: 000000016d675001 CR4: 0000000000770ee0 [ 329.250133] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 329.251152] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 329.252176] PKRU: 55555554 Fixes: 0d4e8ed139d8 ("net/mlx5: Lag, avoid lockdep warnings") Signed-off-by: Eli Cohen Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 32c3e0a649a7..ad32b80e8501 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -228,6 +228,7 @@ static void mlx5_ldev_free(struct kref *ref) if (ldev->nb.notifier_call) unregister_netdevice_notifier_net(&init_net, &ldev->nb); mlx5_lag_mp_cleanup(ldev); + cancel_delayed_work_sync(&ldev->bond_work); destroy_workqueue(ldev->wq); mlx5_lag_mpesw_cleanup(ldev); mutex_destroy(&ldev->lock); From a1f2920a051fc3dd7e2cdc316bb2f6bb6bb75cf2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 23 Dec 2022 10:28:44 -0800 Subject: [PATCH 063/543] bpf: Always use maximal size for copy_array() [ Upstream commit 45435d8da71f9f3e6860e6e6ea9667b6ec17ec64 ] Instead of counting on prior allocations to have sized allocations to the next kmalloc bucket size, always perform a krealloc that is at least ksize(dst) in size (which is a no-op), so the size can be correctly tracked by all the various allocation size trackers (KASAN, __alloc_size, etc). Reported-by: Hyunwoo Kim Link: https://lore.kernel.org/bpf/20221223094551.GA1439509@ubuntu Fixes: ceb35b666d42 ("bpf/verifier: Use kmalloc_size_roundup() to match ksize() usage") Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: John Fastabend Cc: Andrii Nakryiko Cc: Martin KaFai Lau Cc: Song Liu Cc: Yonghong Song Cc: KP Singh Cc: Stanislav Fomichev Cc: Hao Luo Cc: Jiri Olsa Cc: bpf@vger.kernel.org Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221223182836.never.866-kees@kernel.org Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 242fe307032f..b4d5b343c191 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1000,6 +1000,8 @@ static void print_insn_state(struct bpf_verifier_env *env, */ static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags) { + size_t alloc_bytes; + void *orig = dst; size_t bytes; if (ZERO_OR_NULL_PTR(src)) @@ -1008,11 +1010,11 @@ static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL; - if (ksize(dst) < ksize(src)) { - kfree(dst); - dst = kmalloc_track_caller(kmalloc_size_roundup(bytes), flags); - if (!dst) - return NULL; + alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes)); + dst = krealloc(orig, alloc_bytes, flags); + if (!dst) { + kfree(orig); + return NULL; } memcpy(dst, src, bytes); From 01ca3695f19a7c535474ec6bc2064bd83dbeba63 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 26 Dec 2022 22:27:52 +0900 Subject: [PATCH 064/543] tcp: Add TIME_WAIT sockets in bhash2. [ Upstream commit 936a192f974018b4f6040f6f77b1cc1e75bd8666 ] Jiri Slaby reported regression of bind() with a simple repro. [0] The repro creates a TIME_WAIT socket and tries to bind() a new socket with the same local address and port. Before commit 28044fc1d495 ("net: Add a bhash2 table hashed by port and address"), the bind() failed with -EADDRINUSE, but now it succeeds. The cited commit should have put TIME_WAIT sockets into bhash2; otherwise, inet_bhash2_conflict() misses TIME_WAIT sockets when validating bind() requests if the address is not a wildcard one. The straight option is to move sk_bind2_node from struct sock to struct sock_common to add twsk to bhash2 as implemented as RFC. [1] However, the binary layout change in the struct sock could affect performances moving hot fields on different cachelines. To avoid that, we add another TIME_WAIT list in inet_bind2_bucket and check it while validating bind(). [0]: https://lore.kernel.org/netdev/6b971a4e-c7d8-411e-1f92-fda29b5b2fb9@kernel.org/ [1]: https://lore.kernel.org/netdev/20221221151258.25748-2-kuniyu@amazon.com/ Fixes: 28044fc1d495 ("net: Add a bhash2 table hashed by port and address") Reported-by: Jiri Slaby Suggested-by: Paolo Abeni Signed-off-by: Kuniyuki Iwashima Acked-by: Joanne Koong Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/net/inet_hashtables.h | 4 ++++ include/net/inet_timewait_sock.h | 5 +++++ net/ipv4/inet_connection_sock.c | 28 +++++++++++++++++++++++----- net/ipv4/inet_hashtables.c | 8 +++++--- net/ipv4/inet_timewait_sock.c | 31 +++++++++++++++++++++++++++++-- 5 files changed, 66 insertions(+), 10 deletions(-) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 69174093078f..99bd823e97f6 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -108,6 +108,10 @@ struct inet_bind2_bucket { struct hlist_node node; /* List of sockets hashed to this bucket */ struct hlist_head owners; + /* bhash has twsk in owners, but bhash2 has twsk in + * deathrow not to add a member in struct sock_common. + */ + struct hlist_head deathrow; }; static inline struct net *ib_net(const struct inet_bind_bucket *ib) diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 5b47545f22d3..4a8e578405cb 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -73,9 +73,14 @@ struct inet_timewait_sock { u32 tw_priority; struct timer_list tw_timer; struct inet_bind_bucket *tw_tb; + struct inet_bind2_bucket *tw_tb2; + struct hlist_node tw_bind2_node; }; #define tw_tclass tw_tos +#define twsk_for_each_bound_bhash2(__tw, list) \ + hlist_for_each_entry(__tw, list, tw_bind2_node) + static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) { return (struct inet_timewait_sock *)sk; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 4a34bc7cb15e..0465ada82799 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -173,22 +173,40 @@ static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2, return false; } +static bool __inet_bhash2_conflict(const struct sock *sk, struct sock *sk2, + kuid_t sk_uid, bool relax, + bool reuseport_cb_ok, bool reuseport_ok) +{ + if (sk->sk_family == AF_INET && ipv6_only_sock(sk2)) + return false; + + return inet_bind_conflict(sk, sk2, sk_uid, relax, + reuseport_cb_ok, reuseport_ok); +} + static bool inet_bhash2_conflict(const struct sock *sk, const struct inet_bind2_bucket *tb2, kuid_t sk_uid, bool relax, bool reuseport_cb_ok, bool reuseport_ok) { + struct inet_timewait_sock *tw2; struct sock *sk2; sk_for_each_bound_bhash2(sk2, &tb2->owners) { - if (sk->sk_family == AF_INET && ipv6_only_sock(sk2)) - continue; - - if (inet_bind_conflict(sk, sk2, sk_uid, relax, - reuseport_cb_ok, reuseport_ok)) + if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax, + reuseport_cb_ok, reuseport_ok)) return true; } + + twsk_for_each_bound_bhash2(tw2, &tb2->deathrow) { + sk2 = (struct sock *)tw2; + + if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax, + reuseport_cb_ok, reuseport_ok)) + return true; + } + return false; } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 3cec471a2cd2..67f5e5440802 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -116,6 +116,7 @@ static void inet_bind2_bucket_init(struct inet_bind2_bucket *tb, #endif tb->rcv_saddr = sk->sk_rcv_saddr; INIT_HLIST_HEAD(&tb->owners); + INIT_HLIST_HEAD(&tb->deathrow); hlist_add_head(&tb->node, &head->chain); } @@ -137,7 +138,7 @@ struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep, /* Caller must hold hashbucket lock for this tb with local BH disabled */ void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb) { - if (hlist_empty(&tb->owners)) { + if (hlist_empty(&tb->owners) && hlist_empty(&tb->deathrow)) { __hlist_del(&tb->node); kmem_cache_free(cachep, tb); } @@ -1103,15 +1104,16 @@ ok: /* Head lock still held and bh's disabled */ inet_bind_hash(sk, tb, tb2, port); - spin_unlock(&head2->lock); - if (sk_unhashed(sk)) { inet_sk(sk)->inet_sport = htons(port); inet_ehash_nolisten(sk, (struct sock *)tw, NULL); } if (tw) inet_twsk_bind_unhash(tw, hinfo); + + spin_unlock(&head2->lock); spin_unlock(&head->lock); + if (tw) inet_twsk_deschedule_put(tw); local_bh_enable(); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 66fc940f9521..1d77d992e6e7 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -29,6 +29,7 @@ void inet_twsk_bind_unhash(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo) { + struct inet_bind2_bucket *tb2 = tw->tw_tb2; struct inet_bind_bucket *tb = tw->tw_tb; if (!tb) @@ -37,6 +38,11 @@ void inet_twsk_bind_unhash(struct inet_timewait_sock *tw, __hlist_del(&tw->tw_bind_node); tw->tw_tb = NULL; inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); + + __hlist_del(&tw->tw_bind2_node); + tw->tw_tb2 = NULL; + inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2); + __sock_put((struct sock *)tw); } @@ -45,7 +51,7 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw) { struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo; spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); - struct inet_bind_hashbucket *bhead; + struct inet_bind_hashbucket *bhead, *bhead2; spin_lock(lock); sk_nulls_del_node_init_rcu((struct sock *)tw); @@ -54,9 +60,13 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw) /* Disassociate with bind bucket. */ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num, hashinfo->bhash_size)]; + bhead2 = inet_bhashfn_portaddr(hashinfo, (struct sock *)tw, + twsk_net(tw), tw->tw_num); spin_lock(&bhead->lock); + spin_lock(&bhead2->lock); inet_twsk_bind_unhash(tw, hashinfo); + spin_unlock(&bhead2->lock); spin_unlock(&bhead->lock); refcount_dec(&tw->tw_dr->tw_refcount); @@ -93,6 +103,12 @@ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, hlist_add_head(&tw->tw_bind_node, list); } +static void inet_twsk_add_bind2_node(struct inet_timewait_sock *tw, + struct hlist_head *list) +{ + hlist_add_head(&tw->tw_bind2_node, list); +} + /* * Enter the time wait state. This is called with locally disabled BH. * Essentially we whip up a timewait bucket, copy the relevant info into it @@ -105,17 +121,28 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - struct inet_bind_hashbucket *bhead; + struct inet_bind_hashbucket *bhead, *bhead2; + /* Step 1: Put TW into bind hash. Original socket stays there too. Note, that any socket with inet->num != 0 MUST be bound in binding cache, even if it is closed. */ bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num, hashinfo->bhash_size)]; + bhead2 = inet_bhashfn_portaddr(hashinfo, sk, twsk_net(tw), inet->inet_num); + spin_lock(&bhead->lock); + spin_lock(&bhead2->lock); + tw->tw_tb = icsk->icsk_bind_hash; WARN_ON(!icsk->icsk_bind_hash); inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); + + tw->tw_tb2 = icsk->icsk_bind2_hash; + WARN_ON(!icsk->icsk_bind2_hash); + inet_twsk_add_bind2_node(tw, &tw->tw_tb2->deathrow); + + spin_unlock(&bhead2->lock); spin_unlock(&bhead->lock); spin_lock(lock); From 1f73316755457713ca45c1a398779cf8d70383d0 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Wed, 28 Dec 2022 14:27:49 +0800 Subject: [PATCH 065/543] net: hns3: refine the handling for VF heartbeat [ Upstream commit fec7352117fa301bfbc31bacc14bb9a579376b36 ] Currently, the PF check the VF alive by the KEEP_ALVE mailbox from VF. VF keep sending the mailbox per 2 seconds. Once PF lost the mailbox for more than 8 seconds, it will regards the VF is abnormal, and stop notifying the state change to VF, include link state, vf mac, reset, even though it receives the KEEP_ALIVE mailbox again. It's inreasonable. This patch fixes it. PF will record the state change which need to notify VF when lost the VF's KEEP_ALIVE mailbox. And notify VF when receive the mailbox again. Introduce a new flag HCLGE_VPORT_STATE_INITED, used to distinguish the case whether VF driver loaded or not. For VF will query these states when initializing, so it's unnecessary to notify it in this case. Fixes: aa5c4f175be6 ("net: hns3: add reset handling for VF when doing PF reset") Signed-off-by: Jian Shen Signed-off-by: Hao Lan Reported-by: kernel test robot Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../hisilicon/hns3/hns3pf/hclge_main.c | 57 +++++++++++---- .../hisilicon/hns3/hns3pf/hclge_main.h | 7 ++ .../hisilicon/hns3/hns3pf/hclge_mbx.c | 71 ++++++++++++++++--- 3 files changed, 112 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 6c2742f59c77..07ad5f35219e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -3910,9 +3910,17 @@ static int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset) return ret; } - if (!reset || !test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) + if (!reset || + !test_bit(HCLGE_VPORT_STATE_INITED, &vport->state)) continue; + if (!test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state) && + hdev->reset_type == HNAE3_FUNC_RESET) { + set_bit(HCLGE_VPORT_NEED_NOTIFY_RESET, + &vport->need_notify); + continue; + } + /* Inform VF to process the reset. * hclge_inform_reset_assert_to_vf may fail if VF * driver is not loaded. @@ -4609,18 +4617,25 @@ static void hclge_reset_service_task(struct hclge_dev *hdev) static void hclge_update_vport_alive(struct hclge_dev *hdev) { +#define HCLGE_ALIVE_SECONDS_NORMAL 8 + + unsigned long alive_time = HCLGE_ALIVE_SECONDS_NORMAL * HZ; int i; /* start from vport 1 for PF is always alive */ for (i = 1; i < hdev->num_alloc_vport; i++) { struct hclge_vport *vport = &hdev->vport[i]; - if (time_after(jiffies, vport->last_active_jiffies + 8 * HZ)) + if (!test_bit(HCLGE_VPORT_STATE_INITED, &vport->state) || + !test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) + continue; + if (time_after(jiffies, vport->last_active_jiffies + + alive_time)) { clear_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state); - - /* If vf is not alive, set to default value */ - if (!test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) - vport->mps = HCLGE_MAC_DEFAULT_FRAME; + dev_warn(&hdev->pdev->dev, + "VF %u heartbeat timeout\n", + i - HCLGE_VF_VPORT_START_NUM); + } } } @@ -8064,9 +8079,11 @@ int hclge_vport_start(struct hclge_vport *vport) { struct hclge_dev *hdev = vport->back; + set_bit(HCLGE_VPORT_STATE_INITED, &vport->state); set_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state); set_bit(HCLGE_VPORT_STATE_PROMISC_CHANGE, &vport->state); vport->last_active_jiffies = jiffies; + vport->need_notify = 0; if (test_bit(vport->vport_id, hdev->vport_config_block)) { if (vport->vport_id) { @@ -8084,7 +8101,9 @@ int hclge_vport_start(struct hclge_vport *vport) void hclge_vport_stop(struct hclge_vport *vport) { + clear_bit(HCLGE_VPORT_STATE_INITED, &vport->state); clear_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state); + vport->need_notify = 0; } static int hclge_client_start(struct hnae3_handle *handle) @@ -9208,7 +9227,8 @@ static int hclge_set_vf_mac(struct hnae3_handle *handle, int vf, return 0; } - dev_info(&hdev->pdev->dev, "MAC of VF %d has been set to %s\n", + dev_info(&hdev->pdev->dev, + "MAC of VF %d has been set to %s, will be active after VF reset\n", vf, format_mac_addr); return 0; } @@ -10465,12 +10485,16 @@ static int hclge_set_vf_vlan_filter(struct hnae3_handle *handle, int vfid, * for DEVICE_VERSION_V3, vf doesn't need to know about the port based * VLAN state. */ - if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3 && - test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) - (void)hclge_push_vf_port_base_vlan_info(&hdev->vport[0], - vport->vport_id, - state, &vlan_info); - + if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3) { + if (test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) + (void)hclge_push_vf_port_base_vlan_info(&hdev->vport[0], + vport->vport_id, + state, + &vlan_info); + else + set_bit(HCLGE_VPORT_NEED_NOTIFY_VF_VLAN, + &vport->need_notify); + } return 0; } @@ -11941,7 +11965,7 @@ static void hclge_reset_vport_state(struct hclge_dev *hdev) int i; for (i = 0; i < hdev->num_alloc_vport; i++) { - hclge_vport_stop(vport); + clear_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state); vport++; } } @@ -12955,6 +12979,11 @@ static void hclge_clear_vport_vf_info(struct hclge_vport *vport, int vfid) struct hclge_vlan_info vlan_info; int ret; + clear_bit(HCLGE_VPORT_STATE_INITED, &vport->state); + clear_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state); + vport->need_notify = 0; + vport->mps = 0; + /* after disable sriov, clean VF rate configured by PF */ ret = hclge_tm_qs_shaper_cfg(vport, 0); if (ret) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 495b639b0dc2..13f23d606e77 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -995,9 +995,15 @@ enum HCLGE_VPORT_STATE { HCLGE_VPORT_STATE_MAC_TBL_CHANGE, HCLGE_VPORT_STATE_PROMISC_CHANGE, HCLGE_VPORT_STATE_VLAN_FLTR_CHANGE, + HCLGE_VPORT_STATE_INITED, HCLGE_VPORT_STATE_MAX }; +enum HCLGE_VPORT_NEED_NOTIFY { + HCLGE_VPORT_NEED_NOTIFY_RESET, + HCLGE_VPORT_NEED_NOTIFY_VF_VLAN, +}; + struct hclge_vlan_info { u16 vlan_proto; /* so far support 802.1Q only */ u16 qos; @@ -1044,6 +1050,7 @@ struct hclge_vport { struct hnae3_handle roce; unsigned long state; + unsigned long need_notify; unsigned long last_active_jiffies; u32 mps; /* Max packet size */ struct hclge_vf_info vf_info; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index a7b06c63143c..04ff9bf12185 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -124,17 +124,26 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len, return status; } +static int hclge_inform_vf_reset(struct hclge_vport *vport, u16 reset_type) +{ + __le16 msg_data; + u8 dest_vfid; + + dest_vfid = (u8)vport->vport_id; + msg_data = cpu_to_le16(reset_type); + + /* send this requested info to VF */ + return hclge_send_mbx_msg(vport, (u8 *)&msg_data, sizeof(msg_data), + HCLGE_MBX_ASSERTING_RESET, dest_vfid); +} + int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport) { struct hclge_dev *hdev = vport->back; - __le16 msg_data; u16 reset_type; - u8 dest_vfid; BUILD_BUG_ON(HNAE3_MAX_RESET > U16_MAX); - dest_vfid = (u8)vport->vport_id; - if (hdev->reset_type == HNAE3_FUNC_RESET) reset_type = HNAE3_VF_PF_FUNC_RESET; else if (hdev->reset_type == HNAE3_FLR_RESET) @@ -142,11 +151,7 @@ int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport) else reset_type = HNAE3_VF_FUNC_RESET; - msg_data = cpu_to_le16(reset_type); - - /* send this requested info to VF */ - return hclge_send_mbx_msg(vport, (u8 *)&msg_data, sizeof(msg_data), - HCLGE_MBX_ASSERTING_RESET, dest_vfid); + return hclge_inform_vf_reset(vport, reset_type); } static void hclge_free_vector_ring_chain(struct hnae3_ring_chain_node *head) @@ -652,9 +657,56 @@ static int hclge_reset_vf(struct hclge_vport *vport) return hclge_func_reset_cmd(hdev, vport->vport_id); } +static void hclge_notify_vf_config(struct hclge_vport *vport) +{ + struct hclge_dev *hdev = vport->back; + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + struct hclge_port_base_vlan_config *vlan_cfg; + int ret; + + hclge_push_vf_link_status(vport); + if (test_bit(HCLGE_VPORT_NEED_NOTIFY_RESET, &vport->need_notify)) { + ret = hclge_inform_vf_reset(vport, HNAE3_VF_PF_FUNC_RESET); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to inform VF %u reset!", + vport->vport_id - HCLGE_VF_VPORT_START_NUM); + return; + } + vport->need_notify = 0; + return; + } + + if (ae_dev->dev_version < HNAE3_DEVICE_VERSION_V3 && + test_bit(HCLGE_VPORT_NEED_NOTIFY_VF_VLAN, &vport->need_notify)) { + vlan_cfg = &vport->port_base_vlan_cfg; + ret = hclge_push_vf_port_base_vlan_info(&hdev->vport[0], + vport->vport_id, + vlan_cfg->state, + &vlan_cfg->vlan_info); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to inform VF %u port base vlan!", + vport->vport_id - HCLGE_VF_VPORT_START_NUM); + return; + } + clear_bit(HCLGE_VPORT_NEED_NOTIFY_VF_VLAN, &vport->need_notify); + } +} + static void hclge_vf_keep_alive(struct hclge_vport *vport) { + struct hclge_dev *hdev = vport->back; + vport->last_active_jiffies = jiffies; + + if (test_bit(HCLGE_VPORT_STATE_INITED, &vport->state) && + !test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) { + set_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state); + dev_info(&hdev->pdev->dev, "VF %u is alive!", + vport->vport_id - HCLGE_VF_VPORT_START_NUM); + hclge_notify_vf_config(vport); + } } static int hclge_set_vf_mtu(struct hclge_vport *vport, @@ -954,6 +1006,7 @@ static int hclge_mbx_vf_uninit_handler(struct hclge_mbx_ops_param *param) hclge_rm_vport_all_mac_table(param->vport, true, HCLGE_MAC_ADDR_MC); hclge_rm_vport_all_vlan_table(param->vport, true); + param->vport->mps = 0; return 0; } From aafd7d5ecc14cc20c33992703e8ca3ccdef9cc86 Mon Sep 17 00:00:00 2001 From: Jiguang Xiao Date: Wed, 28 Dec 2022 16:14:47 +0800 Subject: [PATCH 066/543] net: amd-xgbe: add missed tasklet_kill [ Upstream commit d530ece70f16f912e1d1bfeea694246ab78b0a4b ] The driver does not call tasklet_kill in several places. Add the calls to fix it. Fixes: 85b85c853401 ("amd-xgbe: Re-issue interrupt if interrupt status not cleared") Signed-off-by: Jiguang Xiao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 3 +++ drivers/net/ethernet/amd/xgbe/xgbe-i2c.c | 4 +++- drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 4 +++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 7b666106feee..614c0278419b 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1064,6 +1064,9 @@ static void xgbe_free_irqs(struct xgbe_prv_data *pdata) devm_free_irq(pdata->dev, pdata->dev_irq, pdata); + tasklet_kill(&pdata->tasklet_dev); + tasklet_kill(&pdata->tasklet_ecc); + if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq)) devm_free_irq(pdata->dev, pdata->ecc_irq, pdata); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c index 22d4fc547a0a..a9ccc4258ee5 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c @@ -447,8 +447,10 @@ static void xgbe_i2c_stop(struct xgbe_prv_data *pdata) xgbe_i2c_disable(pdata); xgbe_i2c_clear_all_interrupts(pdata); - if (pdata->dev_irq != pdata->i2c_irq) + if (pdata->dev_irq != pdata->i2c_irq) { devm_free_irq(pdata->dev, pdata->i2c_irq, pdata); + tasklet_kill(&pdata->tasklet_i2c); + } } static int xgbe_i2c_start(struct xgbe_prv_data *pdata) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 4e97b4869522..0c5c1b155683 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -1390,8 +1390,10 @@ static void xgbe_phy_stop(struct xgbe_prv_data *pdata) /* Disable auto-negotiation */ xgbe_an_disable_all(pdata); - if (pdata->dev_irq != pdata->an_irq) + if (pdata->dev_irq != pdata->an_irq) { devm_free_irq(pdata->dev, pdata->an_irq, pdata); + tasklet_kill(&pdata->tasklet_an); + } pdata->phy_if.phy_impl.stop(pdata); From 1ee3d84b0d6bd882f96908ec39f11c24fbc306d3 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Thu, 29 Dec 2022 07:30:05 +0000 Subject: [PATCH 067/543] net: ena: Fix toeplitz initial hash value [ Upstream commit 332b49ff637d6c1a75b971022a8b992cf3c57db1 ] On driver initialization, RSS hash initial value is set to zero, instead of the default value. This happens because we pass NULL as the RSS key parameter, which caused us to never initialize the RSS hash value. This patch fixes it by making sure the initial value is set, no matter what the value of the RSS key is. Fixes: 91a65b7d3ed8 ("net: ena: fix potential crash when rxfh key is NULL") Signed-off-by: Nati Koler Signed-off-by: David Arinzon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_com.c | 29 +++++++---------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 8c8b4c88c7de..451c3a1b6255 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -2400,29 +2400,18 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, return -EOPNOTSUPP; } - switch (func) { - case ENA_ADMIN_TOEPLITZ: - if (key) { - if (key_len != sizeof(hash_key->key)) { - netdev_err(ena_dev->net_device, - "key len (%u) doesn't equal the supported size (%zu)\n", - key_len, sizeof(hash_key->key)); - return -EINVAL; - } - memcpy(hash_key->key, key, key_len); - rss->hash_init_val = init_val; - hash_key->key_parts = key_len / sizeof(hash_key->key[0]); + if ((func == ENA_ADMIN_TOEPLITZ) && key) { + if (key_len != sizeof(hash_key->key)) { + netdev_err(ena_dev->net_device, + "key len (%u) doesn't equal the supported size (%zu)\n", + key_len, sizeof(hash_key->key)); + return -EINVAL; } - break; - case ENA_ADMIN_CRC32: - rss->hash_init_val = init_val; - break; - default: - netdev_err(ena_dev->net_device, "Invalid hash function (%d)\n", - func); - return -EINVAL; + memcpy(hash_key->key, key, key_len); + hash_key->key_parts = key_len / sizeof(hash_key->key[0]); } + rss->hash_init_val = init_val; old_func = rss->hash_func; rss->hash_func = func; rc = ena_com_set_hash_function(ena_dev); From c7304c752a20411c95e57540dc1f48fa7a420416 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Thu, 29 Dec 2022 07:30:06 +0000 Subject: [PATCH 068/543] net: ena: Don't register memory info on XDP exchange [ Upstream commit 9c9e539956fa67efb8a65e32b72a853740b33445 ] Since the queues aren't destroyed when we only exchange XDP programs, there's no need to re-register them again. Fixes: 548c4940b9f1 ("net: ena: Implement XDP_TX action") Signed-off-by: Shay Agroskin Signed-off-by: David Arinzon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 5a454b58498f..e313bb45319c 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -512,16 +512,18 @@ static void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter, struct bpf_prog *prog, int first, int count) { + struct bpf_prog *old_bpf_prog; struct ena_ring *rx_ring; int i = 0; for (i = first; i < count; i++) { rx_ring = &adapter->rx_ring[i]; - xchg(&rx_ring->xdp_bpf_prog, prog); - if (prog) { + old_bpf_prog = xchg(&rx_ring->xdp_bpf_prog, prog); + + if (!old_bpf_prog && prog) { ena_xdp_register_rxq_info(rx_ring); rx_ring->rx_headroom = XDP_PACKET_HEADROOM; - } else { + } else if (old_bpf_prog && !prog) { ena_xdp_unregister_rxq_info(rx_ring); rx_ring->rx_headroom = NET_SKB_PAD; } From 7d82fbedaecb8730e6dbb763b23151fea0781992 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Thu, 29 Dec 2022 07:30:07 +0000 Subject: [PATCH 069/543] net: ena: Account for the number of processed bytes in XDP [ Upstream commit c7f5e34d906320fdc996afa616676161c029cc02 ] The size of packets that were forwarded or dropped by XDP wasn't added to the total processed bytes statistic. Fixes: 548c4940b9f1 ("net: ena: Implement XDP_TX action") Signed-off-by: Shay Agroskin Signed-off-by: David Arinzon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index e313bb45319c..69f2364b8468 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1719,6 +1719,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, } if (xdp_verdict != XDP_PASS) { xdp_flags |= xdp_verdict; + total_len += ena_rx_ctx.ena_bufs[0].len; res_budget--; continue; } From 5a5084e947b149d8bfed81549309c0e371eadca9 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Thu, 29 Dec 2022 07:30:08 +0000 Subject: [PATCH 070/543] net: ena: Use bitmask to indicate packet redirection [ Upstream commit 59811faa2c54dbcf44d575b5a8f6e7077da88dc2 ] Redirecting packets with XDP Redirect is done in two phases: 1. A packet is passed by the driver to the kernel using xdp_do_redirect(). 2. After finishing polling for new packets the driver lets the kernel know that it can now process the redirected packet using xdp_do_flush_map(). The packets' redirection is handled in the napi context of the queue that called xdp_do_redirect() To avoid calling xdp_do_flush_map() each time the driver first checks whether any packets were redirected, using xdp_flags |= xdp_verdict; and if (xdp_flags & XDP_REDIRECT) xdp_do_flush_map() essentially treating XDP instructions as a bitmask, which isn't the case: enum xdp_action { XDP_ABORTED = 0, XDP_DROP, XDP_PASS, XDP_TX, XDP_REDIRECT, }; Given the current possible values of xdp_action, the current design doesn't have a bug (since XDP_REDIRECT = 100b), but it is still flawed. This patch makes the driver use a bitmask instead, to avoid future issues. Fixes: a318c70ad152 ("net: ena: introduce XDP redirect implementation") Signed-off-by: Shay Agroskin Signed-off-by: David Arinzon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 26 ++++++++++++-------- drivers/net/ethernet/amazon/ena/ena_netdev.h | 9 +++++++ 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 69f2364b8468..821355c5db10 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -374,9 +374,9 @@ static int ena_xdp_xmit(struct net_device *dev, int n, static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp) { + u32 verdict = ENA_XDP_PASS; struct bpf_prog *xdp_prog; struct ena_ring *xdp_ring; - u32 verdict = XDP_PASS; struct xdp_frame *xdpf; u64 *xdp_stat; @@ -393,7 +393,7 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp) if (unlikely(!xdpf)) { trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); xdp_stat = &rx_ring->rx_stats.xdp_aborted; - verdict = XDP_ABORTED; + verdict = ENA_XDP_DROP; break; } @@ -409,29 +409,35 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp) spin_unlock(&xdp_ring->xdp_tx_lock); xdp_stat = &rx_ring->rx_stats.xdp_tx; + verdict = ENA_XDP_TX; break; case XDP_REDIRECT: if (likely(!xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog))) { xdp_stat = &rx_ring->rx_stats.xdp_redirect; + verdict = ENA_XDP_REDIRECT; break; } trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); xdp_stat = &rx_ring->rx_stats.xdp_aborted; - verdict = XDP_ABORTED; + verdict = ENA_XDP_DROP; break; case XDP_ABORTED: trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); xdp_stat = &rx_ring->rx_stats.xdp_aborted; + verdict = ENA_XDP_DROP; break; case XDP_DROP: xdp_stat = &rx_ring->rx_stats.xdp_drop; + verdict = ENA_XDP_DROP; break; case XDP_PASS: xdp_stat = &rx_ring->rx_stats.xdp_pass; + verdict = ENA_XDP_PASS; break; default: bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, verdict); xdp_stat = &rx_ring->rx_stats.xdp_invalid; + verdict = ENA_XDP_DROP; } ena_increase_stat(xdp_stat, 1, &rx_ring->syncp); @@ -1621,12 +1627,12 @@ static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp) * we expect, then we simply drop it */ if (unlikely(rx_ring->ena_bufs[0].len > ENA_XDP_MAX_MTU)) - return XDP_DROP; + return ENA_XDP_DROP; ret = ena_xdp_execute(rx_ring, xdp); /* The xdp program might expand the headers */ - if (ret == XDP_PASS) { + if (ret == ENA_XDP_PASS) { rx_info->page_offset = xdp->data - xdp->data_hard_start; rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data; } @@ -1665,7 +1671,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, xdp_init_buff(&xdp, ENA_PAGE_SIZE, &rx_ring->xdp_rxq); do { - xdp_verdict = XDP_PASS; + xdp_verdict = ENA_XDP_PASS; skb = NULL; ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; ena_rx_ctx.max_bufs = rx_ring->sgl_size; @@ -1693,7 +1699,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp); /* allocate skb and fill it */ - if (xdp_verdict == XDP_PASS) + if (xdp_verdict == ENA_XDP_PASS) skb = ena_rx_skb(rx_ring, rx_ring->ena_bufs, ena_rx_ctx.descs, @@ -1711,13 +1717,13 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, /* Packets was passed for transmission, unmap it * from RX side. */ - if (xdp_verdict == XDP_TX || xdp_verdict == XDP_REDIRECT) { + if (xdp_verdict & ENA_XDP_FORWARDED) { ena_unmap_rx_buff(rx_ring, &rx_ring->rx_buffer_info[req_id]); rx_ring->rx_buffer_info[req_id].page = NULL; } } - if (xdp_verdict != XDP_PASS) { + if (xdp_verdict != ENA_XDP_PASS) { xdp_flags |= xdp_verdict; total_len += ena_rx_ctx.ena_bufs[0].len; res_budget--; @@ -1763,7 +1769,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, ena_refill_rx_bufs(rx_ring, refill_required); } - if (xdp_flags & XDP_REDIRECT) + if (xdp_flags & ENA_XDP_REDIRECT) xdp_do_flush_map(); return work_done; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 1bdce99bf688..290ae9bf47ee 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -409,6 +409,15 @@ enum ena_xdp_errors_t { ENA_XDP_NO_ENOUGH_QUEUES, }; +enum ENA_XDP_ACTIONS { + ENA_XDP_PASS = 0, + ENA_XDP_TX = BIT(0), + ENA_XDP_REDIRECT = BIT(1), + ENA_XDP_DROP = BIT(2) +}; + +#define ENA_XDP_FORWARDED (ENA_XDP_TX | ENA_XDP_REDIRECT) + static inline bool ena_xdp_present(struct ena_adapter *adapter) { return !!adapter->xdp_bpf_prog; From 2474a8ed1ef36ba237507cc2250bcd586627304f Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Thu, 29 Dec 2022 07:30:09 +0000 Subject: [PATCH 071/543] net: ena: Fix rx_copybreak value update [ Upstream commit c7062aaee099f2f43d6f07a71744b44b94b94b34 ] Make the upper bound on rx_copybreak tighter, by making sure it is smaller than the minimum of mtu and ENA_PAGE_SIZE. With the current upper bound of mtu, rx_copybreak can be larger than a page. Such large rx_copybreak will not bring any performance benefit to the user and therefore makes no sense. In addition, the value update was only reflected in the adapter structure, but not applied for each ring, causing it to not take effect. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Osama Abboud Signed-off-by: Arthur Kiyanovski Signed-off-by: David Arinzon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 6 +----- drivers/net/ethernet/amazon/ena/ena_netdev.c | 18 ++++++++++++++++++ drivers/net/ethernet/amazon/ena/ena_netdev.h | 2 ++ 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 98d6386b7f39..444ccef76da2 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -887,11 +887,7 @@ static int ena_set_tunable(struct net_device *netdev, switch (tuna->id) { case ETHTOOL_RX_COPYBREAK: len = *(u32 *)data; - if (len > adapter->netdev->mtu) { - ret = -EINVAL; - break; - } - adapter->rx_copybreak = len; + ret = ena_set_rx_copybreak(adapter, len); break; default: ret = -EINVAL; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 821355c5db10..663f9cd3babf 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2814,6 +2814,24 @@ int ena_update_queue_sizes(struct ena_adapter *adapter, return dev_was_up ? ena_up(adapter) : 0; } +int ena_set_rx_copybreak(struct ena_adapter *adapter, u32 rx_copybreak) +{ + struct ena_ring *rx_ring; + int i; + + if (rx_copybreak > min_t(u16, adapter->netdev->mtu, ENA_PAGE_SIZE)) + return -EINVAL; + + adapter->rx_copybreak = rx_copybreak; + + for (i = 0; i < adapter->num_io_queues; i++) { + rx_ring = &adapter->rx_ring[i]; + rx_ring->rx_copybreak = rx_copybreak; + } + + return 0; +} + int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count) { struct ena_com_dev *ena_dev = adapter->ena_dev; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 290ae9bf47ee..f9d862b630fa 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -392,6 +392,8 @@ int ena_update_queue_sizes(struct ena_adapter *adapter, int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count); +int ena_set_rx_copybreak(struct ena_adapter *adapter, u32 rx_copybreak); + int ena_get_sset_count(struct net_device *netdev, int sset); static inline void ena_reset_device(struct ena_adapter *adapter, From eff3be564b50e3ea4df6ff7cbfa4be078f8a1bc3 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Thu, 29 Dec 2022 07:30:10 +0000 Subject: [PATCH 072/543] net: ena: Set default value for RX interrupt moderation [ Upstream commit e712f3e4920b3a1a5e6b536827d118e14862896c ] RX ring can be NULL in XDP use cases where only TX queues are configured. In this scenario, the RX interrupt moderation value sent to the device remains in its default value of 0. In this change, setting the default value of the RX interrupt moderation to be the same as of the TX. Fixes: 548c4940b9f1 ("net: ena: Implement XDP_TX action") Signed-off-by: David Arinzon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 663f9cd3babf..c4aff712b5d8 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1823,8 +1823,9 @@ static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi) static void ena_unmask_interrupt(struct ena_ring *tx_ring, struct ena_ring *rx_ring) { + u32 rx_interval = tx_ring->smoothed_interval; struct ena_eth_io_intr_reg intr_reg; - u32 rx_interval = 0; + /* Rx ring can be NULL when for XDP tx queues which don't have an * accompanying rx_ring pair. */ From 191c4b94c18ba2c617475d35596cea03bd530376 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Thu, 29 Dec 2022 07:30:11 +0000 Subject: [PATCH 073/543] net: ena: Update NUMA TPH hint register upon NUMA node update [ Upstream commit a8ee104f986e720cea52133885cc822d459398c7 ] The device supports a PCIe optimization hint, which indicates on which NUMA the queue is currently processed. This hint is utilized by PCIe in order to reduce its access time by accessing the correct NUMA resources and maintaining cache coherence. The driver calls the register update for the hint (called TPH - TLP Processing Hint) during the NAPI loop. Though the update is expected upon a NUMA change (when a queue is moved from one NUMA to the other), the current logic performs a register update when the queue is moved to a different CPU, but the CPU is not necessarily in a different NUMA. The changes include: 1. Performing the TPH update only when the queue has switched a NUMA node. 2. Moving the TPH update call to be triggered only when NAPI was scheduled from interrupt context, as opposed to a busy-polling loop. This is due to the fact that during busy-polling, the frequency of CPU switches for a particular queue is significantly higher, thus, the likelihood to switch NUMA is much higher. Therefore, providing the frequent updates to the device upon a NUMA update are unlikely to be beneficial. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: David Arinzon Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 27 +++++++++++++------- drivers/net/ethernet/amazon/ena/ena_netdev.h | 6 +++-- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index c4aff712b5d8..5ce01ac72637 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -680,6 +680,7 @@ static void ena_init_io_rings_common(struct ena_adapter *adapter, ring->ena_dev = adapter->ena_dev; ring->per_napi_packets = 0; ring->cpu = 0; + ring->numa_node = 0; ring->no_interrupt_event_cnt = 0; u64_stats_init(&ring->syncp); } @@ -783,6 +784,7 @@ static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid) tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; tx_ring->cpu = ena_irq->cpu; + tx_ring->numa_node = node; return 0; err_push_buf_intermediate_buf: @@ -915,6 +917,7 @@ static int ena_setup_rx_resources(struct ena_adapter *adapter, rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; rx_ring->cpu = ena_irq->cpu; + rx_ring->numa_node = node; return 0; } @@ -1863,20 +1866,27 @@ static void ena_update_ring_numa_node(struct ena_ring *tx_ring, if (likely(tx_ring->cpu == cpu)) goto out; + tx_ring->cpu = cpu; + if (rx_ring) + rx_ring->cpu = cpu; + numa_node = cpu_to_node(cpu); + + if (likely(tx_ring->numa_node == numa_node)) + goto out; + put_cpu(); if (numa_node != NUMA_NO_NODE) { ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node); - if (rx_ring) + tx_ring->numa_node = numa_node; + if (rx_ring) { + rx_ring->numa_node = numa_node; ena_com_update_numa_node(rx_ring->ena_com_io_cq, numa_node); + } } - tx_ring->cpu = cpu; - if (rx_ring) - rx_ring->cpu = cpu; - return; out: put_cpu(); @@ -1997,11 +2007,10 @@ static int ena_io_poll(struct napi_struct *napi, int budget) if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev)) ena_adjust_adaptive_rx_intr_moderation(ena_napi); + ena_update_ring_numa_node(tx_ring, rx_ring); ena_unmask_interrupt(tx_ring, rx_ring); } - ena_update_ring_numa_node(tx_ring, rx_ring); - ret = rx_work_done; } else { ret = budget; @@ -2386,7 +2395,7 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) ctx.mem_queue_type = ena_dev->tx_mem_queue_type; ctx.msix_vector = msix_vector; ctx.queue_size = tx_ring->ring_size; - ctx.numa_node = cpu_to_node(tx_ring->cpu); + ctx.numa_node = tx_ring->numa_node; rc = ena_com_create_io_queue(ena_dev, &ctx); if (rc) { @@ -2454,7 +2463,7 @@ static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid) ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; ctx.msix_vector = msix_vector; ctx.queue_size = rx_ring->ring_size; - ctx.numa_node = cpu_to_node(rx_ring->cpu); + ctx.numa_node = rx_ring->numa_node; rc = ena_com_create_io_queue(ena_dev, &ctx); if (rc) { diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index f9d862b630fa..2cb141079474 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -262,9 +262,11 @@ struct ena_ring { bool disable_meta_caching; u16 no_interrupt_event_cnt; - /* cpu for TPH */ + /* cpu and NUMA for TPH */ int cpu; - /* number of tx/rx_buffer_info's entries */ + int numa_node; + + /* number of tx/rx_buffer_info's entries */ int ring_size; enum ena_admin_placement_policy_type tx_mem_queue_type; From ee84d37a5f08ed1121cdd16f8f3ed87552087a21 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 29 Dec 2022 10:29:25 +0400 Subject: [PATCH 074/543] net: phy: xgmiitorgmii: Fix refcount leak in xgmiitorgmii_probe [ Upstream commit d039535850ee47079d59527e96be18d8e0daa84b ] of_phy_find_device() return device node with refcount incremented. Call put_device() to relese it when not needed anymore. Fixes: ab4e6ee578e8 ("net: phy: xgmiitorgmii: Check phy_driver ready before accessing") Signed-off-by: Miaoqian Lin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/phy/xilinx_gmii2rgmii.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c index 8dcb49ed1f3d..7fd9fe6a602b 100644 --- a/drivers/net/phy/xilinx_gmii2rgmii.c +++ b/drivers/net/phy/xilinx_gmii2rgmii.c @@ -105,6 +105,7 @@ static int xgmiitorgmii_probe(struct mdio_device *mdiodev) if (!priv->phy_dev->drv) { dev_info(dev, "Attached phy not ready\n"); + put_device(&priv->phy_dev->mdio.dev); return -EPROBE_DEFER; } From 2892b358c8966170901c86fc86173ae522fc3957 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Sun, 11 Dec 2022 00:05:58 +0200 Subject: [PATCH 075/543] gpio: pca953x: avoid to use uninitialized value pinctrl [ Upstream commit 90fee3dd5bfc1b9f4c8c0ba6cd2a35c9d79ca4de ] There is a variable pinctrl declared without initializer. And then has the case (switch operation chose the default case) to directly use this uninitialized value, this is not a safe behavior. So here initialize the pinctrl as 0 to avoid this issue. This is reported by Coverity. Fixes: 13c5d4ce8060 ("gpio: pca953x: Add support for PCAL6534") Signed-off-by: Haibo Chen Signed-off-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-pca953x.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index ebe1943b85dd..bf21803a0036 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -473,6 +473,9 @@ static u8 pcal6534_recalc_addr(struct pca953x_chip *chip, int reg, int off) case PCAL6524_DEBOUNCE: pinctrl = ((reg & PCAL_PINCTRL_MASK) >> 1) + 0x1c; break; + default: + pinctrl = 0; + break; } return pinctrl + addr + (off / BANK_SZ); From e597b003c736217b0c99ccf1b240c25009105238 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 28 Dec 2022 14:56:09 +0200 Subject: [PATCH 076/543] RDMA/mlx5: Fix mlx5_ib_get_hw_stats when used for device [ Upstream commit 38b50aa44495d5eb4218f0b82fc2da76505cec53 ] Currently, when mlx5_ib_get_hw_stats() is used for device (port_num = 0), there is a special handling in order to use the correct counters, but, port_num is being passed down the stack without any change. Also, some functions assume that port_num >=1. As a result, the following oops can occur. BUG: unable to handle page fault for address: ffff89510294f1a8 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 0 P4D 0 Oops: 0002 [#1] SMP CPU: 8 PID: 1382 Comm: devlink Tainted: G W 6.1.0-rc4_for_upstream_base_2022_11_10_16_12 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:_raw_spin_lock+0xc/0x20 Call Trace: mlx5_ib_get_native_port_mdev+0x73/0xe0 [mlx5_ib] do_get_hw_stats.constprop.0+0x109/0x160 [mlx5_ib] mlx5_ib_get_hw_stats+0xad/0x180 [mlx5_ib] ib_setup_device_attrs+0xf0/0x290 [ib_core] ib_register_device+0x3bb/0x510 [ib_core] ? atomic_notifier_chain_register+0x67/0x80 __mlx5_ib_add+0x2b/0x80 [mlx5_ib] mlx5r_probe+0xb8/0x150 [mlx5_ib] ? auxiliary_match_id+0x6a/0x90 auxiliary_bus_probe+0x3c/0x70 ? driver_sysfs_add+0x6b/0x90 really_probe+0xcd/0x380 __driver_probe_device+0x80/0x170 driver_probe_device+0x1e/0x90 __device_attach_driver+0x7d/0x100 ? driver_allows_async_probing+0x60/0x60 ? driver_allows_async_probing+0x60/0x60 bus_for_each_drv+0x7b/0xc0 __device_attach+0xbc/0x200 bus_probe_device+0x87/0xa0 device_add+0x404/0x940 ? dev_set_name+0x53/0x70 __auxiliary_device_add+0x43/0x60 add_adev+0x99/0xe0 [mlx5_core] mlx5_attach_device+0xc8/0x120 [mlx5_core] mlx5_load_one_devl_locked+0xb2/0xe0 [mlx5_core] devlink_reload+0x133/0x250 devlink_nl_cmd_reload+0x480/0x570 ? devlink_nl_pre_doit+0x44/0x2b0 genl_family_rcv_msg_doit.isra.0+0xc2/0x110 genl_rcv_msg+0x180/0x2b0 ? devlink_nl_cmd_region_read_dumpit+0x540/0x540 ? devlink_reload+0x250/0x250 ? devlink_put+0x50/0x50 ? genl_family_rcv_msg_doit.isra.0+0x110/0x110 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1f6/0x2c0 netlink_sendmsg+0x237/0x490 sock_sendmsg+0x33/0x40 __sys_sendto+0x103/0x160 ? handle_mm_fault+0x10e/0x290 ? do_user_addr_fault+0x1c0/0x5f0 __x64_sys_sendto+0x25/0x30 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Fix it by setting port_num to 1 in order to get device status and remove unused variable. Fixes: aac4492ef23a ("IB/mlx5: Update counter implementation for dual port RoCE") Link: https://lore.kernel.org/r/98b82994c3cd3fa593b8a75ed3f3901e208beb0f.1672231736.git.leonro@nvidia.com Signed-off-by: Shay Drory Reviewed-by: Patrisious Haddad Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/counters.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 945758f39523..3e1272695d99 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -278,7 +278,6 @@ static int do_get_hw_stats(struct ib_device *ibdev, const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1); struct mlx5_core_dev *mdev; int ret, num_counters; - u32 mdev_port_num; if (!stats) return -EINVAL; @@ -299,8 +298,9 @@ static int do_get_hw_stats(struct ib_device *ibdev, } if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) { - mdev = mlx5_ib_get_native_port_mdev(dev, port_num, - &mdev_port_num); + if (!port_num) + port_num = 1; + mdev = mlx5_ib_get_native_port_mdev(dev, port_num, NULL); if (!mdev) { /* If port is not affiliated yet, its in down state * which doesn't have any counters yet, so it would be From 5f87d4dc581e130a6e226de824ff7b0af3f28e63 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 28 Dec 2022 14:56:10 +0200 Subject: [PATCH 077/543] RDMA/mlx5: Fix validation of max_rd_atomic caps for DC [ Upstream commit 8de8482fe5732fbef4f5af82bc0c0362c804cd1f ] Currently, when modifying DC, we validate max_rd_atomic user attribute against the RC cap, validate against DC. RC and DC QP types have different device limitations. This can cause userspace created DC QPs to malfunction. Fixes: c32a4f296e1d ("IB/mlx5: Add support for DC Initiator QP") Link: https://lore.kernel.org/r/0c5aee72cea188c3bb770f4207cce7abc9b6fc74.1672231736.git.leonro@nvidia.com Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/qp.c | 49 +++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 40d9410ec303..cf953d23d18d 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4502,6 +4502,40 @@ static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev, return false; } +static int validate_rd_atomic(struct mlx5_ib_dev *dev, struct ib_qp_attr *attr, + int attr_mask, enum ib_qp_type qp_type) +{ + int log_max_ra_res; + int log_max_ra_req; + + if (qp_type == MLX5_IB_QPT_DCI) { + log_max_ra_res = 1 << MLX5_CAP_GEN(dev->mdev, + log_max_ra_res_dc); + log_max_ra_req = 1 << MLX5_CAP_GEN(dev->mdev, + log_max_ra_req_dc); + } else { + log_max_ra_res = 1 << MLX5_CAP_GEN(dev->mdev, + log_max_ra_res_qp); + log_max_ra_req = 1 << MLX5_CAP_GEN(dev->mdev, + log_max_ra_req_qp); + } + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && + attr->max_rd_atomic > log_max_ra_res) { + mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n", + attr->max_rd_atomic); + return false; + } + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && + attr->max_dest_rd_atomic > log_max_ra_req) { + mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n", + attr->max_dest_rd_atomic); + return false; + } + return true; +} + int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { @@ -4589,21 +4623,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } - if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && - attr->max_rd_atomic > - (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) { - mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n", - attr->max_rd_atomic); + if (!validate_rd_atomic(dev, attr, attr_mask, qp_type)) goto out; - } - - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && - attr->max_dest_rd_atomic > - (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) { - mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n", - attr->max_dest_rd_atomic); - goto out; - } if (cur_state == new_state && cur_state == IB_QPS_RESET) { err = 0; From d87357c82ce39acbd990b06df5a651b1d7cedf1f Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Fri, 30 Dec 2022 17:18:28 +0800 Subject: [PATCH 078/543] selftests: net: fix cleanup_v6() for arp_ndisc_evict_nocarrier [ Upstream commit 9c4d7f45d60745a1cea0e841fa5e3444c398d2f1 ] The cleanup_v6() will cause the arp_ndisc_evict_nocarrier script exit with 255 (No such file or directory), even the tests are good: # selftests: net: arp_ndisc_evict_nocarrier.sh # run arp_evict_nocarrier=1 test # RTNETLINK answers: File exists # ok # run arp_evict_nocarrier=0 test # RTNETLINK answers: File exists # ok # run all.arp_evict_nocarrier=0 test # RTNETLINK answers: File exists # ok # run ndisc_evict_nocarrier=1 test # ok # run ndisc_evict_nocarrier=0 test # ok # run all.ndisc_evict_nocarrier=0 test # ok not ok 1 selftests: net: arp_ndisc_evict_nocarrier.sh # exit=255 This is because it's trying to modify the parameter for ipv4 instead. Also, tests for ipv6 (run_ndisc_evict_nocarrier_enabled() and run_ndisc_evict_nocarrier_disabled() are working on veth1, reflect this fact in cleanup_v6(). Fixes: f86ca07eb531 ("selftests: net: add arp_ndisc_evict_nocarrier") Signed-off-by: Po-Hsu Lin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh index b5af08af8559..b4ec1eeee6c9 100755 --- a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh +++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh @@ -24,8 +24,8 @@ cleanup_v6() ip netns del me ip netns del peer - sysctl -w net.ipv4.conf.veth0.ndisc_evict_nocarrier=1 >/dev/null 2>&1 - sysctl -w net.ipv4.conf.all.ndisc_evict_nocarrier=1 >/dev/null 2>&1 + sysctl -w net.ipv6.conf.veth1.ndisc_evict_nocarrier=1 >/dev/null 2>&1 + sysctl -w net.ipv6.conf.all.ndisc_evict_nocarrier=1 >/dev/null 2>&1 } create_ns() From 718d28ce581efcb7da78dc09df438c5fe6f8a0d7 Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Fri, 30 Dec 2022 17:18:29 +0800 Subject: [PATCH 079/543] selftests: net: return non-zero for failures reported in arp_ndisc_evict_nocarrier [ Upstream commit 1856628baa17032531916984808d1bdfd62700d4 ] Return non-zero return value if there is any failure reported in this script during the test. Otherwise it can only reflect the status of the last command. Fixes: f86ca07eb531 ("selftests: net: add arp_ndisc_evict_nocarrier") Signed-off-by: Po-Hsu Lin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../selftests/net/arp_ndisc_evict_nocarrier.sh | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh index b4ec1eeee6c9..4a110bb01e53 100755 --- a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh +++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh @@ -18,6 +18,7 @@ readonly V4_ADDR1=10.0.10.2 readonly V6_ADDR0=2001:db8:91::1 readonly V6_ADDR1=2001:db8:91::2 nsid=100 +ret=0 cleanup_v6() { @@ -61,7 +62,7 @@ setup_v6() { if [ $? -ne 0 ]; then cleanup_v6 echo "failed" - exit + exit 1 fi # Set veth2 down, which will put veth1 in NOCARRIER state @@ -88,7 +89,7 @@ setup_v4() { if [ $? -ne 0 ]; then cleanup_v4 echo "failed" - exit + exit 1 fi # Set veth1 down, which will put veth0 in NOCARRIER state @@ -115,6 +116,7 @@ run_arp_evict_nocarrier_enabled() { if [ $? -eq 0 ];then echo "failed" + ret=1 else echo "ok" fi @@ -134,6 +136,7 @@ run_arp_evict_nocarrier_disabled() { echo "ok" else echo "failed" + ret=1 fi cleanup_v4 @@ -164,6 +167,7 @@ run_ndisc_evict_nocarrier_enabled() { if [ $? -eq 0 ];then echo "failed" + ret=1 else echo "ok" fi @@ -182,6 +186,7 @@ run_ndisc_evict_nocarrier_disabled() { echo "ok" else echo "failed" + ret=1 fi cleanup_v6 @@ -198,6 +203,7 @@ run_ndisc_evict_nocarrier_disabled_all() { echo "ok" else echo "failed" + ret=1 fi cleanup_v6 @@ -218,3 +224,4 @@ if [ "$(id -u)" -ne 0 ];then fi run_all_tests +exit $ret From b8e3dd2eba02f7586e01b23d7c3674facb4c565c Mon Sep 17 00:00:00 2001 From: Carlo Caione Date: Mon, 19 Dec 2022 09:43:05 +0100 Subject: [PATCH 080/543] drm/meson: Reduce the FIFO lines held when AFBC is not used [ Upstream commit 3b754ed6d1cd90017e66e5cc16f3923e4a952ffc ] Having a bigger number of FIFO lines held after vsync is only useful to SoCs using AFBC to give time to the AFBC decoder to be reset, configured and enabled again. For SoCs not using AFBC this, on the contrary, is causing on some displays issues and a few pixels vertical offset in the displayed image. Conditionally increase the number of lines held after vsync only for SoCs using AFBC, leaving the default value for all the others. Fixes: 24e0d4058eff ("drm/meson: hold 32 lines after vsync to give time for AFBC start") Signed-off-by: Carlo Caione Acked-by: Martin Blumenstingl Acked-by: Neil Armstrong [narmstrong: added fixes tag] Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20221216-afbc_s905x-v1-0-033bebf780d9@baylibre.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/meson/meson_viu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_viu.c b/drivers/gpu/drm/meson/meson_viu.c index d4b907889a21..cd399b0b7181 100644 --- a/drivers/gpu/drm/meson/meson_viu.c +++ b/drivers/gpu/drm/meson/meson_viu.c @@ -436,15 +436,14 @@ void meson_viu_init(struct meson_drm *priv) /* Initialize OSD1 fifo control register */ reg = VIU_OSD_DDR_PRIORITY_URGENT | - VIU_OSD_HOLD_FIFO_LINES(31) | VIU_OSD_FIFO_DEPTH_VAL(32) | /* fifo_depth_val: 32*8=256 */ VIU_OSD_WORDS_PER_BURST(4) | /* 4 words in 1 burst */ VIU_OSD_FIFO_LIMITS(2); /* fifo_lim: 2*16=32 */ if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) - reg |= VIU_OSD_BURST_LENGTH_32; + reg |= (VIU_OSD_BURST_LENGTH_32 | VIU_OSD_HOLD_FIFO_LINES(31)); else - reg |= VIU_OSD_BURST_LENGTH_64; + reg |= (VIU_OSD_BURST_LENGTH_64 | VIU_OSD_HOLD_FIFO_LINES(4)); writel_relaxed(reg, priv->io_base + _REG(VIU_OSD1_FIFO_CTRL_STAT)); writel_relaxed(reg, priv->io_base + _REG(VIU_OSD2_FIFO_CTRL_STAT)); From 516fac1e074940525da01d4ef218ee608ea5ceee Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 14 Nov 2022 08:33:09 -0500 Subject: [PATCH 081/543] filelock: new helper: vfs_inode_has_locks [ Upstream commit ab1ddef98a715eddb65309ffa83267e4e84a571e ] Ceph has a need to know whether a particular inode has any locks set on it. It's currently tracking that by a num_locks field in its filp->private_data, but that's problematic as it tries to decrement this field when releasing locks and that can race with the file being torn down. Add a new vfs_inode_has_locks helper that just returns whether any locks are currently held on the inode. Reviewed-by: Xiubo Li Reviewed-by: Christoph Hellwig Signed-off-by: Jeff Layton Stable-dep-of: 461ab10ef7e6 ("ceph: switch to vfs_inode_has_locks() to fix file lock bug") Signed-off-by: Sasha Levin --- fs/locks.c | 23 +++++++++++++++++++++++ include/linux/fs.h | 6 ++++++ 2 files changed, 29 insertions(+) diff --git a/fs/locks.c b/fs/locks.c index 607f94a0e789..7dc129cc1a26 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2669,6 +2669,29 @@ int vfs_cancel_lock(struct file *filp, struct file_lock *fl) } EXPORT_SYMBOL_GPL(vfs_cancel_lock); +/** + * vfs_inode_has_locks - are any file locks held on @inode? + * @inode: inode to check for locks + * + * Return true if there are any FL_POSIX or FL_FLOCK locks currently + * set on @inode. + */ +bool vfs_inode_has_locks(struct inode *inode) +{ + struct file_lock_context *ctx; + bool ret; + + ctx = smp_load_acquire(&inode->i_flctx); + if (!ctx) + return false; + + spin_lock(&ctx->flc_lock); + ret = !list_empty(&ctx->flc_posix) || !list_empty(&ctx->flc_flock); + spin_unlock(&ctx->flc_lock); + return ret; +} +EXPORT_SYMBOL_GPL(vfs_inode_has_locks); + #ifdef CONFIG_PROC_FS #include #include diff --git a/include/linux/fs.h b/include/linux/fs.h index 6b115bce14b9..081d1f539628 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1170,6 +1170,7 @@ extern int locks_delete_block(struct file_lock *); extern int vfs_test_lock(struct file *, struct file_lock *); extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); +bool vfs_inode_has_locks(struct inode *inode); extern int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl); extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); extern void lease_get_mtime(struct inode *, struct timespec64 *time); @@ -1284,6 +1285,11 @@ static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) return 0; } +static inline bool vfs_inode_has_locks(struct inode *inode) +{ + return false; +} + static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl) { return -ENOLCK; From 589886147edf2d1873cae9e5b4267f3d9ace251b Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 17 Nov 2022 10:43:21 +0800 Subject: [PATCH 082/543] ceph: switch to vfs_inode_has_locks() to fix file lock bug [ Upstream commit 461ab10ef7e6ea9b41a0571a7fc6a72af9549a3c ] For the POSIX locks they are using the same owner, which is the thread id. And multiple POSIX locks could be merged into single one, so when checking whether the 'file' has locks may fail. For a file where some openers use locking and others don't is a really odd usage pattern though. Locks are like stoplights -- they only work if everyone pays attention to them. Just switch ceph_get_caps() to check whether any locks are set on the inode. If there are POSIX/OFD/FLOCK locks on the file at the time, we should set CHECK_FILELOCK, regardless of what fd was used to set the lock. Fixes: ff5d913dfc71 ("ceph: return -EIO if read/write against filp that lost file locks") Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov Signed-off-by: Sasha Levin --- fs/ceph/caps.c | 2 +- fs/ceph/locks.c | 4 ---- fs/ceph/super.h | 1 - 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index e54814d0c2f7..cd69bf267d1b 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2915,7 +2915,7 @@ int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got while (true) { flags &= CEPH_FILE_MODE_MASK; - if (atomic_read(&fi->num_locks)) + if (vfs_inode_has_locks(inode)) flags |= CHECK_FILELOCK; _got = 0; ret = try_get_cap_refs(inode, need, want, endoff, diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index 3e2843e86e27..b191426bf880 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -32,18 +32,14 @@ void __init ceph_flock_init(void) static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src) { - struct ceph_file_info *fi = dst->fl_file->private_data; struct inode *inode = file_inode(dst->fl_file); atomic_inc(&ceph_inode(inode)->i_filelock_ref); - atomic_inc(&fi->num_locks); } static void ceph_fl_release_lock(struct file_lock *fl) { - struct ceph_file_info *fi = fl->fl_file->private_data; struct inode *inode = file_inode(fl->fl_file); struct ceph_inode_info *ci = ceph_inode(inode); - atomic_dec(&fi->num_locks); if (atomic_dec_and_test(&ci->i_filelock_ref)) { /* clear error when all locks are released */ spin_lock(&ci->i_ceph_lock); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 40630e6f691c..ae4126f63410 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -788,7 +788,6 @@ struct ceph_file_info { struct list_head rw_contexts; u32 filp_gen; - atomic_t num_locks; }; struct ceph_dir_file_info { From 9a402a210798662b04cbe6ca466e916a15efa03a Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Mon, 2 Jan 2023 12:20:39 +0400 Subject: [PATCH 083/543] gpio: sifive: Fix refcount leak in sifive_gpio_probe [ Upstream commit 694175cd8a1643cde3acb45c9294bca44a8e08e9 ] of_irq_find_parent() returns a node pointer with refcount incremented, We should use of_node_put() on it when not needed anymore. Add missing of_node_put() to avoid refcount leak. Fixes: 96868dce644d ("gpio/sifive: Add GPIO driver for SiFive SoCs") Signed-off-by: Miaoqian Lin Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-sifive.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-sifive.c b/drivers/gpio/gpio-sifive.c index 238f3210970c..bc5660f61c57 100644 --- a/drivers/gpio/gpio-sifive.c +++ b/drivers/gpio/gpio-sifive.c @@ -215,6 +215,7 @@ static int sifive_gpio_probe(struct platform_device *pdev) return -ENODEV; } parent = irq_find_host(irq_parent); + of_node_put(irq_parent); if (!parent) { dev_err(dev, "no IRQ parent domain\n"); return -ENODEV; From 85655c63877aeafdc23226510ea268a9fa0af807 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 1 Jan 2023 16:57:43 -0500 Subject: [PATCH 084/543] net: sched: atm: dont intepret cls results when asked to drop [ Upstream commit a2965c7be0522eaa18808684b7b82b248515511b ] If asked to drop a packet via TC_ACT_SHOT it is unsafe to assume res.class contains a valid pointer Fixes: b0188d4dbe5f ("[NET_SCHED]: sch_atm: Lindent") Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sched/sch_atm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index f52255fea652..4a981ca90b0b 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -393,10 +393,13 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, result = tcf_classify(skb, NULL, fl, &res, true); if (result < 0) continue; + if (result == TC_ACT_SHOT) + goto done; + flow = (struct atm_flow_data *)res.class; if (!flow) flow = lookup_flow(sch, res.classid); - goto done; + goto drop; } } flow = NULL; From dc46e39b727fddc5aacc0272ef83ee872d51be16 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 1 Jan 2023 16:57:44 -0500 Subject: [PATCH 085/543] net: sched: cbq: dont intepret cls results when asked to drop [ Upstream commit caa4b35b4317d5147b3ab0fbdc9c075c7d2e9c12 ] If asked to drop a packet via TC_ACT_SHOT it is unsafe to assume that res.class contains a valid pointer Sample splat reported by Kyle Zeng [ 5.405624] 0: reclassify loop, rule prio 0, protocol 800 [ 5.406326] ================================================================== [ 5.407240] BUG: KASAN: slab-out-of-bounds in cbq_enqueue+0x54b/0xea0 [ 5.407987] Read of size 1 at addr ffff88800e3122aa by task poc/299 [ 5.408731] [ 5.408897] CPU: 0 PID: 299 Comm: poc Not tainted 5.10.155+ #15 [ 5.409516] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 [ 5.410439] Call Trace: [ 5.410764] dump_stack+0x87/0xcd [ 5.411153] print_address_description+0x7a/0x6b0 [ 5.411687] ? vprintk_func+0xb9/0xc0 [ 5.411905] ? printk+0x76/0x96 [ 5.412110] ? cbq_enqueue+0x54b/0xea0 [ 5.412323] kasan_report+0x17d/0x220 [ 5.412591] ? cbq_enqueue+0x54b/0xea0 [ 5.412803] __asan_report_load1_noabort+0x10/0x20 [ 5.413119] cbq_enqueue+0x54b/0xea0 [ 5.413400] ? __kasan_check_write+0x10/0x20 [ 5.413679] __dev_queue_xmit+0x9c0/0x1db0 [ 5.413922] dev_queue_xmit+0xc/0x10 [ 5.414136] ip_finish_output2+0x8bc/0xcd0 [ 5.414436] __ip_finish_output+0x472/0x7a0 [ 5.414692] ip_finish_output+0x5c/0x190 [ 5.414940] ip_output+0x2d8/0x3c0 [ 5.415150] ? ip_mc_finish_output+0x320/0x320 [ 5.415429] __ip_queue_xmit+0x753/0x1760 [ 5.415664] ip_queue_xmit+0x47/0x60 [ 5.415874] __tcp_transmit_skb+0x1ef9/0x34c0 [ 5.416129] tcp_connect+0x1f5e/0x4cb0 [ 5.416347] tcp_v4_connect+0xc8d/0x18c0 [ 5.416577] __inet_stream_connect+0x1ae/0xb40 [ 5.416836] ? local_bh_enable+0x11/0x20 [ 5.417066] ? lock_sock_nested+0x175/0x1d0 [ 5.417309] inet_stream_connect+0x5d/0x90 [ 5.417548] ? __inet_stream_connect+0xb40/0xb40 [ 5.417817] __sys_connect+0x260/0x2b0 [ 5.418037] __x64_sys_connect+0x76/0x80 [ 5.418267] do_syscall_64+0x31/0x50 [ 5.418477] entry_SYSCALL_64_after_hwframe+0x61/0xc6 [ 5.418770] RIP: 0033:0x473bb7 [ 5.418952] Code: 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2a 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 18 89 54 24 0c 48 89 34 24 89 [ 5.420046] RSP: 002b:00007fffd20eb0f8 EFLAGS: 00000246 ORIG_RAX: 000000000000002a [ 5.420472] RAX: ffffffffffffffda RBX: 00007fffd20eb578 RCX: 0000000000473bb7 [ 5.420872] RDX: 0000000000000010 RSI: 00007fffd20eb110 RDI: 0000000000000007 [ 5.421271] RBP: 00007fffd20eb150 R08: 0000000000000001 R09: 0000000000000004 [ 5.421671] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001 [ 5.422071] R13: 00007fffd20eb568 R14: 00000000004fc740 R15: 0000000000000002 [ 5.422471] [ 5.422562] Allocated by task 299: [ 5.422782] __kasan_kmalloc+0x12d/0x160 [ 5.423007] kasan_kmalloc+0x5/0x10 [ 5.423208] kmem_cache_alloc_trace+0x201/0x2e0 [ 5.423492] tcf_proto_create+0x65/0x290 [ 5.423721] tc_new_tfilter+0x137e/0x1830 [ 5.423957] rtnetlink_rcv_msg+0x730/0x9f0 [ 5.424197] netlink_rcv_skb+0x166/0x300 [ 5.424428] rtnetlink_rcv+0x11/0x20 [ 5.424639] netlink_unicast+0x673/0x860 [ 5.424870] netlink_sendmsg+0x6af/0x9f0 [ 5.425100] __sys_sendto+0x58d/0x5a0 [ 5.425315] __x64_sys_sendto+0xda/0xf0 [ 5.425539] do_syscall_64+0x31/0x50 [ 5.425764] entry_SYSCALL_64_after_hwframe+0x61/0xc6 [ 5.426065] [ 5.426157] The buggy address belongs to the object at ffff88800e312200 [ 5.426157] which belongs to the cache kmalloc-128 of size 128 [ 5.426955] The buggy address is located 42 bytes to the right of [ 5.426955] 128-byte region [ffff88800e312200, ffff88800e312280) [ 5.427688] The buggy address belongs to the page: [ 5.427992] page:000000009875fabc refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0xe312 [ 5.428562] flags: 0x100000000000200(slab) [ 5.428812] raw: 0100000000000200 dead000000000100 dead000000000122 ffff888007843680 [ 5.429325] raw: 0000000000000000 0000000000100010 00000001ffffffff ffff88800e312401 [ 5.429875] page dumped because: kasan: bad access detected [ 5.430214] page->mem_cgroup:ffff88800e312401 [ 5.430471] [ 5.430564] Memory state around the buggy address: [ 5.430846] ffff88800e312180: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 5.431267] ffff88800e312200: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 fc [ 5.431705] >ffff88800e312280: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 5.432123] ^ [ 5.432391] ffff88800e312300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 fc [ 5.432810] ffff88800e312380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 5.433229] ================================================================== [ 5.433648] Disabling lock debugging due to kernel taint Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Kyle Zeng Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sched/sch_cbq.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 6568e17c4c63..36db5f6782f2 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -230,6 +230,8 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) result = tcf_classify(skb, NULL, fl, &res, true); if (!fl || result < 0) goto fallback; + if (result == TC_ACT_SHOT) + return NULL; cl = (void *)res.class; if (!cl) { @@ -250,8 +252,6 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; fallthrough; - case TC_ACT_SHOT: - return NULL; case TC_ACT_RECLASSIFY: return cbq_reclassify(skb, cl); } From 5896f55810680391a32652ca2b91245a05c11e22 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 2 Jan 2023 08:55:56 +0200 Subject: [PATCH 086/543] vxlan: Fix memory leaks in error path [ Upstream commit 06bf62944144a92d83dd14fd1378d2a288259561 ] The memory allocated by vxlan_vnigroup_init() is not freed in the error path, leading to memory leaks [1]. Fix by calling vxlan_vnigroup_uninit() in the error path. The leaks can be reproduced by annotating gro_cells_init() with ALLOW_ERROR_INJECTION() and then running: # echo "100" > /sys/kernel/debug/fail_function/probability # echo "1" > /sys/kernel/debug/fail_function/times # echo "gro_cells_init" > /sys/kernel/debug/fail_function/inject # printf %#x -12 > /sys/kernel/debug/fail_function/gro_cells_init/retval # ip link add name vxlan0 type vxlan dstport 4789 external vnifilter RTNETLINK answers: Cannot allocate memory [1] unreferenced object 0xffff88810db84a00 (size 512): comm "ip", pid 330, jiffies 4295010045 (age 66.016s) hex dump (first 32 bytes): f8 d5 76 0e 81 88 ff ff 01 00 00 00 00 00 00 02 ..v............. 03 00 04 00 48 00 00 00 00 00 00 01 04 00 01 00 ....H........... backtrace: [] kmalloc_trace+0x2a/0x60 [] vxlan_vnigroup_init+0x4c/0x160 [] vxlan_init+0x1ae/0x280 [] register_netdevice+0x57a/0x16d0 [] __vxlan_dev_create+0x7c7/0xa50 [] vxlan_newlink+0xd6/0x130 [] __rtnl_newlink+0x112b/0x18a0 [] rtnl_newlink+0x6c/0xa0 [] rtnetlink_rcv_msg+0x43f/0xd40 [] netlink_rcv_skb+0x170/0x440 [] netlink_unicast+0x53f/0x810 [] netlink_sendmsg+0x958/0xe70 [] ____sys_sendmsg+0x78f/0xa90 [] ___sys_sendmsg+0x13a/0x1e0 [] __sys_sendmsg+0x11c/0x1f0 [] do_syscall_64+0x38/0x80 unreferenced object 0xffff88810e76d5f8 (size 192): comm "ip", pid 330, jiffies 4295010045 (age 66.016s) hex dump (first 32 bytes): 04 00 00 00 00 00 00 00 db e1 4f e7 00 00 00 00 ..........O..... 08 d6 76 0e 81 88 ff ff 08 d6 76 0e 81 88 ff ff ..v.......v..... backtrace: [] __kmalloc_node+0x4e/0x90 [] kvmalloc_node+0xa6/0x1f0 [] bucket_table_alloc.isra.0+0x83/0x460 [] rhashtable_init+0x43b/0x7c0 [] vxlan_vnigroup_init+0x6c/0x160 [] vxlan_init+0x1ae/0x280 [] register_netdevice+0x57a/0x16d0 [] __vxlan_dev_create+0x7c7/0xa50 [] vxlan_newlink+0xd6/0x130 [] __rtnl_newlink+0x112b/0x18a0 [] rtnl_newlink+0x6c/0xa0 [] rtnetlink_rcv_msg+0x43f/0xd40 [] netlink_rcv_skb+0x170/0x440 [] netlink_unicast+0x53f/0x810 [] netlink_sendmsg+0x958/0xe70 [] ____sys_sendmsg+0x78f/0xa90 Fixes: f9c4bb0b245c ("vxlan: vni filtering support on collect metadata device") Signed-off-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/vxlan/vxlan_core.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 6ab669dcd1c6..d4be39b19a6b 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2917,16 +2917,23 @@ static int vxlan_init(struct net_device *dev) vxlan_vnigroup_init(vxlan); dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) - return -ENOMEM; - - err = gro_cells_init(&vxlan->gro_cells, dev); - if (err) { - free_percpu(dev->tstats); - return err; + if (!dev->tstats) { + err = -ENOMEM; + goto err_vnigroup_uninit; } + err = gro_cells_init(&vxlan->gro_cells, dev); + if (err) + goto err_free_percpu; + return 0; + +err_free_percpu: + free_percpu(dev->tstats); +err_vnigroup_uninit: + if (vxlan->cfg.flags & VXLAN_F_VNIFILTER) + vxlan_vnigroup_uninit(vxlan); + return err; } static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni) From 2bab138b44f0072433db40a14d75fe208d17732b Mon Sep 17 00:00:00 2001 From: Horatiu Vultur Date: Mon, 2 Jan 2023 13:12:15 +0100 Subject: [PATCH 087/543] net: sparx5: Fix reading of the MAC address [ Upstream commit 588ab2dc25f60efeb516b4abedb6c551949cc185 ] There is an issue with the checking of the return value of 'of_get_mac_address', which returns 0 on success and negative value on failure. The driver interpretated the result the opposite way. Therefore if there was a MAC address defined in the DT, then the driver was generating a random MAC address otherwise it would use address 0. Fix this by checking correctly the return value of 'of_get_mac_address' Fixes: b74ef9f9cb91 ("net: sparx5: Do not use mac_addr uninitialized in mchp_sparx5_probe()") Signed-off-by: Horatiu Vultur Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/microchip/sparx5/sparx5_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index b6bbb3c9bd7a..3423c95cc84a 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -824,7 +824,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev) if (err) goto cleanup_config; - if (!of_get_mac_address(np, sparx5->base_mac)) { + if (of_get_mac_address(np, sparx5->base_mac)) { dev_info(sparx5->dev, "MAC addr was not set, use random MAC\n"); eth_random_addr(sparx5->base_mac); sparx5->base_mac[5] = 0; From 080b56c66acad21616140ef3091d72ccb0674465 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 30 Dec 2022 13:24:37 +0100 Subject: [PATCH 088/543] netfilter: ipset: fix hash:net,port,net hang with /0 subnet MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a31d47be64b9b74f8cfedffe03e0a8a1f9e51f23 ] The hash:net,port,net set type supports /0 subnets. However, the patch commit 5f7b51bf09baca8e titled "netfilter: ipset: Limit the maximal range of consecutive elements to add/delete" did not take into account it and resulted in an endless loop. The bug is actually older but the patch 5f7b51bf09baca8e brings it out earlier. Handle /0 subnets properly in hash:net,port,net set types. Fixes: 5f7b51bf09ba ("netfilter: ipset: Limit the maximal range of consecutive elements to add/delete") Reported-by: Марк Коренберг Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/ipset/ip_set_hash_netportnet.c | 40 ++++++++++---------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index 19bcdb3141f6..005a7ce87217 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -173,17 +173,26 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } +static u32 +hash_netportnet4_range_to_cidr(u32 from, u32 to, u8 *cidr) +{ + if (from == 0 && to == UINT_MAX) { + *cidr = 0; + return to; + } + return ip_set_range_to_cidr(from, to, cidr); +} + static int hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_netportnet4 *h = set->data; + struct hash_netportnet4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netportnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, p = 0, port, port_to; - u32 ip2_from = 0, ip2_to = 0, ip2, ipn; - u64 n = 0, m = 0; + u32 ip2_from = 0, ip2_to = 0, ip2, i = 0; bool with_ports = false; int ret; @@ -285,19 +294,6 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); } - ipn = ip; - do { - ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]); - n++; - } while (ipn++ < ip_to); - ipn = ip2_from; - do { - ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]); - m++; - } while (ipn++ < ip2_to); - - if (n*m*(port_to - port + 1) > IPSET_MAX_RANGE) - return -ERANGE; if (retried) { ip = ntohl(h->next.ip[0]); @@ -310,13 +306,19 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], do { e.ip[0] = htonl(ip); - ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); + ip = hash_netportnet4_range_to_cidr(ip, ip_to, &e.cidr[0]); for (; p <= port_to; p++) { e.port = htons(p); do { + i++; e.ip[1] = htonl(ip2); - ip2 = ip_set_range_to_cidr(ip2, ip2_to, - &e.cidr[1]); + if (i > IPSET_MAX_RANGE) { + hash_netportnet4_data_next(&h->next, + &e); + return -ERANGE; + } + ip2 = hash_netportnet4_range_to_cidr(ip2, + ip2_to, &e.cidr[1]); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; From 8964cc36ba011dc0e1041131fa2e91fb4c2a811b Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Fri, 30 Dec 2022 13:24:38 +0100 Subject: [PATCH 089/543] netfilter: ipset: Rework long task execution when adding/deleting entries [ Upstream commit 5e29dc36bd5e2166b834ceb19990d9e68a734d7d ] When adding/deleting large number of elements in one step in ipset, it can take a reasonable amount of time and can result in soft lockup errors. The patch 5f7b51bf09ba ("netfilter: ipset: Limit the maximal range of consecutive elements to add/delete") tried to fix it by limiting the max elements to process at all. However it was not enough, it is still possible that we get hung tasks. Lowering the limit is not reasonable, so the approach in this patch is as follows: rely on the method used at resizing sets and save the state when we reach a smaller internal batch limit, unlock/lock and proceed from the saved state. Thus we can avoid long continuous tasks and at the same time removed the limit to add/delete large number of elements in one step. The nfnl mutex is held during the whole operation which prevents one to issue other ipset commands in parallel. Fixes: 5f7b51bf09ba ("netfilter: ipset: Limit the maximal range of consecutive elements to add/delete") Reported-by: syzbot+9204e7399656300bf271@syzkaller.appspotmail.com Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- include/linux/netfilter/ipset/ip_set.h | 2 +- net/netfilter/ipset/ip_set_core.c | 7 ++++--- net/netfilter/ipset/ip_set_hash_ip.c | 14 ++++++------- net/netfilter/ipset/ip_set_hash_ipmark.c | 13 ++++++------ net/netfilter/ipset/ip_set_hash_ipport.c | 13 ++++++------ net/netfilter/ipset/ip_set_hash_ipportip.c | 13 ++++++------ net/netfilter/ipset/ip_set_hash_ipportnet.c | 13 +++++++----- net/netfilter/ipset/ip_set_hash_net.c | 17 +++++++-------- net/netfilter/ipset/ip_set_hash_netiface.c | 15 ++++++-------- net/netfilter/ipset/ip_set_hash_netnet.c | 23 +++++++-------------- net/netfilter/ipset/ip_set_hash_netport.c | 19 +++++++---------- 11 files changed, 68 insertions(+), 81 deletions(-) diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index ada1296c87d5..72f5ebc5c97a 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -197,7 +197,7 @@ struct ip_set_region { }; /* Max range where every element is added/deleted in one step */ -#define IPSET_MAX_RANGE (1<<20) +#define IPSET_MAX_RANGE (1<<14) /* The max revision number supported by any set type + 1 */ #define IPSET_REVISION_MAX 9 diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index e7ba5b6dd2b7..46ebee9400da 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1698,9 +1698,10 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb, ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried); ip_set_unlock(set); retried = true; - } while (ret == -EAGAIN && - set->variant->resize && - (ret = set->variant->resize(set, retried)) == 0); + } while (ret == -ERANGE || + (ret == -EAGAIN && + set->variant->resize && + (ret = set->variant->resize(set, retried)) == 0)); if (!ret || (ret == -IPSET_ERR_EXIST && eexist)) return 0; diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 75d556d71652..24adcdd7a0b1 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -98,11 +98,11 @@ static int hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ip4 *h = set->data; + struct hash_ip4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ip4_elem e = { 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0, hosts; + u32 ip = 0, ip_to = 0, hosts, i = 0; int ret = 0; if (tb[IPSET_ATTR_LINENO]) @@ -147,14 +147,14 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1); - /* 64bit division is not allowed on 32bit */ - if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE) - return -ERANGE; - if (retried) ip = ntohl(h->next.ip); - for (; ip <= ip_to;) { + for (; ip <= ip_to; i++) { e.ip = htonl(ip); + if (i > IPSET_MAX_RANGE) { + hash_ip4_data_next(&h->next, &e); + return -ERANGE; + } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index 153de3457423..a22ec1a6f6ec 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -97,11 +97,11 @@ static int hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipmark4 *h = set->data; + struct hash_ipmark4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipmark4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip, ip_to = 0; + u32 ip, ip_to = 0, i = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -148,13 +148,14 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], ip_set_mask_from_to(ip, ip_to, cidr); } - if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE) - return -ERANGE; - if (retried) ip = ntohl(h->next.ip); - for (; ip <= ip_to; ip++) { + for (; ip <= ip_to; ip++, i++) { e.ip = htonl(ip); + if (i > IPSET_MAX_RANGE) { + hash_ipmark4_data_next(&h->next, &e); + return -ERANGE; + } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 7303138e46be..10481760a9b2 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -105,11 +105,11 @@ static int hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipport4 *h = set->data; + struct hash_ipport4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport4_elem e = { .ip = 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip, ip_to = 0, p = 0, port, port_to; + u32 ip, ip_to = 0, p = 0, port, port_to, i = 0; bool with_ports = false; int ret; @@ -173,17 +173,18 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); } - if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) - return -ERANGE; - if (retried) ip = ntohl(h->next.ip); for (; ip <= ip_to; ip++) { p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; - for (; p <= port_to; p++) { + for (; p <= port_to; p++, i++) { e.ip = htonl(ip); e.port = htons(p); + if (i > IPSET_MAX_RANGE) { + hash_ipport4_data_next(&h->next, &e); + return -ERANGE; + } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 334fb1ad0e86..39a01934b153 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -108,11 +108,11 @@ static int hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipportip4 *h = set->data; + struct hash_ipportip4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportip4_elem e = { .ip = 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip, ip_to = 0, p = 0, port, port_to; + u32 ip, ip_to = 0, p = 0, port, port_to, i = 0; bool with_ports = false; int ret; @@ -180,17 +180,18 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); } - if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) - return -ERANGE; - if (retried) ip = ntohl(h->next.ip); for (; ip <= ip_to; ip++) { p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; - for (; p <= port_to; p++) { + for (; p <= port_to; p++, i++) { e.ip = htonl(ip); e.port = htons(p); + if (i > IPSET_MAX_RANGE) { + hash_ipportip4_data_next(&h->next, &e); + return -ERANGE; + } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 7df94f437f60..5c6de605a9fb 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -160,12 +160,12 @@ static int hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_ipportnet4 *h = set->data; + struct hash_ipportnet4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, p = 0, port, port_to; - u32 ip2_from = 0, ip2_to = 0, ip2; + u32 ip2_from = 0, ip2_to = 0, ip2, i = 0; bool with_ports = false; u8 cidr; int ret; @@ -253,9 +253,6 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], swap(port, port_to); } - if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) - return -ERANGE; - ip2_to = ip2_from; if (tb[IPSET_ATTR_IP2_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to); @@ -282,9 +279,15 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], for (; p <= port_to; p++) { e.port = htons(p); do { + i++; e.ip2 = htonl(ip2); ip2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr); e.cidr = cidr - 1; + if (i > IPSET_MAX_RANGE) { + hash_ipportnet4_data_next(&h->next, + &e); + return -ERANGE; + } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 1422739d9aa2..ce0a9ce5a91f 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -136,11 +136,11 @@ static int hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_net4 *h = set->data; + struct hash_net4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { .cidr = HOST_MASK }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0, ipn, n = 0; + u32 ip = 0, ip_to = 0, i = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -188,19 +188,16 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], if (ip + UINT_MAX == ip_to) return -IPSET_ERR_HASH_RANGE; } - ipn = ip; - do { - ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr); - n++; - } while (ipn++ < ip_to); - - if (n > IPSET_MAX_RANGE) - return -ERANGE; if (retried) ip = ntohl(h->next.ip); do { + i++; e.ip = htonl(ip); + if (i > IPSET_MAX_RANGE) { + hash_net4_data_next(&h->next, &e); + return -ERANGE; + } ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 9810f5bf63f5..031073286236 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -202,7 +202,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 ip = 0, ip_to = 0, ipn, n = 0; + u32 ip = 0, ip_to = 0, i = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -256,19 +256,16 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip, ip_to, e.cidr); } - ipn = ip; - do { - ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr); - n++; - } while (ipn++ < ip_to); - - if (n > IPSET_MAX_RANGE) - return -ERANGE; if (retried) ip = ntohl(h->next.ip); do { + i++; e.ip = htonl(ip); + if (i > IPSET_MAX_RANGE) { + hash_netiface4_data_next(&h->next, &e); + return -ERANGE; + } ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr); ret = adtfn(set, &e, &ext, &ext, flags); diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index 3d09eefe998a..c07b70bf32db 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -163,13 +163,12 @@ static int hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_netnet4 *h = set->data; + struct hash_netnet4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netnet4_elem e = { }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0; - u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn; - u64 n = 0, m = 0; + u32 ip2 = 0, ip2_from = 0, ip2_to = 0, i = 0; int ret; if (tb[IPSET_ATTR_LINENO]) @@ -245,19 +244,6 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); } - ipn = ip; - do { - ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]); - n++; - } while (ipn++ < ip_to); - ipn = ip2_from; - do { - ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]); - m++; - } while (ipn++ < ip2_to); - - if (n*m > IPSET_MAX_RANGE) - return -ERANGE; if (retried) { ip = ntohl(h->next.ip[0]); @@ -270,7 +256,12 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], e.ip[0] = htonl(ip); ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); do { + i++; e.ip[1] = htonl(ip2); + if (i > IPSET_MAX_RANGE) { + hash_netnet4_data_next(&h->next, &e); + return -ERANGE; + } ip2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 09cf72eb37f8..d1a0628df4ef 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -154,12 +154,11 @@ static int hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { - const struct hash_netport4 *h = set->data; + struct hash_netport4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); - u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn; - u64 n = 0; + u32 port, port_to, p = 0, ip = 0, ip_to = 0, i = 0; bool with_ports = false; u8 cidr; int ret; @@ -236,14 +235,6 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], } else { ip_set_mask_from_to(ip, ip_to, e.cidr + 1); } - ipn = ip; - do { - ipn = ip_set_range_to_cidr(ipn, ip_to, &cidr); - n++; - } while (ipn++ < ip_to); - - if (n*(port_to - port + 1) > IPSET_MAX_RANGE) - return -ERANGE; if (retried) { ip = ntohl(h->next.ip); @@ -255,8 +246,12 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], e.ip = htonl(ip); ip = ip_set_range_to_cidr(ip, ip_to, &cidr); e.cidr = cidr - 1; - for (; p <= port_to; p++) { + for (; p <= port_to; p++, i++) { e.port = htons(p); + if (i > IPSET_MAX_RANGE) { + hash_netport4_data_next(&h->next, &e); + return -ERANGE; + } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; From 466655ddac0ce29ecb1250ec81ddabd83c8c9d62 Mon Sep 17 00:00:00 2001 From: Xiu Jianfeng Date: Wed, 9 Nov 2022 17:19:05 +0800 Subject: [PATCH 090/543] drm/virtio: Fix memory leak in virtio_gpu_object_create() [ Upstream commit a764da46cd15f8b40292d2c0b29c4bf9a3e66c7e ] The virtio_gpu_object_shmem_init() will alloc memory and save it in @ents, so when virtio_gpu_array_alloc() fails, this memory should be freed, this patch fixes it. Fixes: e7fef0923303 ("drm/virtio: Simplify error handling of virtio_gpu_object_create()") Signed-off-by: Xiu Jianfeng Reviewed-by: Dmitry Osipenko Signed-off-by: Dmitry Osipenko Link: https://patchwork.freedesktop.org/patch/msgid/20221109091905.55451-1-xiujianfeng@huawei.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/virtio/virtgpu_object.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index 8d7728181de0..c7e74cf13022 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -184,7 +184,7 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev, struct virtio_gpu_object_array *objs = NULL; struct drm_gem_shmem_object *shmem_obj; struct virtio_gpu_object *bo; - struct virtio_gpu_mem_entry *ents; + struct virtio_gpu_mem_entry *ents = NULL; unsigned int nents; int ret; @@ -210,7 +210,7 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev, ret = -ENOMEM; objs = virtio_gpu_array_alloc(1); if (!objs) - goto err_put_id; + goto err_free_entry; virtio_gpu_array_add_obj(objs, &bo->base.base); ret = virtio_gpu_array_lock_resv(objs); @@ -239,6 +239,8 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev, err_put_objs: virtio_gpu_array_put_free(objs); +err_free_entry: + kvfree(ents); err_put_id: virtio_gpu_resource_id_put(vgdev, bo->hw_res_handle); err_free_gem: From 2bb8016d4bb042ae43a6f98298b504ba73ae9b38 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 29 Dec 2022 13:09:00 +0400 Subject: [PATCH 091/543] perf tools: Fix resources leak in perf_data__open_dir() [ Upstream commit 0a6564ebd953c4590663c9a3c99a3ea9920ade6f ] In perf_data__open_dir(), opendir() opens the directory stream. Add missing closedir() to release it after use. Fixes: eb6176709b235b96 ("perf data: Add perf_data__open_dir_data function") Reviewed-by: Adrian Hunter Signed-off-by: Miaoqian Lin Cc: Alexander Shishkin Cc: Alexey Bayduraev Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20221229090903.1402395-1-linmq006@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/data.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index a7f68c309545..fc16299c915f 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -132,6 +132,7 @@ int perf_data__open_dir(struct perf_data *data) file->size = st.st_size; } + closedir(dir); if (!files) return -EINVAL; @@ -140,6 +141,7 @@ int perf_data__open_dir(struct perf_data *data) return 0; out_err: + closedir(dir); close_dir(files, nr); return ret; } From 71f9fd5bcf0954377eec3dc1119c44064f10679b Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 8 Nov 2022 15:14:20 +0100 Subject: [PATCH 092/543] drm/imx: ipuv3-plane: Fix overlay plane width [ Upstream commit 92d43bd3bc9728c1fb114d7011d46f5ea9489e28 ] ipu_src_rect_width() was introduced to support odd screen resolutions such as 1366x768 by internally rounding up primary plane width to a multiple of 8 and compensating with reduced horizontal blanking. This also caused overlay plane width to be rounded up, which was not intended. Fix overlay plane width by limiting the rounding up to the primary plane. drm_rect_width(&new_state->src) >> 16 is the same value as drm_rect_width(dst) because there is no plane scaling support. Fixes: 94dfec48fca7 ("drm/imx: Add 8 pixel alignment fix") Reviewed-by: Lucas Stach Link: https://lore.kernel.org/r/20221108141420.176696-1-p.zabel@pengutronix.de Signed-off-by: Philipp Zabel Link: https://patchwork.freedesktop.org/patch/msgid/20221108141420.176696-1-p.zabel@pengutronix.de Tested-by: Ian Ray (cherry picked from commit 4333472f8d7befe62359fecb1083cd57a6e07bfc) Signed-off-by: Philipp Zabel Signed-off-by: Sasha Levin --- drivers/gpu/drm/imx/ipuv3-plane.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index dba4f7d81d69..80142d9a4a55 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -614,6 +614,11 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, break; } + if (ipu_plane->dp_flow == IPU_DP_FLOW_SYNC_BG) + width = ipu_src_rect_width(new_state); + else + width = drm_rect_width(&new_state->src) >> 16; + eba = drm_plane_state_to_eba(new_state, 0); /* @@ -622,8 +627,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, */ if (ipu_state->use_pre) { axi_id = ipu_chan_assign_axi_id(ipu_plane->dma); - ipu_prg_channel_configure(ipu_plane->ipu_ch, axi_id, - ipu_src_rect_width(new_state), + ipu_prg_channel_configure(ipu_plane->ipu_ch, axi_id, width, drm_rect_height(&new_state->src) >> 16, fb->pitches[0], fb->format->format, fb->modifier, &eba); @@ -678,9 +682,8 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, break; } - ipu_dmfc_config_wait4eot(ipu_plane->dmfc, ALIGN(drm_rect_width(dst), 8)); + ipu_dmfc_config_wait4eot(ipu_plane->dmfc, width); - width = ipu_src_rect_width(new_state); height = drm_rect_height(&new_state->src) >> 16; info = drm_format_info(fb->format->format); ipu_calculate_bursts(width, info->cpp[0], fb->pitches[0], @@ -744,8 +747,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, ipu_cpmem_set_burstsize(ipu_plane->ipu_ch, 16); ipu_cpmem_zero(ipu_plane->alpha_ch); - ipu_cpmem_set_resolution(ipu_plane->alpha_ch, - ipu_src_rect_width(new_state), + ipu_cpmem_set_resolution(ipu_plane->alpha_ch, width, drm_rect_height(&new_state->src) >> 16); ipu_cpmem_set_format_passthrough(ipu_plane->alpha_ch, 8); ipu_cpmem_set_high_priority(ipu_plane->alpha_ch); From 73fee7e1e5ea11b51c51c46e0577a197ca3602cf Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 2 Jan 2023 23:05:33 +0900 Subject: [PATCH 093/543] fs/ntfs3: don't hold ni_lock when calling truncate_setsize() [ Upstream commit 0226635c304cfd5c9db9b78c259cb713819b057e ] syzbot is reporting hung task at do_user_addr_fault() [1], for there is a silent deadlock between PG_locked bit and ni_lock lock. Since filemap_update_page() calls filemap_read_folio() after calling folio_trylock() which will set PG_locked bit, ntfs_truncate() must not call truncate_setsize() which will wait for PG_locked bit to be cleared when holding ni_lock lock. Link: https://lore.kernel.org/all/00000000000060d41f05f139aa44@google.com/ Link: https://syzkaller.appspot.com/bug?extid=bed15dbf10294aa4f2ae [1] Reported-by: syzbot Debugged-by: Linus Torvalds Co-developed-by: Hillf Danton Signed-off-by: Hillf Danton Signed-off-by: Tetsuo Handa Fixes: 4342306f0f0d ("fs/ntfs3: Add file operations and implementation") Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/ntfs3/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 4f2ffc7ef296..f31c0389a2e7 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -486,10 +486,10 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size) new_valid = ntfs_up_block(sb, min_t(u64, ni->i_valid, new_size)); - ni_lock(ni); - truncate_setsize(inode, new_size); + ni_lock(ni); + down_write(&ni->file.run_lock); err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size, &new_valid, ni->mi.sbi->options->prealloc, NULL); From faecbaf1ffcdbc7d4ab7c6ad7e222d14a2d71771 Mon Sep 17 00:00:00 2001 From: Daniil Tatianin Date: Mon, 2 Jan 2023 12:53:35 +0300 Subject: [PATCH 094/543] drivers/net/bonding/bond_3ad: return when there's no aggregator [ Upstream commit 9c807965483f42df1d053b7436eedd6cf28ece6f ] Otherwise we would dereference a NULL aggregator pointer when calling __set_agg_ports_ready on the line below. Found by Linux Verification Center (linuxtesting.org) with the SVACE static analysis tool. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Daniil Tatianin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/bonding/bond_3ad.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index e58a1e0cadd2..9270977e6c7f 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -1540,6 +1540,7 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr) slave_err(bond->dev, port->slave->dev, "Port %d did not find a suitable aggregator\n", port->actor_port_number); + return; } } /* if all aggregator's ports are READY_N == TRUE, set ready=TRUE From 667ce030bdfb62a86266444b2c3fd6ab98b9df9b Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Tue, 3 Jan 2023 09:20:12 +0530 Subject: [PATCH 095/543] octeontx2-pf: Fix lmtst ID used in aura free [ Upstream commit 4af1b64f80fbe1275fb02c5f1c0cef099a4a231f ] Current code uses per_cpu pointer to get the lmtst_id mapped to the core on which aura_free() is executed. Using per_cpu pointer without preemption disable causing mismatch between lmtst_id and core on which pointer gets freed. This patch fixes the issue by disabling preemption around aura_free. Fixes: ef6c8da71eaf ("octeontx2-pf: cn10K: Reserve LMTST lines per core") Signed-off-by: Sunil Goutham Signed-off-by: Geetha sowjanya Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../marvell/octeontx2/nic/otx2_common.c | 30 +++++++++++++------ 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 9e10e7471b88..88f8772a61cd 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -1012,6 +1012,7 @@ static void otx2_pool_refill_task(struct work_struct *work) rbpool = cq->rbpool; free_ptrs = cq->pool_ptrs; + get_cpu(); while (cq->pool_ptrs) { if (otx2_alloc_rbuf(pfvf, rbpool, &bufptr)) { /* Schedule a WQ if we fails to free atleast half of the @@ -1031,6 +1032,7 @@ static void otx2_pool_refill_task(struct work_struct *work) pfvf->hw_ops->aura_freeptr(pfvf, qidx, bufptr + OTX2_HEAD_ROOM); cq->pool_ptrs--; } + put_cpu(); cq->refill_task_sched = false; } @@ -1368,6 +1370,7 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) if (err) goto fail; + get_cpu(); /* Allocate pointers and free them to aura/pool */ for (qidx = 0; qidx < hw->tot_tx_queues; qidx++) { pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx); @@ -1376,18 +1379,24 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) sq = &qset->sq[qidx]; sq->sqb_count = 0; sq->sqb_ptrs = kcalloc(num_sqbs, sizeof(*sq->sqb_ptrs), GFP_KERNEL); - if (!sq->sqb_ptrs) - return -ENOMEM; + if (!sq->sqb_ptrs) { + err = -ENOMEM; + goto err_mem; + } for (ptr = 0; ptr < num_sqbs; ptr++) { - if (otx2_alloc_rbuf(pfvf, pool, &bufptr)) - return -ENOMEM; + err = otx2_alloc_rbuf(pfvf, pool, &bufptr); + if (err) + goto err_mem; pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr); sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr; } } - return 0; +err_mem: + put_cpu(); + return err ? -ENOMEM : 0; + fail: otx2_mbox_reset(&pfvf->mbox.mbox, 0); otx2_aura_pool_free(pfvf); @@ -1426,18 +1435,21 @@ int otx2_rq_aura_pool_init(struct otx2_nic *pfvf) if (err) goto fail; + get_cpu(); /* Allocate pointers and free them to aura/pool */ for (pool_id = 0; pool_id < hw->rqpool_cnt; pool_id++) { pool = &pfvf->qset.pool[pool_id]; for (ptr = 0; ptr < num_ptrs; ptr++) { - if (otx2_alloc_rbuf(pfvf, pool, &bufptr)) - return -ENOMEM; + err = otx2_alloc_rbuf(pfvf, pool, &bufptr); + if (err) + goto err_mem; pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr + OTX2_HEAD_ROOM); } } - - return 0; +err_mem: + put_cpu(); + return err ? -ENOMEM : 0; fail: otx2_mbox_reset(&pfvf->mbox.mbox, 0); otx2_aura_pool_free(pfvf); From a713602807f32afc04add331410c77ef790ef77a Mon Sep 17 00:00:00 2001 From: Szymon Heidrich Date: Tue, 3 Jan 2023 10:17:09 +0100 Subject: [PATCH 096/543] usb: rndis_host: Secure rndis_query check against int overflow [ Upstream commit c7dd13805f8b8fc1ce3b6d40f6aff47e66b72ad2 ] Variables off and len typed as uint32 in rndis_query function are controlled by incoming RNDIS response message thus their value may be manipulated. Setting off to a unexpectetly large value will cause the sum with len and 8 to overflow and pass the implemented validation step. Consequently the response pointer will be referring to a location past the expected buffer boundaries allowing information leakage e.g. via RNDIS_OID_802_3_PERMANENT_ADDRESS OID. Fixes: ddda08624013 ("USB: rndis_host, various cleanups") Signed-off-by: Szymon Heidrich Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/rndis_host.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index f79333fe1783..7b3739b29c8f 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -255,7 +255,8 @@ static int rndis_query(struct usbnet *dev, struct usb_interface *intf, off = le32_to_cpu(u.get_c->offset); len = le32_to_cpu(u.get_c->len); - if (unlikely((8 + off + len) > CONTROL_BUFFER_SIZE)) + if (unlikely((off > CONTROL_BUFFER_SIZE - 8) || + (len > CONTROL_BUFFER_SIZE - 8 - off))) goto response_error; if (*reply_len != -1 && len != *reply_len) From 75b9086028ceee04470c8de05a1a02eaf90b361c Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 30 Dec 2022 11:26:27 +0100 Subject: [PATCH 097/543] perf lock contention: Fix core dump related to not finding the "__sched_text_end" symbol on s/390 [ Upstream commit d8d85ce86dc82de4f88b821a78f533b9d5b22a45 ] The test case perf lock contention dumps core on s390. Run the following commands: # ./perf lock record -- ./perf bench sched messaging # Running 'sched/messaging' benchmark: # 20 sender and receiver processes per group # 10 groups == 400 processes run Total time: 2.799 [sec] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.073 MB perf.data (100 samples) ] # # ./perf lock contention Segmentation fault (core dumped) # The function call stack is lengthy, here are the top 5 functions: # gdb ./perf core.24048 GNU gdb (GDB) Fedora Linux 12.1-6.fc37 Core was generated by `./perf lock contention'. Program terminated with signal SIGSEGV, Segmentation fault. #0 0x00000000011dd25c in machine__is_lock_function (machine=0x3029e28, addr=1789230) at util/machine.c:3356 3356 machine->sched.text_end = kmap->unmap_ip(kmap, sym->start); (gdb) where #0 0x00000000011dd25c in machine__is_lock_function (machine=0x3029e28, addr=1789230) at util/machine.c:3356 #1 0x000000000109f244 in callchain_id (evsel=0x30313e0, sample=0x3ffea4f77d0) at builtin-lock.c:957 #2 0x000000000109e094 in get_key_by_aggr_mode (key=0x3ffea4f7290, addr=27758136, evsel=0x30313e0, sample=0x3ffea4f77d0) at builtin-lock.c:586 #3 0x000000000109f4d0 in report_lock_contention_begin_event (evsel=0x30313e0, sample=0x3ffea4f77d0) at builtin-lock.c:1004 #4 0x00000000010a00ae in evsel__process_contention_begin (evsel=0x30313e0, sample=0x3ffea4f77d0) at builtin-lock.c:1254 #5 0x00000000010a0e14 in process_sample_event (tool=0x3ffea4f8480, event=0x3ff85601ef8, sample=0x3ffea4f77d0, evsel=0x30313e0, machine=0x3029e28) at builtin-lock.c:1464 ..... The issue is in function machine__is_lock_function() in file ./util/machine.c lines 3355: /* should not fail from here */ sym = machine__find_kernel_symbol_by_name(machine, "__sched_text_end", &kmap); machine->sched.text_end = kmap->unmap_ip(kmap, sym->start) On s390 the symbol __sched_text_end is *NOT* in the symbol list and the resulting pointer sym is set to NULL. The sym->start is then a NULL pointer access and generates the core dump. The reason why __sched_text_end is not in the symbol list on s390 is simple: When the symbol list is created at perf start up with function calls dso__load +--> dso__load_vmlinux_path +--> dso__load_vmlinux +--> dso__load_sym +--> dso__load_sym_internal (reads kernel symbols) +--> symbols__fixup_end +--> symbols__fixup_duplicate The issue is in function symbols__fixup_duplicate(). It deletes all symbols with have the same address. On s390: # nm -g ~/linux/vmlinux| fgrep c68390 0000000000c68390 T __cpuidle_text_start 0000000000c68390 T __sched_text_end # two symbols have identical addresses and __sched_text_end is considered duplicate (in ascending sort order) and removed from the symbol list. Therefore it is missing and an invalid pointer reference occurs. The code checks for symbol __sched_text_start and when it exists assumes symbol __sched_text_end is also in the symbol table. However this is not the case on s390. Same situation exists for symbol __lock_text_start: 0000000000c68770 T __cpuidle_text_end 0000000000c68770 T __lock_text_start This symbol is also removed from the symbol table but used in function machine__is_lock_function(). To fix this and keep duplicate symbols in the symbol table, set symbol_conf.allow_aliases to true. This prevents the removal of duplicate symbols in function symbols__fixup_duplicate(). Output After: # ./perf lock contention contended total wait max wait avg wait type caller 48 124.39 ms 123.99 ms 2.59 ms rwsem:W unlink_anon_vmas+0x24a 47 83.68 ms 83.26 ms 1.78 ms rwsem:W free_pgtables+0x132 5 41.22 us 10.55 us 8.24 us rwsem:W free_pgtables+0x140 4 40.12 us 20.55 us 10.03 us rwsem:W copy_process+0x1ac8 # Fixes: 0d2997f750d1de39 ("perf lock: Look up callchain for the contended locks") Signed-off-by: Thomas Richter Acked-by: Namhyung Kim Cc: Heiko Carstens Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Vasily Gorbik Link: https://lore.kernel.org/r/20221230102627.2410847-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-lock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 66520712a167..470106643ed5 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -1582,6 +1582,7 @@ static int __cmd_report(bool display_info) /* for lock function check */ symbol_conf.sort_by_name = true; + symbol_conf.allow_aliases = true; symbol__init(&session->header.env); if (!perf_session__has_traces(session, "lock record")) @@ -1660,6 +1661,7 @@ static int __cmd_contention(int argc, const char **argv) /* for lock function check */ symbol_conf.sort_by_name = true; + symbol_conf.allow_aliases = true; symbol__init(&session->header.env); if (use_bpf) { From 36caf0281b9113eff98dff0160d6b03c090e6347 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 3 Jan 2023 22:44:01 -0800 Subject: [PATCH 098/543] perf stat: Fix handling of unsupported cgroup events when using BPF counters [ Upstream commit 2d656b0f81b22101db0447f890e39fdd736b745e ] When --for-each-cgroup option is used, it fails when any of events is not supported and exits immediately. This is not how 'perf stat' handles unsupported events. Let's ignore the failure and proceed with others so that the output is similar to when BPF counters are not used: Before: $ sudo ./perf stat -a --bpf-counters -e L1-icache-loads,L1-dcache-loads --for-each-cgroup system.slice,user.slice sleep 1 Failed to open first cgroup events $ After it shows output similat to when --bpf-counters isn't specified: $ sudo ./perf stat -a --bpf-counters -e L1-icache-loads,L1-dcache-loads --for-each-cgroup system.slice,user.slice sleep 1 Performance counter stats for 'system wide': L1-icache-loads system.slice 29,892,418 L1-dcache-loads system.slice L1-icache-loads user.slice 52,497,220 L1-dcache-loads user.slice $ Fixes: 944138f048f7d759 ("perf stat: Enable BPF counter with --for-each-cgroup") Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Song Liu Link: https://lore.kernel.org/r/20230104064402.1551516-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/bpf_counter_cgroup.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c index 3c2df7522f6f..1c82377ed78b 100644 --- a/tools/perf/util/bpf_counter_cgroup.c +++ b/tools/perf/util/bpf_counter_cgroup.c @@ -116,27 +116,19 @@ static int bperf_load_program(struct evlist *evlist) /* open single copy of the events w/o cgroup */ err = evsel__open_per_cpu(evsel, evsel->core.cpus, -1); - if (err) { - pr_err("Failed to open first cgroup events\n"); - goto out; - } + if (err == 0) + evsel->supported = true; map_fd = bpf_map__fd(skel->maps.events); perf_cpu_map__for_each_cpu(cpu, j, evsel->core.cpus) { int fd = FD(evsel, j); __u32 idx = evsel->core.idx * total_cpus + cpu.cpu; - err = bpf_map_update_elem(map_fd, &idx, &fd, - BPF_ANY); - if (err < 0) { - pr_err("Failed to update perf_event fd\n"); - goto out; - } + bpf_map_update_elem(map_fd, &idx, &fd, BPF_ANY); } evsel->cgrp = leader_cgrp; } - evsel->supported = true; if (evsel->cgrp == cgrp) continue; From c776df09f469dff767bcd0fd01a2ba3b3801baa6 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 3 Jan 2023 22:44:02 -0800 Subject: [PATCH 099/543] perf stat: Fix handling of --for-each-cgroup with --bpf-counters to match non BPF mode [ Upstream commit 54b353a20c7e8be98414754f5aff98c8a68fcc1f ] The --for-each-cgroup can have the same cgroup multiple times, but this confuses BPF counters (since they have the same cgroup id), making only the last cgroup events to be counted. Let's check the cgroup name before adding a new entry to the cgroups list. Before: $ sudo ./perf stat -a --bpf-counters --for-each-cgroup /,/ sleep 1 Performance counter stats for 'system wide': msec cpu-clock / context-switches / cpu-migrations / page-faults / cycles / instructions / branches / branch-misses / 8,016.04 msec cpu-clock / # 7.998 CPUs utilized 6,152 context-switches / # 767.461 /sec 250 cpu-migrations / # 31.187 /sec 442 page-faults / # 55.139 /sec 613,111,487 cycles / # 0.076 GHz 280,599,604 instructions / # 0.46 insn per cycle 57,692,724 branches / # 7.197 M/sec 3,385,168 branch-misses / # 5.87% of all branches 1.002220125 seconds time elapsed After it becomes similar to the non-BPF mode: $ sudo ./perf stat -a --bpf-counters --for-each-cgroup /,/ sleep 1 Performance counter stats for 'system wide': 8,013.38 msec cpu-clock / # 7.998 CPUs utilized 6,859 context-switches / # 855.944 /sec 334 cpu-migrations / # 41.680 /sec 345 page-faults / # 43.053 /sec 782,326,119 cycles / # 0.098 GHz 471,645,724 instructions / # 0.60 insn per cycle 94,963,430 branches / # 11.851 M/sec 3,685,511 branch-misses / # 3.88% of all branches 1.001864539 seconds time elapsed Committer notes: As a reminder, to test with BPF counters one has to use BUILD_BPF_SKEL=1 in the make command line and have clang/llvm installed when building perf, otherwise the --bpf-counters option will not be available: # perf stat -a --bpf-counters --for-each-cgroup /,/ sleep 1 Error: unknown option `bpf-counters' Usage: perf stat [] [] -a, --all-cpus system-wide collection from all CPUs # Fixes: bb1c15b60b981d10 ("perf stat: Support regex pattern in --for-each-cgroup") Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: bpf@vger.kernel.org Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Link: https://lore.kernel.org/r/20230104064402.1551516-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/cgroup.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index e99b41f9be45..cd978c240e0d 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -224,6 +224,19 @@ static int add_cgroup_name(const char *fpath, const struct stat *sb __maybe_unus return 0; } +static int check_and_add_cgroup_name(const char *fpath) +{ + struct cgroup_name *cn; + + list_for_each_entry(cn, &cgroup_list, list) { + if (!strcmp(cn->name, fpath)) + return 0; + } + + /* pretend if it's added by ftw() */ + return add_cgroup_name(fpath, NULL, FTW_D, NULL); +} + static void release_cgroup_list(void) { struct cgroup_name *cn; @@ -242,7 +255,7 @@ static int list_cgroups(const char *str) struct cgroup_name *cn; char *s; - /* use given name as is - for testing purpose */ + /* use given name as is when no regex is given */ for (;;) { p = strchr(str, ','); e = p ? p : eos; @@ -253,13 +266,13 @@ static int list_cgroups(const char *str) s = strndup(str, e - str); if (!s) return -1; - /* pretend if it's added by ftw() */ - ret = add_cgroup_name(s, NULL, FTW_D, NULL); + + ret = check_and_add_cgroup_name(s); free(s); - if (ret) + if (ret < 0) return -1; } else { - if (add_cgroup_name("", NULL, FTW_D, NULL) < 0) + if (check_and_add_cgroup_name("/") < 0) return -1; } From 20a07570c1667a48fe50fdfa59f4ece57775b69a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 15 Nov 2022 16:15:18 +0300 Subject: [PATCH 100/543] drm/i915: unpin on error in intel_vgpu_shadow_mm_pin() [ Upstream commit 3792fc508c095abd84b10ceae12bd773e61fdc36 ] Call intel_vgpu_unpin_mm() on this error path. Fixes: 418741480809 ("drm/i915/gvt: Adding ppgtt to GVT GEM context after shadow pdps settled.") Signed-off-by: Dan Carpenter Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/Y3OQ5tgZIVxyQ/WV@kili Reviewed-by: Zhenyu Wang Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gvt/scheduler.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index d6fe94cd0fdb..8342d95f56cb 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -696,6 +696,7 @@ intel_vgpu_shadow_mm_pin(struct intel_vgpu_workload *workload) if (workload->shadow_mm->type != INTEL_GVT_MM_PPGTT || !workload->shadow_mm->ppgtt_mm.shadowed) { + intel_vgpu_unpin_mm(workload->shadow_mm); gvt_vgpu_err("workload shadow ppgtt isn't ready\n"); return -EINVAL; } From 1022519da69d99d455c58ca181a6c499c562c70e Mon Sep 17 00:00:00 2001 From: Zheng Wang Date: Fri, 30 Dec 2022 00:56:41 +0800 Subject: [PATCH 101/543] drm/i915/gvt: fix double free bug in split_2MB_gtt_entry [ Upstream commit 4a61648af68f5ba4884f0e3b494ee1cabc4b6620 ] If intel_gvt_dma_map_guest_page failed, it will call ppgtt_invalidate_spt, which will finally free the spt. But the caller function ppgtt_populate_spt_by_guest_entry does not notice that, it will free spt again in its error path. Fix this by canceling the mapping of DMA address and freeing sub_spt. Besides, leave the handle of spt destroy to caller function instead of callee function when error occurs. Fixes: b901b252b6cf ("drm/i915/gvt: Add 2M huge gtt support") Signed-off-by: Zheng Wang Reviewed-by: Zhenyu Wang Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20221229165641.1192455-1-zyytlz.wz@163.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gvt/gtt.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index ce0eb03709c3..80c60754a5c1 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1214,10 +1214,8 @@ static int split_2MB_gtt_entry(struct intel_vgpu *vgpu, for_each_shadow_entry(sub_spt, &sub_se, sub_index) { ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + sub_index, PAGE_SIZE, &dma_addr); - if (ret) { - ppgtt_invalidate_spt(spt); - return ret; - } + if (ret) + goto err; sub_se.val64 = se->val64; /* Copy the PAT field from PDE. */ @@ -1236,6 +1234,17 @@ static int split_2MB_gtt_entry(struct intel_vgpu *vgpu, ops->set_pfn(se, sub_spt->shadow_page.mfn); ppgtt_set_shadow_entry(spt, se, index); return 0; +err: + /* Cancel the existing addess mappings of DMA addr. */ + for_each_present_shadow_entry(sub_spt, &sub_se, sub_index) { + gvt_vdbg_mm("invalidate 4K entry\n"); + ppgtt_invalidate_pte(sub_spt, &sub_se); + } + /* Release the new allocated spt. */ + trace_spt_change(sub_spt->vgpu->id, "release", sub_spt, + sub_spt->guest_page.gfn, sub_spt->shadow_page.type); + ppgtt_free_spt(sub_spt); + return ret; } static int split_64KB_gtt_entry(struct intel_vgpu *vgpu, From 4e0c2961e53d22114c9a0505cfe90627e845e97a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 4 Jan 2023 21:32:35 +0800 Subject: [PATCH 102/543] ublk: honor IO_URING_F_NONBLOCK for handling control command [ Upstream commit fa8e442e832a3647cdd90f3e606c473a51bc1b26 ] Most of control command handlers may sleep, so return -EAGAIN in case of IO_URING_F_NONBLOCK to defer the handling into io wq context. Fixes: 71f28f3136af ("ublk_drv: add io_uring based userspace block driver") Reported-by: Jens Axboe Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20230104133235.836536-1-ming.lei@redhat.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/ublk_drv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index e9de9d846b73..17b677b5d3b2 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1992,6 +1992,9 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd; int ret = -EINVAL; + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + ublk_ctrl_cmd_dump(cmd); if (!(issue_flags & IO_URING_F_SQE128)) From 50c81b35df01db12b348c5cbf4b1917dc9a7db54 Mon Sep 17 00:00:00 2001 From: Caleb Sander Date: Tue, 3 Jan 2023 16:30:21 -0700 Subject: [PATCH 103/543] qed: allow sleep in qed_mcp_trace_dump() [ Upstream commit 5401c3e0992860b11fb4b25796e4c4f1921740df ] By default, qed_mcp_cmd_and_union() delays 10us at a time in a loop that can run 500K times, so calls to qed_mcp_nvm_rd_cmd() may block the current thread for over 5s. We observed thread scheduling delays over 700ms in production, with stacktraces pointing to this code as the culprit. qed_mcp_trace_dump() is called from ethtool, so sleeping is permitted. It already can sleep in qed_mcp_halt(), which calls qed_mcp_cmd(). Add a "can sleep" parameter to qed_find_nvram_image() and qed_nvram_read() so they can sleep during qed_mcp_trace_dump(). qed_mcp_trace_get_meta_info() and qed_mcp_trace_read_meta(), called only by qed_mcp_trace_dump(), allow these functions to sleep. I can't tell if the other caller (qed_grc_dump_mcp_hw_dump()) can sleep, so keep b_can_sleep set to false when it calls these functions. An example stacktrace from a custom warning we added to the kernel showing a thread that has not scheduled despite long needing resched: [ 2745.362925,17] ------------[ cut here ]------------ [ 2745.362941,17] WARNING: CPU: 23 PID: 5640 at arch/x86/kernel/irq.c:233 do_IRQ+0x15e/0x1a0() [ 2745.362946,17] Thread not rescheduled for 744 ms after irq 99 [ 2745.362956,17] Modules linked in: ... [ 2745.363339,17] CPU: 23 PID: 5640 Comm: lldpd Tainted: P O 4.4.182+ #202104120910+6d1da174272d.61x [ 2745.363343,17] Hardware name: FOXCONN MercuryB/Quicksilver Controller, BIOS H11P1N09 07/08/2020 [ 2745.363346,17] 0000000000000000 ffff885ec07c3ed8 ffffffff8131eb2f ffff885ec07c3f20 [ 2745.363358,17] ffffffff81d14f64 ffff885ec07c3f10 ffffffff81072ac2 ffff88be98ed0000 [ 2745.363369,17] 0000000000000063 0000000000000174 0000000000000074 0000000000000000 [ 2745.363379,17] Call Trace: [ 2745.363382,17] [] dump_stack+0x8e/0xcf [ 2745.363393,17] [] warn_slowpath_common+0x82/0xc0 [ 2745.363398,17] [] warn_slowpath_fmt+0x4c/0x50 [ 2745.363404,17] [] ? rcu_irq_exit+0xae/0xc0 [ 2745.363408,17] [] do_IRQ+0x15e/0x1a0 [ 2745.363413,17] [] common_interrupt+0x89/0x89 [ 2745.363416,17] [] ? delay_tsc+0x24/0x50 [ 2745.363425,17] [] __udelay+0x34/0x40 [ 2745.363457,17] [] qed_mcp_cmd_and_union+0x36f/0x7d0 [qed] [ 2745.363473,17] [] qed_mcp_nvm_rd_cmd+0x4d/0x90 [qed] [ 2745.363490,17] [] qed_mcp_trace_dump+0x4a7/0x630 [qed] [ 2745.363504,17] [] ? qed_fw_asserts_dump+0x1d6/0x1f0 [qed] [ 2745.363520,17] [] qed_dbg_mcp_trace_get_dump_buf_size+0x37/0x80 [qed] [ 2745.363536,17] [] qed_dbg_feature_size+0x61/0xa0 [qed] [ 2745.363551,17] [] qed_dbg_all_data_size+0x247/0x260 [qed] [ 2745.363560,17] [] qede_get_regs_len+0x30/0x40 [qede] [ 2745.363566,17] [] ethtool_get_drvinfo+0xe3/0x190 [ 2745.363570,17] [] dev_ethtool+0x1362/0x2140 [ 2745.363575,17] [] ? finish_task_switch+0x76/0x260 [ 2745.363580,17] [] ? __schedule+0x3c6/0x9d0 [ 2745.363585,17] [] ? hrtimer_start_range_ns+0x1d0/0x370 [ 2745.363589,17] [] ? dev_get_by_name_rcu+0x6b/0x90 [ 2745.363594,17] [] dev_ioctl+0xe8/0x710 [ 2745.363599,17] [] sock_do_ioctl+0x48/0x60 [ 2745.363603,17] [] sock_ioctl+0x1c7/0x280 [ 2745.363608,17] [] ? seccomp_phase1+0x83/0x220 [ 2745.363612,17] [] do_vfs_ioctl+0x2b3/0x4e0 [ 2745.363616,17] [] SyS_ioctl+0x41/0x70 [ 2745.363619,17] [] entry_SYSCALL_64_fastpath+0x1e/0x79 [ 2745.363622,17] ---[ end trace f6954aa440266421 ]--- Fixes: c965db4446291 ("qed: Add support for debug data collection") Signed-off-by: Caleb Sander Acked-by: Alok Prasad Link: https://lore.kernel.org/r/20230103233021.1457646-1-csander@purestorage.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_debug.c | 28 +++++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c index 86ecb080b153..cdcead614e9f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.c +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -1832,7 +1832,8 @@ static enum dbg_status qed_find_nvram_image(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 image_type, u32 *nvram_offset_bytes, - u32 *nvram_size_bytes) + u32 *nvram_size_bytes, + bool b_can_sleep) { u32 ret_mcp_resp, ret_mcp_param, ret_txn_size; struct mcp_file_att file_att; @@ -1846,7 +1847,8 @@ static enum dbg_status qed_find_nvram_image(struct qed_hwfn *p_hwfn, &ret_mcp_resp, &ret_mcp_param, &ret_txn_size, - (u32 *)&file_att, false); + (u32 *)&file_att, + b_can_sleep); /* Check response */ if (nvm_result || (ret_mcp_resp & FW_MSG_CODE_MASK) != @@ -1873,7 +1875,9 @@ static enum dbg_status qed_find_nvram_image(struct qed_hwfn *p_hwfn, static enum dbg_status qed_nvram_read(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 nvram_offset_bytes, - u32 nvram_size_bytes, u32 *ret_buf) + u32 nvram_size_bytes, + u32 *ret_buf, + bool b_can_sleep) { u32 ret_mcp_resp, ret_mcp_param, ret_read_size, bytes_to_copy; s32 bytes_left = nvram_size_bytes; @@ -1899,7 +1903,7 @@ static enum dbg_status qed_nvram_read(struct qed_hwfn *p_hwfn, &ret_mcp_resp, &ret_mcp_param, &ret_read_size, (u32 *)((u8 *)ret_buf + read_offset), - false)) + b_can_sleep)) return DBG_STATUS_NVRAM_READ_FAILED; /* Check response */ @@ -3380,7 +3384,8 @@ static u32 qed_grc_dump_mcp_hw_dump(struct qed_hwfn *p_hwfn, p_ptt, NVM_TYPE_HW_DUMP_OUT, &hw_dump_offset_bytes, - &hw_dump_size_bytes); + &hw_dump_size_bytes, + false); if (status != DBG_STATUS_OK) return 0; @@ -3397,7 +3402,9 @@ static u32 qed_grc_dump_mcp_hw_dump(struct qed_hwfn *p_hwfn, status = qed_nvram_read(p_hwfn, p_ptt, hw_dump_offset_bytes, - hw_dump_size_bytes, dump_buf + offset); + hw_dump_size_bytes, + dump_buf + offset, + false); if (status != DBG_STATUS_OK) { DP_NOTICE(p_hwfn, "Failed to read MCP HW Dump image from NVRAM\n"); @@ -4123,7 +4130,9 @@ static enum dbg_status qed_mcp_trace_get_meta_info(struct qed_hwfn *p_hwfn, return qed_find_nvram_image(p_hwfn, p_ptt, nvram_image_type, - trace_meta_offset, trace_meta_size); + trace_meta_offset, + trace_meta_size, + true); } /* Reads the MCP Trace meta data from NVRAM into the specified buffer */ @@ -4139,7 +4148,10 @@ static enum dbg_status qed_mcp_trace_read_meta(struct qed_hwfn *p_hwfn, /* Read meta data from NVRAM */ status = qed_nvram_read(p_hwfn, p_ptt, - nvram_offset_in_bytes, size_in_bytes, buf); + nvram_offset_in_bytes, + size_in_bytes, + buf, + true); if (status != DBG_STATUS_OK) return status; From 7d242f4a0c8319821548c7176c09a6e0e71f223c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 3 Jan 2023 12:19:17 +0100 Subject: [PATCH 104/543] net/ulp: prevent ULP without clone op from entering the LISTEN status [ Upstream commit 2c02d41d71f90a5168391b6a5f2954112ba2307c ] When an ULP-enabled socket enters the LISTEN status, the listener ULP data pointer is copied inside the child/accepted sockets by sk_clone_lock(). The relevant ULP can take care of de-duplicating the context pointer via the clone() operation, but only MPTCP and SMC implement such op. Other ULPs may end-up with a double-free at socket disposal time. We can't simply clear the ULP data at clone time, as TLS replaces the socket ops with custom ones assuming a valid TLS ULP context is available. Instead completely prevent clone-less ULP sockets from entering the LISTEN status. Fixes: 734942cc4ea6 ("tcp: ULP infrastructure") Reported-by: slipper Signed-off-by: Paolo Abeni Link: https://lore.kernel.org/r/4b80c3d1dbe3d0ab072f80450c202d9bc88b4b03.1672740602.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/inet_connection_sock.c | 14 ++++++++++++++ net/ipv4/tcp_ulp.c | 4 ++++ 2 files changed, 18 insertions(+) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 0465ada82799..647b3c6b575e 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1200,12 +1200,26 @@ void inet_csk_prepare_forced_close(struct sock *sk) } EXPORT_SYMBOL(inet_csk_prepare_forced_close); +static int inet_ulp_can_listen(const struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ulp_ops && !icsk->icsk_ulp_ops->clone) + return -EINVAL; + + return 0; +} + int inet_csk_listen_start(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); int err; + err = inet_ulp_can_listen(sk); + if (unlikely(err)) + return err; + reqsk_queue_alloc(&icsk->icsk_accept_queue); sk->sk_ack_backlog = 0; diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 9ae50b1bd844..05b6077b9f2c 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -139,6 +139,10 @@ static int __tcp_set_ulp(struct sock *sk, const struct tcp_ulp_ops *ulp_ops) if (sk->sk_socket) clear_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); + err = -EINVAL; + if (!ulp_ops->clone && sk->sk_state == TCP_LISTEN) + goto out_err; + err = ulp_ops->init(sk); if (err) goto out_err; From 3ad47c8aa5648226184415e4a0cb1bf67ffbfd48 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 4 Jan 2023 14:51:46 +0800 Subject: [PATCH 105/543] caif: fix memory leak in cfctrl_linkup_request() [ Upstream commit fe69230f05897b3de758427b574fc98025dfc907 ] When linktype is unknown or kzalloc failed in cfctrl_linkup_request(), pkt is not released. Add release process to error path. Fixes: b482cd2053e3 ("net-caif: add CAIF core protocol stack") Fixes: 8d545c8f958f ("caif: Disconnect without waiting for response") Signed-off-by: Zhengchao Shao Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20230104065146.1153009-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/caif/cfctrl.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index cc405d8c7c30..8480684f2762 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -269,11 +269,15 @@ int cfctrl_linkup_request(struct cflayer *layer, default: pr_warn("Request setup of bad link type = %d\n", param->linktype); + cfpkt_destroy(pkt); return -EINVAL; } req = kzalloc(sizeof(*req), GFP_KERNEL); - if (!req) + if (!req) { + cfpkt_destroy(pkt); return -ENOMEM; + } + req->client_layer = user_layer; req->cmd = CFCTRL_CMD_LINK_SETUP; req->param = *param; From 2d1dbb030ca885bfa6570ca8a0d75f6777221540 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 21 Dec 2022 17:45:51 +0100 Subject: [PATCH 106/543] udf: Fix extension of the last extent in the file [ Upstream commit 83c7423d1eb6806d13c521d1002cc1a012111719 ] When extending the last extent in the file within the last block, we wrongly computed the length of the last extent. This is mostly a cosmetical problem since the extent does not contain any data and the length will be fixed up by following operations but still. Fixes: 1f3868f06855 ("udf: Fix extending file within last block") Signed-off-by: Jan Kara Signed-off-by: Sasha Levin --- fs/udf/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index f713d108f21d..e92a16435a29 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -600,7 +600,7 @@ static void udf_do_extend_final_block(struct inode *inode, */ if (new_elen <= (last_ext->extLength & UDF_EXTENT_LENGTH_MASK)) return; - added_bytes = (last_ext->extLength & UDF_EXTENT_LENGTH_MASK) - new_elen; + added_bytes = new_elen - (last_ext->extLength & UDF_EXTENT_LENGTH_MASK); last_ext->extLength += added_bytes; UDF_I(inode)->i_lenExtents += added_bytes; From f190519b07c75717b4dddf82bf012bfaeb506eb5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 3 Jan 2023 13:17:46 +0100 Subject: [PATCH 107/543] usb: dwc3: xilinx: include linux/gpio/consumer.h [ Upstream commit e498a04443240c15c3c857165f7b652b87f4fd96 ] The newly added gpio consumer calls cause a build failure in configurations that fail to include the right header implicitly: drivers/usb/dwc3/dwc3-xilinx.c: In function 'dwc3_xlnx_init_zynqmp': drivers/usb/dwc3/dwc3-xilinx.c:207:22: error: implicit declaration of function 'devm_gpiod_get_optional'; did you mean 'devm_clk_get_optional'? [-Werror=implicit-function-declaration] 207 | reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW); | ^~~~~~~~~~~~~~~~~~~~~~~ | devm_clk_get_optional Fixes: ca05b38252d7 ("usb: dwc3: xilinx: Add gpio-reset support") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20230103121755.956027-1-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/dwc3/dwc3-xilinx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/dwc3-xilinx.c b/drivers/usb/dwc3/dwc3-xilinx.c index 8607d4c23283..0745e9f11b2e 100644 --- a/drivers/usb/dwc3/dwc3-xilinx.c +++ b/drivers/usb/dwc3/dwc3-xilinx.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include From 45917be9f0af339a45b4619f31c902d37b8aed59 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 4 Jan 2023 11:06:28 -0800 Subject: [PATCH 108/543] hfs/hfsplus: avoid WARN_ON() for sanity check, use proper error handling [ Upstream commit cb7a95af78d29442b8294683eca4897544b8ef46 ] Commit 55d1cbbbb29e ("hfs/hfsplus: use WARN_ON for sanity check") fixed a build warning by turning a comment into a WARN_ON(), but it turns out that syzbot then complains because it can trigger said warning with a corrupted hfs image. The warning actually does warn about a bad situation, but we are much better off just handling it as the error it is. So rather than warn about us doing bad things, stop doing the bad things and return -EIO. While at it, also fix a memory leak that was introduced by an earlier fix for a similar syzbot warning situation, and add a check for one case that historically wasn't handled at all (ie neither comment nor subsequent WARN_ON). Reported-by: syzbot+7bb7cd3595533513a9e7@syzkaller.appspotmail.com Fixes: 55d1cbbbb29e ("hfs/hfsplus: use WARN_ON for sanity check") Fixes: 8d824e69d9f3 ("hfs: fix OOB Read in __hfs_brec_find") Link: https://lore.kernel.org/lkml/000000000000dbce4e05f170f289@google.com/ Tested-by: Michael Schmitz Cc: Arnd Bergmann Cc: Matthew Wilcox Cc: Viacheslav Dubeyko Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- fs/hfs/inode.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index a0746be3c1de..80d17c520d0b 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -458,15 +458,16 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) /* panic? */ return -EIO; + res = -EIO; if (HFS_I(main_inode)->cat_key.CName.len > HFS_NAMELEN) - return -EIO; + goto out; fd.search_key->cat = HFS_I(main_inode)->cat_key; if (hfs_brec_find(&fd)) - /* panic? */ goto out; if (S_ISDIR(main_inode->i_mode)) { - WARN_ON(fd.entrylength < sizeof(struct hfs_cat_dir)); + if (fd.entrylength < sizeof(struct hfs_cat_dir)) + goto out; hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_dir)); if (rec.type != HFS_CDR_DIR || @@ -479,6 +480,8 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) hfs_bnode_write(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_dir)); } else if (HFS_IS_RSRC(inode)) { + if (fd.entrylength < sizeof(struct hfs_cat_file)) + goto out; hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); hfs_inode_write_fork(inode, rec.file.RExtRec, @@ -486,7 +489,8 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) hfs_bnode_write(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); } else { - WARN_ON(fd.entrylength < sizeof(struct hfs_cat_file)); + if (fd.entrylength < sizeof(struct hfs_cat_file)) + goto out; hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); if (rec.type != HFS_CDR_FIL || @@ -503,9 +507,10 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) hfs_bnode_write(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); } + res = 0; out: hfs_find_exit(&fd); - return 0; + return res; } static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry, From f6e548529bb923e21fa67c16ca1c2baff3e9ce74 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Fri, 9 Dec 2022 13:45:29 +0200 Subject: [PATCH 109/543] ASoC: SOF: Revert: "core: unregister clients and machine drivers in .shutdown" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 44fda61d2bcfb74a942df93959e083a4e8eff75f ] The unregister machine drivers call is not safe to do when kexec is used. Kexec-lite gets blocked with following backtrace: [ 84.943749] Freezing user space processes ... (elapsed 0.111 seconds) done. [ 246.784446] INFO: task kexec-lite:5123 blocked for more than 122 seconds. [ 246.819035] Call Trace: [ 246.821782] [ 246.824186] __schedule+0x5f9/0x1263 [ 246.828231] schedule+0x87/0xc5 [ 246.831779] snd_card_disconnect_sync+0xb5/0x127 ... [ 246.889249] snd_sof_device_shutdown+0xb4/0x150 [ 246.899317] pci_device_shutdown+0x37/0x61 [ 246.903990] device_shutdown+0x14c/0x1d6 [ 246.908391] kernel_kexec+0x45/0xb9 This reverts commit 83bfc7e793b555291785136c3ae86abcdc046887. Reported-by: Ricardo Ribalda Cc: Ricardo Ribalda Signed-off-by: Kai Vehmanen Reviewed-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Reviewed-by: Ranjani Sridharan Link: https://lore.kernel.org/r/20221209114529.3909192-3-kai.vehmanen@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sof/core.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/sound/soc/sof/core.c b/sound/soc/sof/core.c index 3e6141d03770..625977a29d8a 100644 --- a/sound/soc/sof/core.c +++ b/sound/soc/sof/core.c @@ -475,19 +475,10 @@ EXPORT_SYMBOL(snd_sof_device_remove); int snd_sof_device_shutdown(struct device *dev) { struct snd_sof_dev *sdev = dev_get_drvdata(dev); - struct snd_sof_pdata *pdata = sdev->pdata; if (IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE)) cancel_work_sync(&sdev->probe_work); - /* - * make sure clients and machine driver(s) are unregistered to force - * all userspace devices to be closed prior to the DSP shutdown sequence - */ - sof_unregister_clients(sdev); - - snd_sof_machine_unregister(sdev, pdata); - if (sdev->fw_state == SOF_FW_BOOT_COMPLETE) return snd_sof_shutdown(sdev); From 30f3e4afe09a7f76f6cd2a62f584a19e8c720b62 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Mon, 5 Dec 2022 21:39:01 +0900 Subject: [PATCH 110/543] 9p/client: fix data race on req->status [ Upstream commit 1a4f69ef15ec29b213e2b086b2502644e8ef76ee ] KCSAN reported a race between writing req->status in p9_client_cb and accessing it in p9_client_rpc's wait_event. Accesses to req itself is protected by the data barrier (writing req fields, write barrier, writing status // reading status, read barrier, reading other req fields), but status accesses themselves apparently also must be annotated properly with WRITE_ONCE/READ_ONCE when we access it without locks. Follows: - error paths writing status in various threads all can notify p9_client_rpc, so these all also need WRITE_ONCE - there's a similar read loop in trans_virtio for zc case that also needs READ_ONCE - other reads in trans_fd should be protected by the trans_fd lock and lists state machine, as corresponding writers all are within trans_fd and should be under the same lock. If KCSAN complains on them we likely will have something else to fix as well, so it's better to leave them unmarked and look again if required. Link: https://lkml.kernel.org/r/20221205124756.426350-1-asmadeus@codewreck.org Reported-by: Naresh Kamboju Suggested-by: Marco Elver Acked-by: Marco Elver Reviewed-by: Christian Schoenebeck Signed-off-by: Dominique Martinet Signed-off-by: Sasha Levin --- net/9p/client.c | 15 ++++++++------- net/9p/trans_fd.c | 12 ++++++------ net/9p/trans_rdma.c | 4 ++-- net/9p/trans_virtio.c | 9 +++++---- net/9p/trans_xen.c | 4 ++-- 5 files changed, 23 insertions(+), 21 deletions(-) diff --git a/net/9p/client.c b/net/9p/client.c index b554f8357f96..b5aa25f82b78 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -443,7 +443,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status) * the status change is visible to another thread */ smp_wmb(); - req->status = status; + WRITE_ONCE(req->status, status); wake_up(&req->wq); p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag); @@ -605,7 +605,7 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) /* if we haven't received a response for oldreq, * remove it from the list */ - if (oldreq->status == REQ_STATUS_SENT) { + if (READ_ONCE(oldreq->status) == REQ_STATUS_SENT) { if (c->trans_mod->cancelled) c->trans_mod->cancelled(c, oldreq); } @@ -702,7 +702,8 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) } again: /* Wait for the response */ - err = wait_event_killable(req->wq, req->status >= REQ_STATUS_RCVD); + err = wait_event_killable(req->wq, + READ_ONCE(req->status) >= REQ_STATUS_RCVD); /* Make sure our req is coherent with regard to updates in other * threads - echoes to wmb() in the callback @@ -716,7 +717,7 @@ again: goto again; } - if (req->status == REQ_STATUS_ERROR) { + if (READ_ONCE(req->status) == REQ_STATUS_ERROR) { p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); err = req->t_err; } @@ -729,7 +730,7 @@ again: p9_client_flush(c, req); /* if we received the response anyway, don't signal error */ - if (req->status == REQ_STATUS_RCVD) + if (READ_ONCE(req->status) == REQ_STATUS_RCVD) err = 0; } recalc_sigpending: @@ -798,7 +799,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, if (err != -ERESTARTSYS) goto recalc_sigpending; } - if (req->status == REQ_STATUS_ERROR) { + if (READ_ONCE(req->status) == REQ_STATUS_ERROR) { p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); err = req->t_err; } @@ -811,7 +812,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, p9_client_flush(c, req); /* if we received the response anyway, don't signal error */ - if (req->status == REQ_STATUS_RCVD) + if (READ_ONCE(req->status) == REQ_STATUS_RCVD) err = 0; } recalc_sigpending: diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 07db2f436d44..5a1aecf7fe48 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -202,11 +202,11 @@ static void p9_conn_cancel(struct p9_conn *m, int err) list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { list_move(&req->req_list, &cancel_list); - req->status = REQ_STATUS_ERROR; + WRITE_ONCE(req->status, REQ_STATUS_ERROR); } list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { list_move(&req->req_list, &cancel_list); - req->status = REQ_STATUS_ERROR; + WRITE_ONCE(req->status, REQ_STATUS_ERROR); } spin_unlock(&m->req_lock); @@ -467,7 +467,7 @@ static void p9_write_work(struct work_struct *work) req = list_entry(m->unsent_req_list.next, struct p9_req_t, req_list); - req->status = REQ_STATUS_SENT; + WRITE_ONCE(req->status, REQ_STATUS_SENT); p9_debug(P9_DEBUG_TRANS, "move req %p\n", req); list_move_tail(&req->req_list, &m->req_list); @@ -676,7 +676,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) return m->err; spin_lock(&m->req_lock); - req->status = REQ_STATUS_UNSENT; + WRITE_ONCE(req->status, REQ_STATUS_UNSENT); list_add_tail(&req->req_list, &m->unsent_req_list); spin_unlock(&m->req_lock); @@ -703,7 +703,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) if (req->status == REQ_STATUS_UNSENT) { list_del(&req->req_list); - req->status = REQ_STATUS_FLSHD; + WRITE_ONCE(req->status, REQ_STATUS_FLSHD); p9_req_put(client, req); ret = 0; } @@ -732,7 +732,7 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req) * remove it from the list. */ list_del(&req->req_list); - req->status = REQ_STATUS_FLSHD; + WRITE_ONCE(req->status, REQ_STATUS_FLSHD); spin_unlock(&m->req_lock); p9_req_put(client, req); diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 6ff706760676..e9a830c69058 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -507,7 +507,7 @@ dont_need_post_recv: * because doing if after could erase the REQ_STATUS_RCVD * status in case of a very fast reply. */ - req->status = REQ_STATUS_SENT; + WRITE_ONCE(req->status, REQ_STATUS_SENT); err = ib_post_send(rdma->qp, &wr, NULL); if (err) goto send_error; @@ -517,7 +517,7 @@ dont_need_post_recv: /* Handle errors that happened during or while preparing the send: */ send_error: - req->status = REQ_STATUS_ERROR; + WRITE_ONCE(req->status, REQ_STATUS_ERROR); kfree(c); p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index e757f0601304..3f3eb03cda7d 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -263,7 +263,7 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req) p9_debug(P9_DEBUG_TRANS, "9p debug: virtio request\n"); - req->status = REQ_STATUS_SENT; + WRITE_ONCE(req->status, REQ_STATUS_SENT); req_retry: spin_lock_irqsave(&chan->lock, flags); @@ -469,7 +469,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, inlen = n; } } - req->status = REQ_STATUS_SENT; + WRITE_ONCE(req->status, REQ_STATUS_SENT); req_retry_pinned: spin_lock_irqsave(&chan->lock, flags); @@ -532,9 +532,10 @@ req_retry_pinned: spin_unlock_irqrestore(&chan->lock, flags); kicked = 1; p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n"); - err = wait_event_killable(req->wq, req->status >= REQ_STATUS_RCVD); + err = wait_event_killable(req->wq, + READ_ONCE(req->status) >= REQ_STATUS_RCVD); // RERROR needs reply (== error string) in static data - if (req->status == REQ_STATUS_RCVD && + if (READ_ONCE(req->status) == REQ_STATUS_RCVD && unlikely(req->rc.sdata[4] == P9_RERROR)) handle_rerror(req, in_hdr_len, offs, in_pages); diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index aaa5fd364691..cf1b89ba522b 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -157,7 +157,7 @@ again: &masked_prod, masked_cons, XEN_9PFS_RING_SIZE(ring)); - p9_req->status = REQ_STATUS_SENT; + WRITE_ONCE(p9_req->status, REQ_STATUS_SENT); virt_wmb(); /* write ring before updating pointer */ prod += size; ring->intf->out_prod = prod; @@ -212,7 +212,7 @@ static void p9_xen_response(struct work_struct *work) dev_warn(&priv->dev->dev, "requested packet size too big: %d for tag %d with capacity %zd\n", h.size, h.tag, req->rc.capacity); - req->status = REQ_STATUS_ERROR; + WRITE_ONCE(req->status, REQ_STATUS_ERROR); goto recv_error; } From ee887708e25ee2d3002e2894eca1b7c4be17f0eb Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 13 Dec 2022 13:32:46 +0100 Subject: [PATCH 111/543] ASoC: Intel: bytcr_rt5640: Add quirk for the Advantech MICA-071 tablet [ Upstream commit a1dec9d70b6ad97087b60b81d2492134a84208c6 ] The Advantech MICA-071 tablet deviates from the defaults for a non CR Bay Trail based tablet in several ways: 1. It uses an analog MIC on IN3 rather then using DMIC1 2. It only has 1 speaker 3. It needs the OVCD current threshold to be set to 1500uA instead of the default 2000uA to reliable differentiate between headphones vs headsets Add a quirk with these settings for this tablet. Signed-off-by: Hans de Goede Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20221213123246.11226-1-hdegoede@redhat.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/boards/bytcr_rt5640.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index fb9d9e271845..ddd2625bed90 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -570,6 +570,21 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_SSP0_AIF1 | BYT_RT5640_MCLK_EN), }, + { + /* Advantech MICA-071 */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Advantech"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "MICA-071"), + }, + /* OVCD Th = 1500uA to reliable detect head-phones vs -set */ + .driver_data = (void *)(BYT_RT5640_IN3_MAP | + BYT_RT5640_JD_SRC_JD2_IN4N | + BYT_RT5640_OVCD_TH_1500UA | + BYT_RT5640_OVCD_SF_0P75 | + BYT_RT5640_MONO_SPEAKER | + BYT_RT5640_DIFF_MIC | + BYT_RT5640_MCLK_EN), + }, { .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ARCHOS"), From b48f8c9a81bea30892f793639a0884a2bd3e15da Mon Sep 17 00:00:00 2001 From: YC Hung Date: Tue, 13 Dec 2022 19:56:17 +0800 Subject: [PATCH 112/543] ASoC: SOF: mediatek: initialize panic_info to zero [ Upstream commit 7bd220f2ba9014b78f0304178103393554b8c4fe ] Coverity spotted that panic_info is not initialized to zero in mtk_adsp_dump. Using uninitialized value panic_info.linenum when calling snd_sof_get_status. Fix this coverity by initializing panic_info struct as zero. Signed-off-by: YC Hung Reviewed-by: Curtis Malainey Link: https://lore.kernel.org/r/20221213115617.25086-1-yc.hung@mediatek.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sof/mediatek/mtk-adsp-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/sof/mediatek/mtk-adsp-common.c b/sound/soc/sof/mediatek/mtk-adsp-common.c index 1e0769c668a7..de8dbe27cd0d 100644 --- a/sound/soc/sof/mediatek/mtk-adsp-common.c +++ b/sound/soc/sof/mediatek/mtk-adsp-common.c @@ -60,7 +60,7 @@ void mtk_adsp_dump(struct snd_sof_dev *sdev, u32 flags) { char *level = (flags & SOF_DBG_DUMP_OPTIONAL) ? KERN_DEBUG : KERN_ERR; struct sof_ipc_dsp_oops_xtensa xoops; - struct sof_ipc_panic_info panic_info; + struct sof_ipc_panic_info panic_info = {}; u32 stack[MTK_ADSP_STACK_DUMP_SIZE]; u32 status; From 8ba7c55e112f4ffd2a95b99be1cb1c891ef08ba1 Mon Sep 17 00:00:00 2001 From: Luben Tuikov Date: Sat, 10 Dec 2022 02:51:19 -0500 Subject: [PATCH 113/543] drm/amdgpu: Fix size validation for non-exclusive domains (v4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 7554886daa31eacc8e7fac9e15bbce67d10b8f1f ] Fix amdgpu_bo_validate_size() to check whether the TTM domain manager for the requested memory exists, else we get a kernel oops when dereferencing "man". v2: Make the patch standalone, i.e. not dependent on local patches. v3: Preserve old behaviour and just check that the manager pointer is not NULL. v4: Complain if GTT domain requested and it is uninitialized--most likely a bug. Cc: Alex Deucher Cc: Christian König Cc: AMD Graphics Signed-off-by: Luben Tuikov Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 3df13d841e4d..3be3cba3a16d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -446,27 +446,24 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, /* * If GTT is part of requested domains the check must succeed to - * allow fall back to GTT + * allow fall back to GTT. */ if (domain & AMDGPU_GEM_DOMAIN_GTT) { man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); - if (size < man->size) + if (man && size < man->size) return true; - else - goto fail; - } - - if (domain & AMDGPU_GEM_DOMAIN_VRAM) { + else if (!man) + WARN_ON_ONCE("GTT domain requested but GTT mem manager uninitialized"); + goto fail; + } else if (domain & AMDGPU_GEM_DOMAIN_VRAM) { man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); - if (size < man->size) + if (man && size < man->size) return true; - else - goto fail; + goto fail; } - /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU */ return true; From 9d74d1f52e16d8e07f7fbe52e96d6391418a2fe9 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 14 Dec 2022 10:15:17 -0500 Subject: [PATCH 114/543] drm/amdkfd: Fix kfd_process_device_init_vm error handling [ Upstream commit 29d48b87db64b6697ddad007548e51d032081c59 ] Should only destroy the ib_mem and let process cleanup worker to free the outstanding BOs. Reset the pointer in pdd->qpd structure, to avoid NULL pointer access in process destroy worker. BUG: kernel NULL pointer dereference, address: 0000000000000010 Call Trace: amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel+0x46/0xb0 [amdgpu] kfd_process_device_destroy_cwsr_dgpu+0x40/0x70 [amdgpu] kfd_process_destroy_pdds+0x71/0x190 [amdgpu] kfd_process_wq_release+0x2a2/0x3b0 [amdgpu] process_one_work+0x2a1/0x600 worker_thread+0x39/0x3d0 Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 951b63677248..9821fa9268d3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -689,13 +689,13 @@ void kfd_process_destroy_wq(void) } static void kfd_process_free_gpuvm(struct kgd_mem *mem, - struct kfd_process_device *pdd, void *kptr) + struct kfd_process_device *pdd, void **kptr) { struct kfd_dev *dev = pdd->dev; - if (kptr) { + if (kptr && *kptr) { amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem); - kptr = NULL; + *kptr = NULL; } amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->adev, mem, pdd->drm_priv); @@ -795,7 +795,7 @@ static void kfd_process_device_destroy_ib_mem(struct kfd_process_device *pdd) if (!qpd->ib_kaddr || !qpd->ib_base) return; - kfd_process_free_gpuvm(qpd->ib_mem, pdd, qpd->ib_kaddr); + kfd_process_free_gpuvm(qpd->ib_mem, pdd, &qpd->ib_kaddr); } struct kfd_process *kfd_create_process(struct file *filep) @@ -1277,7 +1277,7 @@ static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd) if (!dev->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base) return; - kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, qpd->cwsr_kaddr); + kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, &qpd->cwsr_kaddr); } void kfd_process_set_trap_handler(struct qcm_process_device *qpd, @@ -1598,8 +1598,8 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, return 0; err_init_cwsr: + kfd_process_device_destroy_ib_mem(pdd); err_reserve_ib_mem: - kfd_process_device_free_bos(pdd); pdd->drm_priv = NULL; return ret; From a02c07b619899179384fde06f951530438a3512d Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Tue, 13 Dec 2022 00:50:03 -0500 Subject: [PATCH 115/543] drm/amdkfd: Fix double release compute pasid [ Upstream commit 1a799c4c190ea9f0e81028e3eb3037ed0ab17ff5 ] If kfd_process_device_init_vm returns failure after vm is converted to compute vm and vm->pasid set to compute pasid, KFD will not take pdd->drm_file reference. As a result, drm close file handler maybe called to release the compute pasid before KFD process destroy worker to release the same pasid and set vm->pasid to zero, this generates below WARNING backtrace and NULL pointer access. Add helper amdgpu_amdkfd_gpuvm_set_vm_pasid and call it at the last step of kfd_process_device_init_vm, to ensure vm pasid is the original pasid if acquiring vm failed or is the compute pasid with pdd->drm_file reference taken to avoid double release same pasid. amdgpu: Failed to create process VM object ida_free called for id=32770 which is not allocated. WARNING: CPU: 57 PID: 72542 at ../lib/idr.c:522 ida_free+0x96/0x140 RIP: 0010:ida_free+0x96/0x140 Call Trace: amdgpu_pasid_free_delayed+0xe1/0x2a0 [amdgpu] amdgpu_driver_postclose_kms+0x2d8/0x340 [amdgpu] drm_file_free.part.13+0x216/0x270 [drm] drm_close_helper.isra.14+0x60/0x70 [drm] drm_release+0x6e/0xf0 [drm] __fput+0xcc/0x280 ____fput+0xe/0x20 task_work_run+0x96/0xc0 do_exit+0x3d0/0xc10 BUG: kernel NULL pointer dereference, address: 0000000000000000 RIP: 0010:ida_free+0x76/0x140 Call Trace: amdgpu_pasid_free_delayed+0xe1/0x2a0 [amdgpu] amdgpu_driver_postclose_kms+0x2d8/0x340 [amdgpu] drm_file_free.part.13+0x216/0x270 [drm] drm_close_helper.isra.14+0x60/0x70 [drm] drm_release+0x6e/0xf0 [drm] __fput+0xcc/0x280 ____fput+0xe/0x20 task_work_run+0x96/0xc0 do_exit+0x3d0/0xc10 Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 4 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 41 +++++++++++++------ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 12 ++++-- 3 files changed, 41 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 647220a8762d..30f145dc8724 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -265,8 +265,10 @@ int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_ (&((struct amdgpu_fpriv *) \ ((struct drm_file *)(drm_priv))->driver_priv)->vm) +int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev, + struct file *filp, u32 pasid); int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, - struct file *filp, u32 pasid, + struct file *filp, void **process_info, struct dma_fence **ef); void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index fe87b3402f06..29f045079a3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1473,8 +1473,36 @@ static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo) amdgpu_bo_unreserve(bo); } +int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev, + struct file *filp, u32 pasid) + +{ + struct amdgpu_fpriv *drv_priv; + struct amdgpu_vm *avm; + int ret; + + ret = amdgpu_file_to_fpriv(filp, &drv_priv); + if (ret) + return ret; + avm = &drv_priv->vm; + + /* Free the original amdgpu allocated pasid, + * will be replaced with kfd allocated pasid. + */ + if (avm->pasid) { + amdgpu_pasid_free(avm->pasid); + amdgpu_vm_set_pasid(adev, avm, 0); + } + + ret = amdgpu_vm_set_pasid(adev, avm, pasid); + if (ret) + return ret; + + return 0; +} + int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, - struct file *filp, u32 pasid, + struct file *filp, void **process_info, struct dma_fence **ef) { @@ -1491,22 +1519,11 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, if (avm->process_info) return -EINVAL; - /* Free the original amdgpu allocated pasid, - * will be replaced with kfd allocated pasid. - */ - if (avm->pasid) { - amdgpu_pasid_free(avm->pasid); - amdgpu_vm_set_pasid(adev, avm, 0); - } - /* Convert VM into a compute VM */ ret = amdgpu_vm_make_compute(adev, avm); if (ret) return ret; - ret = amdgpu_vm_set_pasid(adev, avm, pasid); - if (ret) - return ret; /* Initialize KFD part of the VM and process info */ ret = init_kfd_vm(avm, process_info, ef); if (ret) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 9821fa9268d3..dd351105c1bc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1576,9 +1576,9 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, p = pdd->process; dev = pdd->dev; - ret = amdgpu_amdkfd_gpuvm_acquire_process_vm( - dev->adev, drm_file, p->pasid, - &p->kgd_process_info, &p->ef); + ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, drm_file, + &p->kgd_process_info, + &p->ef); if (ret) { pr_err("Failed to create process VM object\n"); return ret; @@ -1593,10 +1593,16 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, if (ret) goto err_init_cwsr; + ret = amdgpu_amdkfd_gpuvm_set_vm_pasid(dev->adev, drm_file, p->pasid); + if (ret) + goto err_set_pasid; + pdd->drm_file = drm_file; return 0; +err_set_pasid: + kfd_process_device_destroy_cwsr_dgpu(pdd); err_init_cwsr: kfd_process_device_destroy_ib_mem(pdd); err_reserve_ib_mem: From a288e98adc02b11001e89f3092a0811e394055b5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 21 Dec 2022 07:11:33 -0700 Subject: [PATCH 116/543] io_uring/cancel: re-grab ctx mutex after finishing wait [ Upstream commit 23fffb2f09ce1145cbd751801d45ba74acaa6542 ] If we have a signal pending during cancelations, it'll cause the task_work run to return an error. Since we didn't run task_work, the current task is left in TASK_INTERRUPTIBLE state when we need to re-grab the ctx mutex, and the kernel will rightfully complain about that. Move the lock grabbing for the error cases outside the loop to avoid that issue. Reported-by: syzbot+7df055631cd1be4586fd@syzkaller.appspotmail.com Link: https://lore.kernel.org/io-uring/0000000000003a14a905f05050b0@google.com/ Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- io_uring/cancel.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/io_uring/cancel.c b/io_uring/cancel.c index 2291a53cdabd..b4f5dfacc0c3 100644 --- a/io_uring/cancel.c +++ b/io_uring/cancel.c @@ -288,24 +288,23 @@ int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg) ret = __io_sync_cancel(current->io_uring, &cd, sc.fd); + mutex_unlock(&ctx->uring_lock); if (ret != -EALREADY) break; - mutex_unlock(&ctx->uring_lock); ret = io_run_task_work_sig(ctx); - if (ret < 0) { - mutex_lock(&ctx->uring_lock); + if (ret < 0) break; - } ret = schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS); - mutex_lock(&ctx->uring_lock); if (!ret) { ret = -ETIME; break; } + mutex_lock(&ctx->uring_lock); } while (1); finish_wait(&ctx->cq_wait, &wait); + mutex_lock(&ctx->uring_lock); if (ret == -ENOENT || ret > 0) ret = 0; From 183c2aaef40a91acbaae45c3824d6cde7bb62b10 Mon Sep 17 00:00:00 2001 From: Yanjun Zhang Date: Thu, 22 Dec 2022 09:57:21 +0800 Subject: [PATCH 117/543] nvme: fix multipath crash caused by flush request when blktrace is enabled [ Upstream commit 3659fb5ac29a5e6102bebe494ac789fd47fb78f4 ] The flush request initialized by blk_kick_flush has NULL bio, and it may be dealt with nvme_end_req during io completion. When blktrace is enabled, nvme_trace_bio_complete with multipath activated trying to access NULL pointer bio from flush request results in the following crash: [ 2517.831677] BUG: kernel NULL pointer dereference, address: 000000000000001a [ 2517.835213] #PF: supervisor read access in kernel mode [ 2517.838724] #PF: error_code(0x0000) - not-present page [ 2517.842222] PGD 7b2d51067 P4D 0 [ 2517.845684] Oops: 0000 [#1] SMP NOPTI [ 2517.849125] CPU: 2 PID: 732 Comm: kworker/2:1H Kdump: loaded Tainted: G S 5.15.67-0.cl9.x86_64 #1 [ 2517.852723] Hardware name: XFUSION 2288H V6/BC13MBSBC, BIOS 1.13 07/27/2022 [ 2517.856358] Workqueue: nvme_tcp_wq nvme_tcp_io_work [nvme_tcp] [ 2517.859993] RIP: 0010:blk_add_trace_bio_complete+0x6/0x30 [ 2517.863628] Code: 1f 44 00 00 48 8b 46 08 31 c9 ba 04 00 10 00 48 8b 80 50 03 00 00 48 8b 78 50 e9 e5 fe ff ff 0f 1f 44 00 00 41 54 49 89 f4 55 <0f> b6 7a 1a 48 89 d5 e8 3e 1c 2b 00 48 89 ee 4c 89 e7 5d 89 c1 ba [ 2517.871269] RSP: 0018:ff7f6a008d9dbcd0 EFLAGS: 00010286 [ 2517.875081] RAX: ff3d5b4be00b1d50 RBX: 0000000002040002 RCX: ff3d5b0a270f2000 [ 2517.878966] RDX: 0000000000000000 RSI: ff3d5b0b021fb9f8 RDI: 0000000000000000 [ 2517.882849] RBP: ff3d5b0b96a6fa00 R08: 0000000000000001 R09: 0000000000000000 [ 2517.886718] R10: 000000000000000c R11: 000000000000000c R12: ff3d5b0b021fb9f8 [ 2517.890575] R13: 0000000002000000 R14: ff3d5b0b021fb1b0 R15: 0000000000000018 [ 2517.894434] FS: 0000000000000000(0000) GS:ff3d5b42bfc80000(0000) knlGS:0000000000000000 [ 2517.898299] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 2517.902157] CR2: 000000000000001a CR3: 00000004f023e005 CR4: 0000000000771ee0 [ 2517.906053] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 2517.909930] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 2517.913761] PKRU: 55555554 [ 2517.917558] Call Trace: [ 2517.921294] [ 2517.924982] nvme_complete_rq+0x1c3/0x1e0 [nvme_core] [ 2517.928715] nvme_tcp_recv_pdu+0x4d7/0x540 [nvme_tcp] [ 2517.932442] nvme_tcp_recv_skb+0x4f/0x240 [nvme_tcp] [ 2517.936137] ? nvme_tcp_recv_pdu+0x540/0x540 [nvme_tcp] [ 2517.939830] tcp_read_sock+0x9c/0x260 [ 2517.943486] nvme_tcp_try_recv+0x65/0xa0 [nvme_tcp] [ 2517.947173] nvme_tcp_io_work+0x64/0x90 [nvme_tcp] [ 2517.950834] process_one_work+0x1e8/0x390 [ 2517.954473] worker_thread+0x53/0x3c0 [ 2517.958069] ? process_one_work+0x390/0x390 [ 2517.961655] kthread+0x10c/0x130 [ 2517.965211] ? set_kthread_struct+0x40/0x40 [ 2517.968760] ret_from_fork+0x1f/0x30 [ 2517.972285] To avoid this situation, add a NULL check for req->bio before calling trace_block_bio_complete. Signed-off-by: Yanjun Zhang Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- drivers/nvme/host/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 8a0db9e06dc6..cbda8a19409b 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -888,7 +888,7 @@ static inline void nvme_trace_bio_complete(struct request *req) { struct nvme_ns *ns = req->q->queuedata; - if (req->cmd_flags & REQ_NVME_MPATH) + if ((req->cmd_flags & REQ_NVME_MPATH) && req->bio) trace_block_bio_complete(ns->head->disk->queue, req->bio); } From 0ba8892d86ad69775e4ee3de53b021ae33aca8c0 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 8 Dec 2022 10:42:05 -0600 Subject: [PATCH 118/543] ACPI: video: Allow GPU drivers to report no panels [ Upstream commit 00a734104af7d878f1252d49eff9298785c6cbdc ] The current logic for the ACPI backlight detection will create a backlight device if no native or vendor drivers have created 8 seconds after the system has booted if the ACPI tables included backlight control methods. If the GPU drivers have loaded, they may be able to report whether any LCD panels were found. Allow using this information to factor in whether to enable the fallback logic for making an acpi_video0 backlight device. Suggested-by: Hans de Goede Signed-off-by: Mario Limonciello Reviewed-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/acpi_video.c | 11 +++++++++++ include/acpi/video.h | 2 ++ 2 files changed, 13 insertions(+) diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index 32953646caeb..f64fdb029090 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -2178,6 +2178,17 @@ static bool should_check_lcd_flag(void) return false; } +/* + * At least one graphics driver has reported that no LCD is connected + * via the native interface. cancel the registration for fallback acpi_video0. + * If another driver still deems this necessary, it can explicitly register it. + */ +void acpi_video_report_nolcd(void) +{ + cancel_delayed_work(&video_bus_register_backlight_work); +} +EXPORT_SYMBOL(acpi_video_report_nolcd); + int acpi_video_register(void) { int ret = 0; diff --git a/include/acpi/video.h b/include/acpi/video.h index a275c35e5249..8ed9bec03e53 100644 --- a/include/acpi/video.h +++ b/include/acpi/video.h @@ -53,6 +53,7 @@ enum acpi_backlight_type { }; #if IS_ENABLED(CONFIG_ACPI_VIDEO) +extern void acpi_video_report_nolcd(void); extern int acpi_video_register(void); extern void acpi_video_unregister(void); extern void acpi_video_register_backlight(void); @@ -69,6 +70,7 @@ extern int acpi_video_get_levels(struct acpi_device *device, struct acpi_video_device_brightness **dev_br, int *pmax_level); #else +static inline void acpi_video_report_nolcd(void) { return; }; static inline int acpi_video_register(void) { return -ENODEV; } static inline void acpi_video_unregister(void) { return; } static inline void acpi_video_register_backlight(void) { return; } From adaf41b56803fe7a9a4ac625c7e41615ef23591f Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 8 Dec 2022 10:42:06 -0600 Subject: [PATCH 119/543] drm/amd/display: Report to ACPI video if no panels were found [ Upstream commit c573e240609ff781a0246c0c8c8351abd0475287 ] On desktop APUs amdgpu doesn't create a native backlight device as no eDP panels are found. However if the BIOS has reported backlight control methods in the ACPI tables then an acpi_video0 backlight device will be made 8 seconds after boot. This has manifested in a power slider on a number of desktop APUs ranging from Ryzen 5000 through Ryzen 7000 on various motherboard manufacturers. To avoid this, report to the acpi video detection that the system does not have any panel connected in the native driver. Link: https://bugzilla.redhat.com/show_bug.cgi?id=1783786 Reported-by: Hans de Goede Signed-off-by: Mario Limonciello Reviewed-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index c2c26fbea512..6f1cc5ce4c7a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4372,6 +4372,10 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) amdgpu_set_panel_orientation(&aconnector->base); } + /* If we didn't find a panel, notify the acpi video detection */ + if (dm->adev->flags & AMD_IS_APU && dm->num_of_edps == 0) + acpi_video_report_nolcd(); + /* Software is initialized. Now we can register interrupt handlers. */ switch (adev->asic_type) { #if defined(CONFIG_DRM_AMD_DC_SI) From 9c152189a7e85dac1f759d6ba95f877af91ccf8c Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 8 Dec 2022 10:42:07 -0600 Subject: [PATCH 120/543] ACPI: video: Don't enable fallback path for creating ACPI backlight by default [ Upstream commit 5aa9d943e9b6bf6e6023645cbe7ce7d5ed84baf4 ] The ACPI video detection code has a module parameter `register_backlight_delay` which is currently configured to 8 seconds. This means that if after 8 seconds of booting no native driver has created a backlight device then the code will attempt to make an ACPI video backlight device. This was intended as a safety mechanism with the backlight overhaul that occurred in kernel 6.1, but as it doesn't appear necesssary set it to be disabled by default. Suggested-by: Hans de Goede Signed-off-by: Mario Limonciello Reviewed-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/acpi_video.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index f64fdb029090..0c79f463fbfd 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -70,11 +70,7 @@ module_param(device_id_scheme, bool, 0444); static int only_lcd = -1; module_param(only_lcd, int, 0444); -/* - * Display probing is known to take up to 5 seconds, so delay the fallback - * backlight registration by 5 seconds + 3 seconds for some extra margin. - */ -static int register_backlight_delay = 8; +static int register_backlight_delay; module_param(register_backlight_delay, int, 0444); MODULE_PARM_DESC(register_backlight_delay, "Delay in seconds before doing fallback (non GPU driver triggered) " From 8b2de52126612b6d99f637292fc3aaceb42efc8a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 23 Dec 2022 06:37:08 -0700 Subject: [PATCH 121/543] io_uring: check for valid register opcode earlier [ Upstream commit 343190841a1f22b96996d9f8cfab902a4d1bfd0e ] We only check the register opcode value inside the restricted ring section, move it into the main io_uring_register() function instead and check it up front. Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- io_uring/io_uring.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 71f1cabb9f3d..1bc68dfda340 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -3897,8 +3897,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, return -EEXIST; if (ctx->restricted) { - if (opcode >= IORING_REGISTER_LAST) - return -EINVAL; opcode = array_index_nospec(opcode, IORING_REGISTER_LAST); if (!test_bit(opcode, ctx->restrictions.register_op)) return -EACCES; @@ -4054,6 +4052,9 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, long ret = -EBADF; struct fd f; + if (opcode >= IORING_REGISTER_LAST) + return -EINVAL; + f = fdget(fd); if (!f.file) return -EBADF; From 84cc257e1888862e3018db2c402b44b8915c1eb1 Mon Sep 17 00:00:00 2001 From: "YoungJun.park" Date: Fri, 28 Oct 2022 07:42:41 -0700 Subject: [PATCH 122/543] kunit: alloc_string_stream_fragment error handling bug fix [ Upstream commit 93ef83050e597634d2c7dc838a28caf5137b9404 ] When it fails to allocate fragment, it does not free and return error. And check the pointer inappropriately. Fixed merge conflicts with commit 618887768bb7 ("kunit: update NULL vs IS_ERR() tests") Shuah Khan Signed-off-by: YoungJun.park Reviewed-by: David Gow Signed-off-by: Shuah Khan Signed-off-by: Sasha Levin --- lib/kunit/string-stream.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/kunit/string-stream.c b/lib/kunit/string-stream.c index a608746020a9..7aeabe1a3dc5 100644 --- a/lib/kunit/string-stream.c +++ b/lib/kunit/string-stream.c @@ -23,8 +23,10 @@ static struct string_stream_fragment *alloc_string_stream_fragment( return ERR_PTR(-ENOMEM); frag->fragment = kunit_kmalloc(test, len, gfp); - if (!frag->fragment) + if (!frag->fragment) { + kunit_kfree(test, frag); return ERR_PTR(-ENOMEM); + } return frag; } From 27eab31ed71fec55e98b0058db98342630e7c6a9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 12 Dec 2022 15:20:04 +0100 Subject: [PATCH 123/543] nvmet: use NVME_CMD_EFFECTS_CSUPP instead of open coding it [ Upstream commit 61f37154c599cf9f2f84dcbd9be842f8645a7099 ] Use NVME_CMD_EFFECTS_CSUPP instead of open coding it and assign a single value to multiple array entries instead of repeated assignments. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Kanchan Joshi Reviewed-by: Chaitanya Kulkarni Signed-off-by: Sasha Levin --- drivers/nvme/target/admin-cmd.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index c8a061ce3ee5..76ceaadd6eea 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -164,26 +164,29 @@ out: static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log) { - log->acs[nvme_admin_get_log_page] = cpu_to_le32(1 << 0); - log->acs[nvme_admin_identify] = cpu_to_le32(1 << 0); - log->acs[nvme_admin_abort_cmd] = cpu_to_le32(1 << 0); - log->acs[nvme_admin_set_features] = cpu_to_le32(1 << 0); - log->acs[nvme_admin_get_features] = cpu_to_le32(1 << 0); - log->acs[nvme_admin_async_event] = cpu_to_le32(1 << 0); - log->acs[nvme_admin_keep_alive] = cpu_to_le32(1 << 0); + log->acs[nvme_admin_get_log_page] = + log->acs[nvme_admin_identify] = + log->acs[nvme_admin_abort_cmd] = + log->acs[nvme_admin_set_features] = + log->acs[nvme_admin_get_features] = + log->acs[nvme_admin_async_event] = + log->acs[nvme_admin_keep_alive] = + cpu_to_le32(NVME_CMD_EFFECTS_CSUPP); - log->iocs[nvme_cmd_read] = cpu_to_le32(1 << 0); - log->iocs[nvme_cmd_write] = cpu_to_le32(1 << 0); - log->iocs[nvme_cmd_flush] = cpu_to_le32(1 << 0); - log->iocs[nvme_cmd_dsm] = cpu_to_le32(1 << 0); - log->iocs[nvme_cmd_write_zeroes] = cpu_to_le32(1 << 0); + log->iocs[nvme_cmd_read] = + log->iocs[nvme_cmd_write] = + log->iocs[nvme_cmd_flush] = + log->iocs[nvme_cmd_dsm] = + log->iocs[nvme_cmd_write_zeroes] = + cpu_to_le32(NVME_CMD_EFFECTS_CSUPP); } static void nvmet_get_cmd_effects_zns(struct nvme_effects_log *log) { - log->iocs[nvme_cmd_zone_append] = cpu_to_le32(1 << 0); - log->iocs[nvme_cmd_zone_mgmt_send] = cpu_to_le32(1 << 0); - log->iocs[nvme_cmd_zone_mgmt_recv] = cpu_to_le32(1 << 0); + log->iocs[nvme_cmd_zone_append] = + log->iocs[nvme_cmd_zone_mgmt_send] = + log->iocs[nvme_cmd_zone_mgmt_recv] = + cpu_to_le32(NVME_CMD_EFFECTS_CSUPP); } static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req) From f6631b9b4f912af298c739684ee523482b2ecaa1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 21 Dec 2022 10:12:17 +0100 Subject: [PATCH 124/543] nvme: also return I/O command effects from nvme_command_effects [ Upstream commit 831ed60c2aca2d7c517b2da22897a90224a97d27 ] To be able to use the Commands Supported and Effects Log for allowing unprivileged passtrough, it needs to be corretly reported for I/O commands as well. Return the I/O command effects from nvme_command_effects, and also add a default list of effects for the NVM command set. For other command sets, the Commands Supported and Effects log is required to be present already. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Kanchan Joshi Signed-off-by: Sasha Levin --- drivers/nvme/host/core.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 108b5022cead..1ded96d1bfd2 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1069,6 +1069,18 @@ static u32 nvme_known_admin_effects(u8 opcode) return 0; } +static u32 nvme_known_nvm_effects(u8 opcode) +{ + switch (opcode) { + case nvme_cmd_write: + case nvme_cmd_write_zeroes: + case nvme_cmd_write_uncor: + return NVME_CMD_EFFECTS_LBCC; + default: + return 0; + } +} + u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode) { u32 effects = 0; @@ -1076,16 +1088,24 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode) if (ns) { if (ns->head->effects) effects = le32_to_cpu(ns->head->effects->iocs[opcode]); + if (ns->head->ids.csi == NVME_CAP_CSS_NVM) + effects |= nvme_known_nvm_effects(opcode); if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC)) dev_warn_once(ctrl->device, - "IO command:%02x has unhandled effects:%08x\n", + "IO command:%02x has unusual effects:%08x\n", opcode, effects); - return 0; - } - if (ctrl->effects) - effects = le32_to_cpu(ctrl->effects->acs[opcode]); - effects |= nvme_known_admin_effects(opcode); + /* + * NVME_CMD_EFFECTS_CSE_MASK causes a freeze all I/O queues, + * which would deadlock when done on an I/O command. Note that + * We already warn about an unusual effect above. + */ + effects &= ~NVME_CMD_EFFECTS_CSE_MASK; + } else { + if (ctrl->effects) + effects = le32_to_cpu(ctrl->effects->acs[opcode]); + effects |= nvme_known_admin_effects(opcode); + } return effects; } From 2dfc2347776e491a0ae7d7f38cc037e53b36ea10 Mon Sep 17 00:00:00 2001 From: Kai Vehmanen Date: Fri, 9 Dec 2022 13:45:28 +0200 Subject: [PATCH 125/543] ASoC: SOF: Intel: pci-tgl: unblock S5 entry if DMA stop has failed" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2aa2a5ead0ee0a358bf80a2984a641d1bf2adc2a ] If system shutdown has not been completed cleanly, it is possible the DMA stream shutdown has not been done, or was not clean. If this is the case, Intel TGL/ADL HDA platforms may fail to shutdown cleanly due to pending HDA DMA transactions. To avoid this, detect this scenario in the shutdown callback, and perform an additional controller reset. This has been tested to unblock S5 entry if this condition is hit. Co-developed-by: Archana Patni Signed-off-by: Archana Patni Signed-off-by: Kai Vehmanen Reviewed-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Reviewed-by: Ranjani Sridharan Link: https://lore.kernel.org/r/20221209114529.3909192-2-kai.vehmanen@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sof/intel/hda-dsp.c | 72 +++++++++++++++++++++++++++++++++++ sound/soc/sof/intel/hda.h | 1 + sound/soc/sof/intel/tgl.c | 2 +- 3 files changed, 74 insertions(+), 1 deletion(-) diff --git a/sound/soc/sof/intel/hda-dsp.c b/sound/soc/sof/intel/hda-dsp.c index 3c76f843454b..428aee8fd93b 100644 --- a/sound/soc/sof/intel/hda-dsp.c +++ b/sound/soc/sof/intel/hda-dsp.c @@ -903,6 +903,78 @@ int hda_dsp_suspend(struct snd_sof_dev *sdev, u32 target_state) return snd_sof_dsp_set_power_state(sdev, &target_dsp_state); } +static unsigned int hda_dsp_check_for_dma_streams(struct snd_sof_dev *sdev) +{ + struct hdac_bus *bus = sof_to_bus(sdev); + struct hdac_stream *s; + unsigned int active_streams = 0; + int sd_offset; + u32 val; + + list_for_each_entry(s, &bus->stream_list, list) { + sd_offset = SOF_STREAM_SD_OFFSET(s); + val = snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR, + sd_offset); + if (val & SOF_HDA_SD_CTL_DMA_START) + active_streams |= BIT(s->index); + } + + return active_streams; +} + +static int hda_dsp_s5_quirk(struct snd_sof_dev *sdev) +{ + int ret; + + /* + * Do not assume a certain timing between the prior + * suspend flow, and running of this quirk function. + * This is needed if the controller was just put + * to reset before calling this function. + */ + usleep_range(500, 1000); + + /* + * Take controller out of reset to flush DMA + * transactions. + */ + ret = hda_dsp_ctrl_link_reset(sdev, false); + if (ret < 0) + return ret; + + usleep_range(500, 1000); + + /* Restore state for shutdown, back to reset */ + ret = hda_dsp_ctrl_link_reset(sdev, true); + if (ret < 0) + return ret; + + return ret; +} + +int hda_dsp_shutdown_dma_flush(struct snd_sof_dev *sdev) +{ + unsigned int active_streams; + int ret, ret2; + + /* check if DMA cleanup has been successful */ + active_streams = hda_dsp_check_for_dma_streams(sdev); + + sdev->system_suspend_target = SOF_SUSPEND_S3; + ret = snd_sof_suspend(sdev->dev); + + if (active_streams) { + dev_warn(sdev->dev, + "There were active DSP streams (%#x) at shutdown, trying to recover\n", + active_streams); + ret2 = hda_dsp_s5_quirk(sdev); + if (ret2 < 0) + dev_err(sdev->dev, "shutdown recovery failed (%d)\n", ret2); + } + + return ret; +} + int hda_dsp_shutdown(struct snd_sof_dev *sdev) { sdev->system_suspend_target = SOF_SUSPEND_S3; diff --git a/sound/soc/sof/intel/hda.h b/sound/soc/sof/intel/hda.h index 2ab3c3840b92..9acd21901e68 100644 --- a/sound/soc/sof/intel/hda.h +++ b/sound/soc/sof/intel/hda.h @@ -581,6 +581,7 @@ int hda_dsp_resume(struct snd_sof_dev *sdev); int hda_dsp_runtime_suspend(struct snd_sof_dev *sdev); int hda_dsp_runtime_resume(struct snd_sof_dev *sdev); int hda_dsp_runtime_idle(struct snd_sof_dev *sdev); +int hda_dsp_shutdown_dma_flush(struct snd_sof_dev *sdev); int hda_dsp_shutdown(struct snd_sof_dev *sdev); int hda_dsp_set_hw_params_upon_resume(struct snd_sof_dev *sdev); void hda_dsp_dump(struct snd_sof_dev *sdev, u32 flags); diff --git a/sound/soc/sof/intel/tgl.c b/sound/soc/sof/intel/tgl.c index 9ae2890e9dac..8637fe102c87 100644 --- a/sound/soc/sof/intel/tgl.c +++ b/sound/soc/sof/intel/tgl.c @@ -60,7 +60,7 @@ int sof_tgl_ops_init(struct snd_sof_dev *sdev) memcpy(&sof_tgl_ops, &sof_hda_common_ops, sizeof(struct snd_sof_dsp_ops)); /* probe/remove/shutdown */ - sof_tgl_ops.shutdown = hda_dsp_shutdown; + sof_tgl_ops.shutdown = hda_dsp_shutdown_dma_flush; if (sdev->pdata->ipc_type == SOF_IPC) { /* doorbell */ From 5bd3c7abeb69fb4133418b846a1c6dc11313d6f0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 22 Nov 2022 12:51:22 +0100 Subject: [PATCH 126/543] x86/kexec: Fix double-free of elf header buffer commit d00dd2f2645dca04cf399d8fc692f3f69b6dd996 upstream. After b3e34a47f989 ("x86/kexec: fix memory leak of elf header buffer"), freeing image->elf_headers in the error path of crash_load_segments() is not needed because kimage_file_post_load_cleanup() will take care of that later. And not clearing it could result in a double-free. Drop the superfluous vfree() call at the error path of crash_load_segments(). Fixes: b3e34a47f989 ("x86/kexec: fix memory leak of elf header buffer") Signed-off-by: Takashi Iwai Signed-off-by: Borislav Petkov (AMD) Acked-by: Baoquan He Acked-by: Vlastimil Babka Cc: Link: https://lore.kernel.org/r/20221122115122.13937-1-tiwai@suse.de Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/crash.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 9730c88530fc..305514431f26 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -401,10 +401,8 @@ int crash_load_segments(struct kimage *image) kbuf.buf_align = ELF_CORE_HEADER_ALIGN; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; ret = kexec_add_buffer(&kbuf); - if (ret) { - vfree((void *)image->elf_headers); + if (ret) return ret; - } image->elf_load_addr = kbuf.mem; pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n", image->elf_load_addr, kbuf.bufsz, kbuf.memsz); From e8377f0456fb6738a4668d4df16c13d7599925fd Mon Sep 17 00:00:00 2001 From: Rodrigo Branco Date: Tue, 3 Jan 2023 14:17:51 -0600 Subject: [PATCH 127/543] x86/bugs: Flush IBP in ib_prctl_set() commit a664ec9158eeddd75121d39c9a0758016097fa96 upstream. We missed the window between the TIF flag update and the next reschedule. Signed-off-by: Rodrigo Branco Reviewed-by: Borislav Petkov (AMD) Signed-off-by: Ingo Molnar Cc: Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/bugs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 6daf84229548..16d8e43be775 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1951,6 +1951,8 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) if (ctrl == PR_SPEC_FORCE_DISABLE) task_set_spec_ib_force_disable(task); task_update_spec_tif(task); + if (task == current) + indirect_branch_prediction_barrier(); break; default: return -ERANGE; From 421fd5c9e0aedac833e0be9d50883d7c9ae2a43f Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 13 Dec 2022 13:08:26 -0500 Subject: [PATCH 128/543] nfsd: fix handling of readdir in v4root vs. mount upcall timeout commit cad853374d85fe678d721512cecfabd7636e51f3 upstream. If v4 READDIR operation hits a mountpoint and gets back an error, then it will include that entry in the reply and set RDATTR_ERROR for it to the error. That's fine for "normal" exported filesystems, but on the v4root, we need to be more careful to only expose the existence of dentries that lead to exports. If the mountd upcall times out while checking to see whether a mountpoint on the v4root is exported, then we have no recourse other than to fail the whole operation. Cc: Steve Dickson Link: https://bugzilla.kernel.org/show_bug.cgi?id=216777 Reported-by: JianHong Yin Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Cc: Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4xdr.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index bcfeb1a922c0..78849646fe83 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3570,6 +3570,17 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, case nfserr_noent: xdr_truncate_encode(xdr, start_offset); goto skip_entry; + case nfserr_jukebox: + /* + * The pseudoroot should only display dentries that lead to + * exports. If we get EJUKEBOX here, then we can't tell whether + * this entry should be included. Just fail the whole READDIR + * with NFS4ERR_DELAY in that case, and hope that the situation + * will resolve itself by the client's next attempt. + */ + if (cd->rd_fhp->fh_export->ex_flags & NFSEXP_V4ROOT) + goto fail; + fallthrough; default: /* * If the client requested the RDATTR_ERROR attribute, From a3fb152c2f17991b65e8ee861f1c2d11848b3626 Mon Sep 17 00:00:00 2001 From: Paul Menzel Date: Mon, 2 Jan 2023 14:57:30 +0100 Subject: [PATCH 129/543] fbdev: matroxfb: G200eW: Increase max memory from 1 MB to 16 MB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f685dd7a8025f2554f73748cfdb8143a21fb92c7 upstream. Commit 62d89a7d49af ("video: fbdev: matroxfb: set maxvram of vbG200eW to the same as vbG200 to avoid black screen") accidently decreases the maximum memory size for the Matrox G200eW (102b:0532) from 8 MB to 1 MB by missing one zero. This caused the driver initialization to fail with the messages below, as the minimum required VRAM size is 2 MB: [ 9.436420] matroxfb: Matrox MGA-G200eW (PCI) detected [ 9.444502] matroxfb: cannot determine memory size [ 9.449316] matroxfb: probe of 0000:0a:03.0 failed with error -1 So, add the missing 0 to make it the intended 16 MB. Successfully tested on the Dell PowerEdge R910/0KYD3D, BIOS 2.10.0 08/29/2013, that the warning is gone. While at it, add a leading 0 to the maxdisplayable entry, so it’s aligned properly. The value could probably also be increased from 8 MB to 16 MB, as the G200 uses the same values, but I have not checked any datasheet. Note, matroxfb is obsolete and superseded by the maintained DRM driver mga200, which is used by default on most systems where both drivers are available. Therefore, on most systems it was only a cosmetic issue. Fixes: 62d89a7d49af ("video: fbdev: matroxfb: set maxvram of vbG200eW to the same as vbG200 to avoid black screen") Link: https://lore.kernel.org/linux-fbdev/972999d3-b75d-5680-fcef-6e6905c52ac5@suse.de/T/#mb6953a9995ebd18acc8552f99d6db39787aec775 Cc: it+linux-fbdev@molgen.mpg.de Cc: Z. Liu Cc: Rich Felker Cc: stable@vger.kernel.org Signed-off-by: Paul Menzel Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- drivers/video/fbdev/matrox/matroxfb_base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/matrox/matroxfb_base.c b/drivers/video/fbdev/matrox/matroxfb_base.c index 775d34115e2d..82f53e998b01 100644 --- a/drivers/video/fbdev/matrox/matroxfb_base.c +++ b/drivers/video/fbdev/matrox/matroxfb_base.c @@ -1378,8 +1378,8 @@ static struct video_board vbG200 = { .lowlevel = &matrox_G100 }; static struct video_board vbG200eW = { - .maxvram = 0x100000, - .maxdisplayable = 0x800000, + .maxvram = 0x1000000, + .maxdisplayable = 0x0800000, .accelID = FB_ACCEL_MATROX_MGAG200, .lowlevel = &matrox_G100 }; From 7f656fff955ccb216c40fa188a24c05fa40985a5 Mon Sep 17 00:00:00 2001 From: Chuang Wang Date: Sat, 24 Dec 2022 21:31:46 +0800 Subject: [PATCH 130/543] bpf: Fix panic due to wrong pageattr of im->image commit 9ed1d9aeef5842ecacb660fce933613b58af1e00 upstream. In the scenario where livepatch and kretfunc coexist, the pageattr of im->image is rox after arch_prepare_bpf_trampoline in bpf_trampoline_update, and then modify_fentry or register_fentry returns -EAGAIN from bpf_tramp_ftrace_ops_func, the BPF_TRAMP_F_ORIG_STACK flag will be configured, and arch_prepare_bpf_trampoline will be re-executed. At this time, because the pageattr of im->image is rox, arch_prepare_bpf_trampoline will read and write im->image, which causes a fault. as follows: insmod livepatch-sample.ko # samples/livepatch/livepatch-sample.c bpftrace -e 'kretfunc:cmdline_proc_show {}' BUG: unable to handle page fault for address: ffffffffa0206000 PGD 322d067 P4D 322d067 PUD 322e063 PMD 1297e067 PTE d428061 Oops: 0003 [#1] PREEMPT SMP PTI CPU: 2 PID: 270 Comm: bpftrace Tainted: G E K 6.1.0 #5 RIP: 0010:arch_prepare_bpf_trampoline+0xed/0x8c0 RSP: 0018:ffffc90001083ad8 EFLAGS: 00010202 RAX: ffffffffa0206000 RBX: 0000000000000020 RCX: 0000000000000000 RDX: ffffffffa0206001 RSI: ffffffffa0206000 RDI: 0000000000000030 RBP: ffffc90001083b70 R08: 0000000000000066 R09: ffff88800f51b400 R10: 000000002e72c6e5 R11: 00000000d0a15080 R12: ffff8880110a68c8 R13: 0000000000000000 R14: ffff88800f51b400 R15: ffffffff814fec10 FS: 00007f87bc0dc780(0000) GS:ffff88803e600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffa0206000 CR3: 0000000010b70000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: bpf_trampoline_update+0x25a/0x6b0 __bpf_trampoline_link_prog+0x101/0x240 bpf_trampoline_link_prog+0x2d/0x50 bpf_tracing_prog_attach+0x24c/0x530 bpf_raw_tp_link_attach+0x73/0x1d0 __sys_bpf+0x100e/0x2570 __x64_sys_bpf+0x1c/0x30 do_syscall_64+0x5b/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd With this patch, when modify_fentry or register_fentry returns -EAGAIN from bpf_tramp_ftrace_ops_func, the pageattr of im->image will be reset to nx+rw. Cc: stable@vger.kernel.org Fixes: 00963a2e75a8 ("bpf: Support bpf_trampoline on functions with IPMODIFY (e.g. livepatch)") Signed-off-by: Chuang Wang Acked-by: Jiri Olsa Acked-by: Song Liu Link: https://lore.kernel.org/r/20221224133146.780578-1-nashuiliang@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/trampoline.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index bf0906e1e2b9..f1504cb5b6e1 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -489,6 +489,10 @@ again: /* reset fops->func and fops->trampoline for re-register */ tr->fops->func = NULL; tr->fops->trampoline = 0; + + /* reset im->image memory attr for arch_prepare_bpf_trampoline */ + set_memory_nx((long)im->image, 1); + set_memory_rw((long)im->image, 1); goto again; } #endif From d54f66bc9c371e4765d78144c8dac568a59a31dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Wed, 21 Dec 2022 16:24:13 +0100 Subject: [PATCH 131/543] Revert "drm/amd/display: Enable Freesync Video Mode by default" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6fe6ece398f7431784847e922a2c8c385dc58a35 upstream. This reverts commit de05abe6b9d0fe08f65d744f7f75a4cba4df27ad. The bug referenced below was bisected to this commit. There has been no activity toward fixing it in 3 months, so let's revert for now. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2162 Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 27 +++++++++++++++++++ .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 12 +++++---- 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2eca58220550..5f1d0990c6f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -196,6 +196,7 @@ extern int amdgpu_emu_mode; extern uint amdgpu_smu_memory_pool_size; extern int amdgpu_smu_pptable_id; extern uint amdgpu_dc_feature_mask; +extern uint amdgpu_freesync_vid_mode; extern uint amdgpu_dc_debug_mask; extern uint amdgpu_dc_visual_confirm; extern uint amdgpu_dm_abm_level; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index d8dfbb9b735d..b59466972ed7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -180,6 +180,7 @@ int amdgpu_mes_kiq; int amdgpu_noretry = -1; int amdgpu_force_asic_type = -1; int amdgpu_tmz = -1; /* auto */ +uint amdgpu_freesync_vid_mode; int amdgpu_reset_method = -1; /* auto */ int amdgpu_num_kcq = -1; int amdgpu_smartshift_bias; @@ -877,6 +878,32 @@ module_param_named(backlight, amdgpu_backlight, bint, 0444); MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto (default), 0 = off, 1 = on)"); module_param_named(tmz, amdgpu_tmz, int, 0444); +/** + * DOC: freesync_video (uint) + * Enable the optimization to adjust front porch timing to achieve seamless + * mode change experience when setting a freesync supported mode for which full + * modeset is not needed. + * + * The Display Core will add a set of modes derived from the base FreeSync + * video mode into the corresponding connector's mode list based on commonly + * used refresh rates and VRR range of the connected display, when users enable + * this feature. From the userspace perspective, they can see a seamless mode + * change experience when the change between different refresh rates under the + * same resolution. Additionally, userspace applications such as Video playback + * can read this modeset list and change the refresh rate based on the video + * frame rate. Finally, the userspace can also derive an appropriate mode for a + * particular refresh rate based on the FreeSync Mode and add it to the + * connector's mode list. + * + * Note: This is an experimental feature. + * + * The default value: 0 (off). + */ +MODULE_PARM_DESC( + freesync_video, + "Enable freesync modesetting optimization feature (0 = off (default), 1 = on)"); +module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444); + /** * DOC: reset_method (int) * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 6f1cc5ce4c7a..dacad8b85963 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5807,7 +5807,8 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector, */ DRM_DEBUG_DRIVER("No preferred mode found\n"); } else { - recalculate_timing = is_freesync_video_mode(&mode, aconnector); + recalculate_timing = amdgpu_freesync_vid_mode && + is_freesync_video_mode(&mode, aconnector); if (recalculate_timing) { freesync_mode = get_highest_refresh_rate_mode(aconnector, false); drm_mode_copy(&saved_mode, &mode); @@ -6892,7 +6893,7 @@ static void amdgpu_dm_connector_add_freesync_modes(struct drm_connector *connect struct amdgpu_dm_connector *amdgpu_dm_connector = to_amdgpu_dm_connector(connector); - if (!edid) + if (!(amdgpu_freesync_vid_mode && edid)) return; if (amdgpu_dm_connector->max_vfreq - amdgpu_dm_connector->min_vfreq > 10) @@ -8753,7 +8754,8 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, * TODO: Refactor this function to allow this check to work * in all conditions. */ - if (dm_new_crtc_state->stream && + if (amdgpu_freesync_vid_mode && + dm_new_crtc_state->stream && is_timing_unchanged_for_freesync(new_crtc_state, old_crtc_state)) goto skip_modeset; @@ -8788,7 +8790,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, if (!dm_old_crtc_state->stream) goto skip_modeset; - if (dm_new_crtc_state->stream && + if (amdgpu_freesync_vid_mode && dm_new_crtc_state->stream && is_timing_unchanged_for_freesync(new_crtc_state, old_crtc_state)) { new_crtc_state->mode_changed = false; @@ -8800,7 +8802,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, set_freesync_fixed_config(dm_new_crtc_state); goto skip_modeset; - } else if (aconnector && + } else if (amdgpu_freesync_vid_mode && aconnector && is_freesync_video_mode(&new_crtc_state->mode, aconnector)) { struct drm_display_mode *high_mode; From 49d901dce4b989d18c81556fc27481886384b36b Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 29 Dec 2022 17:33:34 +0100 Subject: [PATCH 132/543] Revert "net: dsa: qca8k: cache lo and hi for mdio write" commit 03cb9e6d0b32b768e3d9d473c5c4ca1100877664 upstream. This reverts commit 2481d206fae7884cd07014fd1318e63af35e99eb. The Documentation is very confusing about the topic. The cache logic for hi and lo is wrong and actually miss some regs to be actually written. What the Documentation actually intended was that it's possible to skip writing hi OR lo if half of the reg is not needed to be written or read. Revert the change in favor of a better and correct implementation. Reported-by: Ronald Wahl Signed-off-by: Christian Marangi Cc: stable@vger.kernel.org # v5.18+ Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/qca/qca8k-8xxx.c | 61 +++++++------------------------- drivers/net/dsa/qca/qca8k.h | 5 --- 2 files changed, 12 insertions(+), 54 deletions(-) diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c index c5c3b4e92f28..34bf022bb724 100644 --- a/drivers/net/dsa/qca/qca8k-8xxx.c +++ b/drivers/net/dsa/qca/qca8k-8xxx.c @@ -36,44 +36,6 @@ qca8k_split_addr(u32 regaddr, u16 *r1, u16 *r2, u16 *page) *page = regaddr & 0x3ff; } -static int -qca8k_set_lo(struct qca8k_priv *priv, int phy_id, u32 regnum, u16 lo) -{ - u16 *cached_lo = &priv->mdio_cache.lo; - struct mii_bus *bus = priv->bus; - int ret; - - if (lo == *cached_lo) - return 0; - - ret = bus->write(bus, phy_id, regnum, lo); - if (ret < 0) - dev_err_ratelimited(&bus->dev, - "failed to write qca8k 32bit lo register\n"); - - *cached_lo = lo; - return 0; -} - -static int -qca8k_set_hi(struct qca8k_priv *priv, int phy_id, u32 regnum, u16 hi) -{ - u16 *cached_hi = &priv->mdio_cache.hi; - struct mii_bus *bus = priv->bus; - int ret; - - if (hi == *cached_hi) - return 0; - - ret = bus->write(bus, phy_id, regnum, hi); - if (ret < 0) - dev_err_ratelimited(&bus->dev, - "failed to write qca8k 32bit hi register\n"); - - *cached_hi = hi; - return 0; -} - static int qca8k_mii_read32(struct mii_bus *bus, int phy_id, u32 regnum, u32 *val) { @@ -97,7 +59,7 @@ qca8k_mii_read32(struct mii_bus *bus, int phy_id, u32 regnum, u32 *val) } static void -qca8k_mii_write32(struct qca8k_priv *priv, int phy_id, u32 regnum, u32 val) +qca8k_mii_write32(struct mii_bus *bus, int phy_id, u32 regnum, u32 val) { u16 lo, hi; int ret; @@ -105,9 +67,12 @@ qca8k_mii_write32(struct qca8k_priv *priv, int phy_id, u32 regnum, u32 val) lo = val & 0xffff; hi = (u16)(val >> 16); - ret = qca8k_set_lo(priv, phy_id, regnum, lo); + ret = bus->write(bus, phy_id, regnum, lo); if (ret >= 0) - ret = qca8k_set_hi(priv, phy_id, regnum + 1, hi); + ret = bus->write(bus, phy_id, regnum + 1, hi); + if (ret < 0) + dev_err_ratelimited(&bus->dev, + "failed to write qca8k 32bit register\n"); } static int @@ -417,7 +382,7 @@ qca8k_regmap_write(void *ctx, uint32_t reg, uint32_t val) if (ret < 0) goto exit; - qca8k_mii_write32(priv, 0x10 | r2, r1, val); + qca8k_mii_write32(bus, 0x10 | r2, r1, val); exit: mutex_unlock(&bus->mdio_lock); @@ -450,7 +415,7 @@ qca8k_regmap_update_bits(void *ctx, uint32_t reg, uint32_t mask, uint32_t write_ val &= ~mask; val |= write_val; - qca8k_mii_write32(priv, 0x10 | r2, r1, val); + qca8k_mii_write32(bus, 0x10 | r2, r1, val); exit: mutex_unlock(&bus->mdio_lock); @@ -725,14 +690,14 @@ qca8k_mdio_write(struct qca8k_priv *priv, int phy, int regnum, u16 data) if (ret) goto exit; - qca8k_mii_write32(priv, 0x10 | r2, r1, val); + qca8k_mii_write32(bus, 0x10 | r2, r1, val); ret = qca8k_mdio_busy_wait(bus, QCA8K_MDIO_MASTER_CTRL, QCA8K_MDIO_MASTER_BUSY); exit: /* even if the busy_wait timeouts try to clear the MASTER_EN */ - qca8k_mii_write32(priv, 0x10 | r2, r1, 0); + qca8k_mii_write32(bus, 0x10 | r2, r1, 0); mutex_unlock(&bus->mdio_lock); @@ -762,7 +727,7 @@ qca8k_mdio_read(struct qca8k_priv *priv, int phy, int regnum) if (ret) goto exit; - qca8k_mii_write32(priv, 0x10 | r2, r1, val); + qca8k_mii_write32(bus, 0x10 | r2, r1, val); ret = qca8k_mdio_busy_wait(bus, QCA8K_MDIO_MASTER_CTRL, QCA8K_MDIO_MASTER_BUSY); @@ -773,7 +738,7 @@ qca8k_mdio_read(struct qca8k_priv *priv, int phy, int regnum) exit: /* even if the busy_wait timeouts try to clear the MASTER_EN */ - qca8k_mii_write32(priv, 0x10 | r2, r1, 0); + qca8k_mii_write32(bus, 0x10 | r2, r1, 0); mutex_unlock(&bus->mdio_lock); @@ -1943,8 +1908,6 @@ qca8k_sw_probe(struct mdio_device *mdiodev) } priv->mdio_cache.page = 0xffff; - priv->mdio_cache.lo = 0xffff; - priv->mdio_cache.hi = 0xffff; /* Check the detected switch id */ ret = qca8k_read_switch_id(priv); diff --git a/drivers/net/dsa/qca/qca8k.h b/drivers/net/dsa/qca/qca8k.h index 0b7a5cb12321..03514f7a20be 100644 --- a/drivers/net/dsa/qca/qca8k.h +++ b/drivers/net/dsa/qca/qca8k.h @@ -375,11 +375,6 @@ struct qca8k_mdio_cache { * mdio writes */ u16 page; -/* lo and hi can also be cached and from Documentation we can skip one - * extra mdio write if lo or hi is didn't change. - */ - u16 lo; - u16 hi; }; struct qca8k_pcs { From ec60222356703c3ca3c5b35609e2d41495b4a9b4 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 29 Dec 2022 17:33:32 +0100 Subject: [PATCH 133/543] net: dsa: qca8k: fix wrong length value for mgmt eth packet commit 9807ae69746196ee4bbffe7d22d22ab2b61c6ed0 upstream. The assumption that Documentation was right about how this value work was wrong. It was discovered that the length value of the mgmt header is in step of word size. As an example to process 4 byte of data the correct length to set is 2. To process 8 byte 4, 12 byte 6, 16 byte 8... Odd values will always return the next size on the ack packet. (length of 3 (6 byte) will always return 8 bytes of data) This means that a value of 15 (0xf) actually means reading/writing 32 bytes of data instead of 16 bytes. This behaviour is totally absent and not documented in the switch Documentation. In fact from Documentation the max value that mgmt eth can process is 16 byte of data while in reality it can process 32 bytes at once. To handle this we always round up the length after deviding it for word size. We check if the result is odd and we round another time to align to what the switch will provide in the ack packet. The workaround for the length limit of 15 is still needed as the length reg max value is 0xf(15) Reported-by: Ronald Wahl Tested-by: Ronald Wahl Fixes: 90386223f44e ("net: dsa: qca8k: add support for larger read/write size with mgmt Ethernet") Signed-off-by: Christian Marangi Cc: stable@vger.kernel.org # v5.18+ Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/qca/qca8k-8xxx.c | 45 +++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c index 34bf022bb724..fbcd5c2b13ae 100644 --- a/drivers/net/dsa/qca/qca8k-8xxx.c +++ b/drivers/net/dsa/qca/qca8k-8xxx.c @@ -111,7 +111,16 @@ static void qca8k_rw_reg_ack_handler(struct dsa_switch *ds, struct sk_buff *skb) command = get_unaligned_le32(&mgmt_ethhdr->command); cmd = FIELD_GET(QCA_HDR_MGMT_CMD, command); + len = FIELD_GET(QCA_HDR_MGMT_LENGTH, command); + /* Special case for len of 15 as this is the max value for len and needs to + * be increased before converting it from word to dword. + */ + if (len == 15) + len++; + + /* We can ignore odd value, we always round up them in the alloc function. */ + len *= sizeof(u16); /* Make sure the seq match the requested packet */ if (get_unaligned_le32(&mgmt_ethhdr->seq) == mgmt_eth_data->seq) @@ -158,17 +167,33 @@ static struct sk_buff *qca8k_alloc_mdio_header(enum mdio_cmd cmd, u32 reg, u32 * if (!skb) return NULL; - /* Max value for len reg is 15 (0xf) but the switch actually return 16 byte - * Actually for some reason the steps are: - * 0: nothing - * 1-4: first 4 byte - * 5-6: first 12 byte - * 7-15: all 16 byte + /* Hdr mgmt length value is in step of word size. + * As an example to process 4 byte of data the correct length to set is 2. + * To process 8 byte 4, 12 byte 6, 16 byte 8... + * + * Odd values will always return the next size on the ack packet. + * (length of 3 (6 byte) will always return 8 bytes of data) + * + * This means that a value of 15 (0xf) actually means reading/writing 32 bytes + * of data. + * + * To correctly calculate the length we devide the requested len by word and + * round up. + * On the ack function we can skip the odd check as we already handle the + * case here. */ - if (len == 16) - real_len = 15; - else - real_len = len; + real_len = DIV_ROUND_UP(len, sizeof(u16)); + + /* We check if the result len is odd and we round up another time to + * the next size. (length of 3 will be increased to 4 as switch will always + * return 8 bytes) + */ + if (real_len % sizeof(u16) != 0) + real_len++; + + /* Max reg value is 0xf(15) but switch will always return the next size (32 byte) */ + if (real_len == 16) + real_len--; skb_reset_mac_header(skb); skb_set_network_header(skb, skb->len); From 39a20c4354bee56b647c4eef3e7f0e8d235e122c Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 29 Dec 2022 17:33:33 +0100 Subject: [PATCH 134/543] net: dsa: tag_qca: fix wrong MGMT_DATA2 size commit d9dba91be71f03cc75bcf39fc0d5d99ff33f1ae0 upstream. It was discovered that MGMT_DATA2 can contain up to 28 bytes of data instead of the 12 bytes written in the Documentation by accounting the limit of 16 bytes declared in Documentation subtracting the first 4 byte in the packet header. Update the define with the real world value. Tested-by: Ronald Wahl Fixes: c2ee8181fddb ("net: dsa: tag_qca: add define for handling mgmt Ethernet packet") Signed-off-by: Christian Marangi Cc: stable@vger.kernel.org # v5.18+ Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/dsa/tag_qca.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/dsa/tag_qca.h b/include/linux/dsa/tag_qca.h index b1b5720d89a5..ee657452f122 100644 --- a/include/linux/dsa/tag_qca.h +++ b/include/linux/dsa/tag_qca.h @@ -45,8 +45,8 @@ struct sk_buff; QCA_HDR_MGMT_COMMAND_LEN + \ QCA_HDR_MGMT_DATA1_LEN) -#define QCA_HDR_MGMT_DATA2_LEN 12 /* Other 12 byte for the mdio data */ -#define QCA_HDR_MGMT_PADDING_LEN 34 /* Padding to reach the min Ethernet packet */ +#define QCA_HDR_MGMT_DATA2_LEN 28 /* Other 28 byte for the mdio data */ +#define QCA_HDR_MGMT_PADDING_LEN 18 /* Padding to reach the min Ethernet packet */ #define QCA_HDR_MGMT_PKT_LEN (QCA_HDR_MGMT_HEADER_LEN + \ QCA_HDR_LEN + \ From 6d47e0f6a535701134d950db65eb8fe1edf0b575 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 4 Jan 2023 08:52:06 -0700 Subject: [PATCH 135/543] block: don't allow splitting of a REQ_NOWAIT bio commit 9cea62b2cbabff8ed46f2df17778b624ad9dd25a upstream. If we split a bio marked with REQ_NOWAIT, then we can trigger spurious EAGAIN if constituent parts of that split bio end up failing request allocations. Parts will complete just fine, but just a single failure in one of the chained bios will yield an EAGAIN final result for the parent bio. Return EAGAIN early if we end up needing to split such a bio, which allows for saner recovery handling. Cc: stable@vger.kernel.org # 5.15+ Link: https://github.com/axboe/liburing/issues/766 Reported-by: Michael Kelley Reviewed-by: Keith Busch Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-merge.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/block/blk-merge.c b/block/blk-merge.c index ff04e9290715..f46c87ef951d 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -300,6 +300,16 @@ static struct bio *bio_split_rw(struct bio *bio, struct queue_limits *lim, *segs = nsegs; return NULL; split: + /* + * We can't sanely support splitting for a REQ_NOWAIT bio. End it + * with EAGAIN if splitting is required and return an error pointer. + */ + if (bio->bi_opf & REQ_NOWAIT) { + bio->bi_status = BLK_STS_AGAIN; + bio_endio(bio); + return ERR_PTR(-EAGAIN); + } + *segs = nsegs; /* From 91d1295199853126f6c7493c995b5604657225ad Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 4 Jan 2023 01:34:02 +0000 Subject: [PATCH 136/543] io_uring: pin context while queueing deferred tw commit 9ffa13ff78a0a55df968a72d6f0ebffccee5c9f4 upstream. Unlike normal tw, nothing prevents deferred tw to be executed right after an tw item added to ->work_llist in io_req_local_work_add(). For instance, the waiting task may get waken up by CQ posting or a normal tw. Thus we need to pin the ring for the rest of io_req_local_work_add() Cc: stable@vger.kernel.org Fixes: c0e0d6ba25f18 ("io_uring: add IORING_SETUP_DEFER_TASKRUN") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/1a79362b9c10b8523ef70b061d96523650a23344.1672795998.git.asml.silence@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 1bc68dfda340..c93e98867798 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1109,13 +1109,18 @@ static void io_req_local_work_add(struct io_kiocb *req) { struct io_ring_ctx *ctx = req->ctx; - if (!llist_add(&req->io_task_work.node, &ctx->work_llist)) + percpu_ref_get(&ctx->refs); + + if (!llist_add(&req->io_task_work.node, &ctx->work_llist)) { + percpu_ref_put(&ctx->refs); return; + } /* need it for the following io_cqring_wake() */ smp_mb__after_atomic(); if (unlikely(atomic_read(&req->task->io_uring->in_idle))) { io_move_task_work_from_local(ctx); + percpu_ref_put(&ctx->refs); return; } @@ -1125,6 +1130,7 @@ static void io_req_local_work_add(struct io_kiocb *req) if (ctx->has_evfd) io_eventfd_signal(ctx); __io_cqring_wake(ctx); + percpu_ref_put(&ctx->refs); } static inline void __io_req_task_work_add(struct io_kiocb *req, bool allow_local) From e595dcd987d041eaea64d7b898c7936875f69260 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 5 Jan 2023 10:49:15 +0000 Subject: [PATCH 137/543] io_uring: fix CQ waiting timeout handling commit 12521a5d5cb7ff0ad43eadfc9c135d86e1131fa8 upstream. Jiffy to ktime CQ waiting conversion broke how we treat timeouts, in particular we rearm it anew every time we get into io_cqring_wait_schedule() without adjusting the timeout. Waiting for 2 CQEs and getting a task_work in the middle may double the timeout value, or even worse in some cases task may wait indefinitely. Cc: stable@vger.kernel.org Fixes: 228339662b398 ("io_uring: don't convert to jiffies for waiting on timeouts") Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/f7bffddd71b08f28a877d44d37ac953ddb01590d.1672915663.git.asml.silence@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/io_uring.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index c93e98867798..cea5de98c423 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2364,7 +2364,7 @@ int io_run_task_work_sig(struct io_ring_ctx *ctx) /* when returns >0, the caller should retry */ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, - ktime_t timeout) + ktime_t *timeout) { int ret; unsigned long check_cq; @@ -2382,7 +2382,7 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, if (check_cq & BIT(IO_CHECK_CQ_DROPPED_BIT)) return -EBADR; } - if (!schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS)) + if (!schedule_hrtimeout(timeout, HRTIMER_MODE_ABS)) return -ETIME; return 1; } @@ -2452,7 +2452,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, } prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq, TASK_INTERRUPTIBLE); - ret = io_cqring_wait_schedule(ctx, &iowq, timeout); + ret = io_cqring_wait_schedule(ctx, &iowq, &timeout); cond_resched(); } while (ret > 0); From ed2d0e160ce4a718034c114dcdfc2bc9b5158124 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 6 Jan 2023 04:01:56 +0100 Subject: [PATCH 138/543] tpm: Allow system suspend to continue when TPM suspend fails commit 1382999aa0548a171a272ca817f6c38e797c458c upstream. TPM 1 is sometimes broken across system suspends, due to races or locking issues or something else that haven't been diagnosed or fixed yet, most likely having to do with concurrent reads from the TPM's hardware random number generator driver. These issues prevent the system from actually suspending, with errors like: tpm tpm0: A TPM error (28) occurred continue selftest ... tpm tpm0: A TPM error (28) occurred attempting get random ... tpm tpm0: Error (28) sending savestate before suspend tpm_tis 00:08: PM: __pnp_bus_suspend(): tpm_pm_suspend+0x0/0x80 returns 28 tpm_tis 00:08: PM: dpm_run_callback(): pnp_bus_suspend+0x0/0x10 returns 28 tpm_tis 00:08: PM: failed to suspend: error 28 PM: Some devices failed to suspend, or early wake event detected This issue was partially fixed by 23393c646142 ("char: tpm: Protect tpm_pm_suspend with locks"), in a last minute 6.1 commit that Linus took directly because the TPM maintainers weren't available. However, it seems like this just addresses the most common cases of the bug, rather than addressing it entirely. So there are more things to fix still, apparently. In lieu of actually fixing the underlying bug, just allow system suspend to continue, so that laptops still go to sleep fine. Later, this can be reverted when the real bug is fixed. Link: https://lore.kernel.org/lkml/7cbe96cf-e0b5-ba63-d1b4-f63d2e826efa@suse.cz/ Cc: stable@vger.kernel.org # 6.1+ Reported-by: Vlastimil Babka Suggested-by: Linus Torvalds Acked-by: Luigi Semenzato Cc: Peter Huewe Cc: Jarkko Sakkinen Cc: James Bottomley Cc: Johannes Altmanninger Signed-off-by: Jason A. Donenfeld Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm-interface.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index d69905233aff..7e513b771832 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -412,7 +412,9 @@ int tpm_pm_suspend(struct device *dev) } suspended: - return rc; + if (rc) + dev_err(dev, "Ignoring error %d while suspending\n", rc); + return 0; } EXPORT_SYMBOL_GPL(tpm_pm_suspend); From 8b258a31c2e8d4d4e42be70a7c6ca35a5afbff0d Mon Sep 17 00:00:00 2001 From: Cindy Lu Date: Mon, 19 Dec 2022 15:33:31 +0800 Subject: [PATCH 139/543] vhost_vdpa: fix the crash in unmap a large memory commit e794070af224ade46db368271896b2685ff4f96b upstream. While testing in vIOMMU, sometimes Guest will unmap very large memory, which will cause the crash. To fix this, add a new function vhost_vdpa_general_unmap(). This function will only unmap the memory that saved in iotlb. Call Trace: [ 647.820144] ------------[ cut here ]------------ [ 647.820848] kernel BUG at drivers/iommu/intel/iommu.c:1174! [ 647.821486] invalid opcode: 0000 [#1] PREEMPT SMP PTI [ 647.822082] CPU: 10 PID: 1181 Comm: qemu-system-x86 Not tainted 6.0.0-rc1home_lulu_2452_lulu7_vhost+ #62 [ 647.823139] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.15.0-29-g6a62e0cb0dfe-prebuilt.qem4 [ 647.824365] RIP: 0010:domain_unmap+0x48/0x110 [ 647.825424] Code: 48 89 fb 8d 4c f6 1e 39 c1 0f 4f c8 83 e9 0c 83 f9 3f 7f 18 48 89 e8 48 d3 e8 48 85 c0 75 59 [ 647.828064] RSP: 0018:ffffae5340c0bbf0 EFLAGS: 00010202 [ 647.828973] RAX: 0000000000000001 RBX: ffff921793d10540 RCX: 000000000000001b [ 647.830083] RDX: 00000000080000ff RSI: 0000000000000001 RDI: ffff921793d10540 [ 647.831214] RBP: 0000000007fc0100 R08: ffffae5340c0bcd0 R09: 0000000000000003 [ 647.832388] R10: 0000007fc0100000 R11: 0000000000100000 R12: 00000000080000ff [ 647.833668] R13: ffffae5340c0bcd0 R14: ffff921793d10590 R15: 0000008000100000 [ 647.834782] FS: 00007f772ec90640(0000) GS:ffff921ce7a80000(0000) knlGS:0000000000000000 [ 647.836004] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 647.836990] CR2: 00007f02c27a3a20 CR3: 0000000101b0c006 CR4: 0000000000372ee0 [ 647.838107] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 647.839283] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 647.840666] Call Trace: [ 647.841437] [ 647.842107] intel_iommu_unmap_pages+0x93/0x140 [ 647.843112] __iommu_unmap+0x91/0x1b0 [ 647.844003] iommu_unmap+0x6a/0x95 [ 647.844885] vhost_vdpa_unmap+0x1de/0x1f0 [vhost_vdpa] [ 647.845985] vhost_vdpa_process_iotlb_msg+0xf0/0x90b [vhost_vdpa] [ 647.847235] ? _raw_spin_unlock+0x15/0x30 [ 647.848181] ? _copy_from_iter+0x8c/0x580 [ 647.849137] vhost_chr_write_iter+0xb3/0x430 [vhost] [ 647.850126] vfs_write+0x1e4/0x3a0 [ 647.850897] ksys_write+0x53/0xd0 [ 647.851688] do_syscall_64+0x3a/0x90 [ 647.852508] entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 647.853457] RIP: 0033:0x7f7734ef9f4f [ 647.854408] Code: 89 54 24 18 48 89 74 24 10 89 7c 24 08 e8 29 76 f8 ff 48 8b 54 24 18 48 8b 74 24 10 41 89 c8 [ 647.857217] RSP: 002b:00007f772ec8f040 EFLAGS: 00000293 ORIG_RAX: 0000000000000001 [ 647.858486] RAX: ffffffffffffffda RBX: 00000000fef00000 RCX: 00007f7734ef9f4f [ 647.859713] RDX: 0000000000000048 RSI: 00007f772ec8f090 RDI: 0000000000000010 [ 647.860942] RBP: 00007f772ec8f1a0 R08: 0000000000000000 R09: 0000000000000000 [ 647.862206] R10: 0000000000000001 R11: 0000000000000293 R12: 0000000000000010 [ 647.863446] R13: 0000000000000002 R14: 0000000000000000 R15: ffffffff01100000 [ 647.864692] [ 647.865458] Modules linked in: rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs v] [ 647.874688] ---[ end trace 0000000000000000 ]--- Cc: stable@vger.kernel.org Fixes: 4c8cf31885f6 ("vhost: introduce vDPA-based backend") Signed-off-by: Cindy Lu Message-Id: <20221219073331.556140-1-lulu@redhat.com> Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/vdpa.c | 46 +++++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index b08e07fc7d1f..ec32f785dfde 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -66,8 +66,8 @@ static DEFINE_IDA(vhost_vdpa_ida); static dev_t vhost_vdpa_major; static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, - struct vhost_iotlb *iotlb, - u64 start, u64 last); + struct vhost_iotlb *iotlb, u64 start, + u64 last, u32 asid); static inline u32 iotlb_to_asid(struct vhost_iotlb *iotlb) { @@ -139,7 +139,7 @@ static int vhost_vdpa_remove_as(struct vhost_vdpa *v, u32 asid) return -EINVAL; hlist_del(&as->hash_link); - vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1); + vhost_vdpa_iotlb_unmap(v, &as->iotlb, 0ULL, 0ULL - 1, asid); kfree(as); return 0; @@ -687,10 +687,20 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, mutex_unlock(&d->mutex); return r; } +static void vhost_vdpa_general_unmap(struct vhost_vdpa *v, + struct vhost_iotlb_map *map, u32 asid) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + if (ops->dma_map) { + ops->dma_unmap(vdpa, asid, map->start, map->size); + } else if (ops->set_map == NULL) { + iommu_unmap(v->domain, map->start, map->size); + } +} -static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, - struct vhost_iotlb *iotlb, - u64 start, u64 last) +static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb, + u64 start, u64 last, u32 asid) { struct vhost_dev *dev = &v->vdev; struct vhost_iotlb_map *map; @@ -707,13 +717,13 @@ static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, unpin_user_page(page); } atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm); + vhost_vdpa_general_unmap(v, map, asid); vhost_iotlb_map_free(iotlb, map); } } -static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, - struct vhost_iotlb *iotlb, - u64 start, u64 last) +static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, struct vhost_iotlb *iotlb, + u64 start, u64 last, u32 asid) { struct vhost_iotlb_map *map; struct vdpa_map_file *map_file; @@ -722,20 +732,21 @@ static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, map_file = (struct vdpa_map_file *)map->opaque; fput(map_file->file); kfree(map_file); + vhost_vdpa_general_unmap(v, map, asid); vhost_iotlb_map_free(iotlb, map); } } static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, - struct vhost_iotlb *iotlb, - u64 start, u64 last) + struct vhost_iotlb *iotlb, u64 start, + u64 last, u32 asid) { struct vdpa_device *vdpa = v->vdpa; if (vdpa->use_va) - return vhost_vdpa_va_unmap(v, iotlb, start, last); + return vhost_vdpa_va_unmap(v, iotlb, start, last, asid); - return vhost_vdpa_pa_unmap(v, iotlb, start, last); + return vhost_vdpa_pa_unmap(v, iotlb, start, last, asid); } static int perm_to_iommu_flags(u32 perm) @@ -802,17 +813,12 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, const struct vdpa_config_ops *ops = vdpa->config; u32 asid = iotlb_to_asid(iotlb); - vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1); + vhost_vdpa_iotlb_unmap(v, iotlb, iova, iova + size - 1, asid); - if (ops->dma_map) { - ops->dma_unmap(vdpa, asid, iova, size); - } else if (ops->set_map) { + if (ops->set_map) { if (!v->in_batch) ops->set_map(vdpa, asid, iotlb); - } else { - iommu_unmap(v->domain, iova, size); } - /* If we are in the middle of batch processing, delay the free * of AS until BATCH_END. */ From 7f56c4fa299a4f6124f00a6590d7151832b5f170 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Tue, 27 Dec 2022 16:10:05 -0800 Subject: [PATCH 140/543] thermal: int340x: Add missing attribute for data rate base commit b878d3ba9bb41cddb73ba4b56e5552f0a638daca upstream. Commit 473be51142ad ("thermal: int340x: processor_thermal: Add RFIM driver")' added rfi_restriction_data_rate_base string, mmio details and documentation, but missed adding attribute to sysfs. Add missing sysfs attribute. Fixes: 473be51142ad ("thermal: int340x: processor_thermal: Add RFIM driver") Cc: 5.11+ # v5.11+ Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- .../thermal/intel/int340x_thermal/processor_thermal_rfim.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c index 8c42e7662033..92ed1213fe37 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c @@ -172,6 +172,7 @@ static const struct attribute_group fivr_attribute_group = { RFIM_SHOW(rfi_restriction_run_busy, 1) RFIM_SHOW(rfi_restriction_err_code, 1) RFIM_SHOW(rfi_restriction_data_rate, 1) +RFIM_SHOW(rfi_restriction_data_rate_base, 1) RFIM_SHOW(ddr_data_rate_point_0, 1) RFIM_SHOW(ddr_data_rate_point_1, 1) RFIM_SHOW(ddr_data_rate_point_2, 1) @@ -181,11 +182,13 @@ RFIM_SHOW(rfi_disable, 1) RFIM_STORE(rfi_restriction_run_busy, 1) RFIM_STORE(rfi_restriction_err_code, 1) RFIM_STORE(rfi_restriction_data_rate, 1) +RFIM_STORE(rfi_restriction_data_rate_base, 1) RFIM_STORE(rfi_disable, 1) static DEVICE_ATTR_RW(rfi_restriction_run_busy); static DEVICE_ATTR_RW(rfi_restriction_err_code); static DEVICE_ATTR_RW(rfi_restriction_data_rate); +static DEVICE_ATTR_RW(rfi_restriction_data_rate_base); static DEVICE_ATTR_RO(ddr_data_rate_point_0); static DEVICE_ATTR_RO(ddr_data_rate_point_1); static DEVICE_ATTR_RO(ddr_data_rate_point_2); @@ -248,6 +251,7 @@ static struct attribute *dvfs_attrs[] = { &dev_attr_rfi_restriction_run_busy.attr, &dev_attr_rfi_restriction_err_code.attr, &dev_attr_rfi_restriction_data_rate.attr, + &dev_attr_rfi_restriction_data_rate_base.attr, &dev_attr_ddr_data_rate_point_0.attr, &dev_attr_ddr_data_rate_point_1.attr, &dev_attr_ddr_data_rate_point_2.attr, From 36fd385ae2ca04a95e34f52a188e3aaf985e757f Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 29 Dec 2022 17:05:45 +0000 Subject: [PATCH 141/543] riscv: uaccess: fix type of 0 variable on error in get_user() commit b9b916aee6715cd7f3318af6dc360c4729417b94 upstream. If the get_user(x, ptr) has x as a pointer, then the setting of (x) = 0 is going to produce the following sparse warning, so fix this by forcing the type of 'x' when access_ok() fails. fs/aio.c:2073:21: warning: Using plain integer as NULL pointer Signed-off-by: Ben Dooks Reviewed-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20221229170545.718264-1-ben-linux@fluff.org Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index 855450bed9f5..ec0cab9fbddd 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -165,7 +165,7 @@ do { \ might_fault(); \ access_ok(__p, sizeof(*__p)) ? \ __get_user((x), __p) : \ - ((x) = 0, -EFAULT); \ + ((x) = (__force __typeof__(x))0, -EFAULT); \ }) #define __put_user_asm(insn, x, ptr, err) \ From a33220faead65fd3ac42284f32b83094543ea91b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Mon, 2 Jan 2023 17:07:48 +0100 Subject: [PATCH 142/543] riscv, kprobes: Stricter c.jr/c.jalr decoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b2d473a6019ef9a54b0156ecdb2e0398c9fa6a24 upstream. In the compressed instruction extension, c.jr, c.jalr, c.mv, and c.add is encoded the following way (each instruction is 16b): ---+-+-----------+-----------+-- 100 0 rs1[4:0]!=0 00000 10 : c.jr 100 1 rs1[4:0]!=0 00000 10 : c.jalr 100 0 rd[4:0]!=0 rs2[4:0]!=0 10 : c.mv 100 1 rd[4:0]!=0 rs2[4:0]!=0 10 : c.add The following logic is used to decode c.jr and c.jalr: insn & 0xf007 == 0x8002 => instruction is an c.jr insn & 0xf007 == 0x9002 => instruction is an c.jalr When 0xf007 is used to mask the instruction, c.mv can be incorrectly decoded as c.jr, and c.add as c.jalr. Correct the decoding by changing the mask from 0xf007 to 0xf07f. Fixes: c22b0bcb1dd0 ("riscv: Add kprobes supported") Signed-off-by: Björn Töpel Reviewed-by: Conor Dooley Reviewed-by: Guo Ren Link: https://lore.kernel.org/r/20230102160748.1307289-1-bjorn@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/kernel/probes/simulate-insn.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/probes/simulate-insn.h b/arch/riscv/kernel/probes/simulate-insn.h index cb6ff7dccb92..de8474146a9b 100644 --- a/arch/riscv/kernel/probes/simulate-insn.h +++ b/arch/riscv/kernel/probes/simulate-insn.h @@ -31,9 +31,9 @@ __RISCV_INSN_FUNCS(fence, 0x7f, 0x0f); } while (0) __RISCV_INSN_FUNCS(c_j, 0xe003, 0xa001); -__RISCV_INSN_FUNCS(c_jr, 0xf007, 0x8002); +__RISCV_INSN_FUNCS(c_jr, 0xf07f, 0x8002); __RISCV_INSN_FUNCS(c_jal, 0xe003, 0x2001); -__RISCV_INSN_FUNCS(c_jalr, 0xf007, 0x9002); +__RISCV_INSN_FUNCS(c_jalr, 0xf07f, 0x9002); __RISCV_INSN_FUNCS(c_beqz, 0xe003, 0xc001); __RISCV_INSN_FUNCS(c_bnez, 0xe003, 0xe001); __RISCV_INSN_FUNCS(c_ebreak, 0xffff, 0x9002); From c4849f18185fd4e93b04cd45552f8d68c0240e21 Mon Sep 17 00:00:00 2001 From: Andreas Rammhold Date: Fri, 23 Dec 2022 12:27:47 +0100 Subject: [PATCH 143/543] of/fdt: run soc memory setup when early_init_dt_scan_memory fails commit 2a12187d5853d9fd5102278cecef7dac7c8ce7ea upstream. If memory has been found early_init_dt_scan_memory now returns 1. If it hasn't found any memory it will return 0, allowing other memory setup mechanisms to carry on. Previously early_init_dt_scan_memory always returned 0 without distinguishing between any kind of memory setup being done or not. Any code path after the early_init_dt_scan memory call in the ramips plat_mem_setup code wouldn't be executed anymore. Making early_init_dt_scan_memory the only way to initialize the memory. Some boards, including my mt7621 based Cudy X6 board, depend on memory initialization being done via the soc_info.mem_detect function pointer. Those wouldn't be able to obtain memory and panic the kernel during early bootup with the message "early_init_dt_alloc_memory_arch: Failed to allocate 12416 bytes align=0x40". Fixes: 1f012283e936 ("of/fdt: Rework early_init_dt_scan_memory() to call directly") Cc: stable@vger.kernel.org Signed-off-by: Andreas Rammhold Link: https://lore.kernel.org/r/20221223112748.2935235-1-andreas@rammhold.de Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- arch/mips/ralink/of.c | 2 +- drivers/of/fdt.c | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c index 01c132bc33d5..4d06de77d92a 100644 --- a/arch/mips/ralink/of.c +++ b/arch/mips/ralink/of.c @@ -64,7 +64,7 @@ void __init plat_mem_setup(void) dtb = get_fdt(); __dt_setup_arch(dtb); - if (!early_init_dt_scan_memory()) + if (early_init_dt_scan_memory()) return; if (soc_info.mem_detect) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 7b571a631639..4f88e8bbdd27 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -1099,7 +1099,7 @@ u64 __init dt_mem_next_cell(int s, const __be32 **cellp) */ int __init early_init_dt_scan_memory(void) { - int node; + int node, found_memory = 0; const void *fdt = initial_boot_params; fdt_for_each_subnode(node, fdt, 0) { @@ -1139,6 +1139,8 @@ int __init early_init_dt_scan_memory(void) early_init_dt_add_memory_arch(base, size); + found_memory = 1; + if (!hotpluggable) continue; @@ -1147,7 +1149,7 @@ int __init early_init_dt_scan_memory(void) base, base + size); } } - return 0; + return found_memory; } int __init early_init_dt_scan_chosen(char *cmdline) From c7041ec41036f64db5104f33348c45a1aedcf098 Mon Sep 17 00:00:00 2001 From: Ma Jun Date: Fri, 16 Dec 2022 11:05:26 +0800 Subject: [PATCH 144/543] drm/plane-helper: Add the missing declaration of drm_atomic_state commit 4e699e34f923188175986ad8a74ab99f7034075e upstream. Add the missing declaration of struct drm_atomic_state to fix the compile error below: error: 'struct drm_atomic_state' declared inside parameter list will not be visible outside of this definition or declaration [-Werror] Signed-off-by: Ma Jun Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Fixes: 8401bd361f59 ("drm/plane-helper: Add a drm_plane_helper_atomic_check() helper") Cc: Javier Martinez Canillas Cc: Thomas Zimmermann Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: David Airlie Cc: Daniel Vetter Cc: dri-devel@lists.freedesktop.org Cc: # v6.1+ Link: https://patchwork.freedesktop.org/patch/msgid/20221216030526.1335609-1-majun@amd.com Signed-off-by: Greg Kroah-Hartman --- include/drm/drm_plane_helper.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/drm/drm_plane_helper.h b/include/drm/drm_plane_helper.h index ff83d2621687..3a574e8cd22f 100644 --- a/include/drm/drm_plane_helper.h +++ b/include/drm/drm_plane_helper.h @@ -26,6 +26,7 @@ #include +struct drm_atomic_state; struct drm_crtc; struct drm_framebuffer; struct drm_modeset_acquire_ctx; From 306888b1246bf44e703b6f1ccc746c2746c1a981 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Tue, 20 Dec 2022 17:11:24 -0500 Subject: [PATCH 145/543] drm/amdkfd: Fix kernel warning during topology setup commit cf97eb7e47d4671084c7e114c5d88a3d0540ecbd upstream. This patch fixes the following kernel warning seen during driver load by correctly initializing the p2plink attr before creating the sysfs file: [ +0.002865] ------------[ cut here ]------------ [ +0.002327] kobject: '(null)' (0000000056260cfb): is not initialized, yet kobject_put() is being called. [ +0.004780] WARNING: CPU: 32 PID: 1006 at lib/kobject.c:718 kobject_put+0xaa/0x1c0 [ +0.001361] Call Trace: [ +0.001234] [ +0.001067] kfd_remove_sysfs_node_entry+0x24a/0x2d0 [amdgpu] [ +0.003147] kfd_topology_update_sysfs+0x3d/0x750 [amdgpu] [ +0.002890] kfd_topology_add_device+0xbd7/0xc70 [amdgpu] [ +0.002844] ? lock_release+0x13c/0x2e0 [ +0.001936] ? smu_cmn_send_smc_msg_with_param+0x1e8/0x2d0 [amdgpu] [ +0.003313] ? amdgpu_dpm_get_mclk+0x54/0x60 [amdgpu] [ +0.002703] kgd2kfd_device_init.cold+0x39f/0x4ed [amdgpu] [ +0.002930] amdgpu_amdkfd_device_init+0x13d/0x1f0 [amdgpu] [ +0.002944] amdgpu_device_init.cold+0x1464/0x17b4 [amdgpu] [ +0.002970] ? pci_bus_read_config_word+0x43/0x80 [ +0.002380] amdgpu_driver_load_kms+0x15/0x100 [amdgpu] [ +0.002744] amdgpu_pci_probe+0x147/0x370 [amdgpu] [ +0.002522] local_pci_probe+0x40/0x80 [ +0.001896] work_for_cpu_fn+0x10/0x20 [ +0.001892] process_one_work+0x26e/0x5a0 [ +0.002029] worker_thread+0x1fd/0x3e0 [ +0.001890] ? process_one_work+0x5a0/0x5a0 [ +0.002115] kthread+0xea/0x110 [ +0.001618] ? kthread_complete_and_exit+0x20/0x20 [ +0.002422] ret_from_fork+0x1f/0x30 [ +0.001808] [ +0.001103] irq event stamp: 59837 [ +0.001718] hardirqs last enabled at (59849): [] __up_console_sem+0x52/0x60 [ +0.004414] hardirqs last disabled at (59860): [] __up_console_sem+0x37/0x60 [ +0.004414] softirqs last enabled at (59654): [] irq_exit_rcu+0xd7/0x130 [ +0.004205] softirqs last disabled at (59649): [] irq_exit_rcu+0xd7/0x130 [ +0.004203] ---[ end trace 0000000000000000 ]--- Fixes: 0f28cca87e9a ("drm/amdkfd: Extend KFD device topology to surface peer-to-peer links") Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 3f0a4a415907..35a9b702508a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -801,7 +801,7 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, p2plink->attr.name = "properties"; p2plink->attr.mode = KFD_SYSFS_FILE_MODE; - sysfs_attr_init(&iolink->attr); + sysfs_attr_init(&p2plink->attr); ret = sysfs_create_file(p2plink->kobj, &p2plink->attr); if (ret < 0) return ret; From fe340500baf84b6531c9fc508b167525b9bf6446 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Mon, 19 Dec 2022 22:03:56 +0800 Subject: [PATCH 146/543] drm/i915/gvt: fix gvt debugfs destroy commit c4b850d1f448a901fbf4f7f36dec38c84009b489 upstream. When gvt debug fs is destroyed, need to have a sane check if drm minor's debugfs root is still available or not, otherwise in case like device remove through unbinding, drm minor's debugfs directory has already been removed, then intel_gvt_debugfs_clean() would act upon dangling pointer like below oops. i915 0000:00:02.0: Direct firmware load for i915/gvt/vid_0x8086_did_0x1926_rid_0x0a.golden_hw_state failed with error -2 i915 0000:00:02.0: MDEV: Registered Console: switching to colour dummy device 80x25 i915 0000:00:02.0: MDEV: Unregistering BUG: kernel NULL pointer dereference, address: 00000000000000a0 PGD 0 P4D 0 Oops: 0002 [#1] PREEMPT SMP PTI CPU: 2 PID: 2486 Comm: gfx-unbind.sh Tainted: G I 6.1.0-rc8+ #15 Hardware name: Dell Inc. XPS 13 9350/0JXC1H, BIOS 1.13.0 02/10/2020 RIP: 0010:down_write+0x1f/0x90 Code: 1d ff ff 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 53 48 89 fb e8 62 c0 ff ff bf 01 00 00 00 e8 28 5e 31 ff 31 c0 ba 01 00 00 00 48 0f b1 13 75 33 65 48 8b 04 25 c0 bd 01 00 48 89 43 08 bf 01 RSP: 0018:ffff9eb3036ffcc8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 00000000000000a0 RCX: ffffff8100000000 RDX: 0000000000000001 RSI: 0000000000000064 RDI: ffffffffa48787a8 RBP: ffff9eb3036ffd30 R08: ffffeb1fc45a0608 R09: ffffeb1fc45a05c0 R10: 0000000000000002 R11: 0000000000000000 R12: 0000000000000000 R13: ffff91acc33fa328 R14: ffff91acc033f080 R15: ffff91acced533e0 FS: 00007f6947bba740(0000) GS:ffff91ae36d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000a0 CR3: 00000001133a2002 CR4: 00000000003706e0 Call Trace: simple_recursive_removal+0x9f/0x2a0 ? start_creating.part.0+0x120/0x120 ? _raw_spin_lock+0x13/0x40 debugfs_remove+0x40/0x60 intel_gvt_debugfs_clean+0x15/0x30 [kvmgt] intel_gvt_clean_device+0x49/0xe0 [kvmgt] intel_gvt_driver_remove+0x2f/0xb0 i915_driver_remove+0xa4/0xf0 i915_pci_remove+0x1a/0x30 pci_device_remove+0x33/0xa0 device_release_driver_internal+0x1b2/0x230 unbind_store+0xe0/0x110 kernfs_fop_write_iter+0x11b/0x1f0 vfs_write+0x203/0x3d0 ksys_write+0x63/0xe0 do_syscall_64+0x37/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f6947cb5190 Code: 40 00 48 8b 15 71 9c 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 80 3d 51 24 0e 00 00 74 17 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 58 c3 0f 1f 80 00 00 00 00 48 83 ec 28 48 89 RSP: 002b:00007ffcbac45a28 EFLAGS: 00000202 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 000000000000000d RCX: 00007f6947cb5190 RDX: 000000000000000d RSI: 0000555e35c866a0 RDI: 0000000000000001 RBP: 0000555e35c866a0 R08: 0000000000000002 R09: 0000555e358cb97c R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000001 R13: 000000000000000d R14: 0000000000000000 R15: 0000555e358cb8e0 Modules linked in: kvmgt CR2: 00000000000000a0 ---[ end trace 0000000000000000 ]--- Cc: Wang, Zhi Cc: He, Yu Cc: stable@vger.kernel.org Reviewed-by: Zhi Wang Fixes: bc7b0be316ae ("drm/i915/gvt: Add basic debugfs infrastructure") Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20221219140357.769557-1-zhenyuw@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gvt/debugfs.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c index 9f1c209d9251..d7df27feee8c 100644 --- a/drivers/gpu/drm/i915/gvt/debugfs.c +++ b/drivers/gpu/drm/i915/gvt/debugfs.c @@ -199,6 +199,10 @@ void intel_gvt_debugfs_init(struct intel_gvt *gvt) */ void intel_gvt_debugfs_clean(struct intel_gvt *gvt) { - debugfs_remove_recursive(gvt->debugfs_root); - gvt->debugfs_root = NULL; + struct drm_minor *minor = gvt->gt->i915->drm.primary; + + if (minor->debugfs_root) { + debugfs_remove_recursive(gvt->debugfs_root); + gvt->debugfs_root = NULL; + } } From 44c0e07e3972e3f2609d69ad873d4f342f8a68ec Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Mon, 19 Dec 2022 22:03:57 +0800 Subject: [PATCH 147/543] drm/i915/gvt: fix vgpu debugfs clean in remove commit 704f3384f322b40ba24d958473edfb1c9750c8fd upstream. Check carefully on root debugfs available when destroying vgpu, e.g in remove case drm minor's debugfs root might already be destroyed, which led to kernel oops like below. Console: switching to colour dummy device 80x25 i915 0000:00:02.0: MDEV: Unregistering intel_vgpu_mdev b1338b2d-a709-4c23-b766-cc436c36cdf0: Removing from iommu group 14 BUG: kernel NULL pointer dereference, address: 0000000000000150 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP CPU: 3 PID: 1046 Comm: driverctl Not tainted 6.1.0-rc2+ #6 Hardware name: HP HP ProDesk 600 G3 MT/829D, BIOS P02 Ver. 02.44 09/13/2022 RIP: 0010:__lock_acquire+0x5e2/0x1f90 Code: 87 ad 09 00 00 39 05 e1 1e cc 02 0f 82 f1 09 00 00 ba 01 00 00 00 48 83 c4 48 89 d0 5b 5d 41 5c 41 5d 41 5e 41 5f c3 45 31 ff <48> 81 3f 60 9e c2 b6 45 0f 45 f8 83 fe 01 0f 87 55 fa ff ff 89 f0 RSP: 0018:ffff9f770274f948 EFLAGS: 00010046 RAX: 0000000000000003 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000150 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 R10: ffff8895d1173300 R11: 0000000000000001 R12: 0000000000000000 R13: 0000000000000150 R14: 0000000000000000 R15: 0000000000000000 FS: 00007fc9b2ba0740(0000) GS:ffff889cdfcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000150 CR3: 000000010fd93005 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: lock_acquire+0xbf/0x2b0 ? simple_recursive_removal+0xa5/0x2b0 ? lock_release+0x13d/0x2d0 down_write+0x2a/0xd0 ? simple_recursive_removal+0xa5/0x2b0 simple_recursive_removal+0xa5/0x2b0 ? start_creating.part.0+0x110/0x110 ? _raw_spin_unlock+0x29/0x40 debugfs_remove+0x40/0x60 intel_gvt_debugfs_remove_vgpu+0x15/0x30 [kvmgt] intel_gvt_destroy_vgpu+0x60/0x100 [kvmgt] intel_vgpu_release_dev+0xe/0x20 [kvmgt] device_release+0x30/0x80 kobject_put+0x79/0x1b0 device_release_driver_internal+0x1b8/0x230 bus_remove_device+0xec/0x160 device_del+0x189/0x400 ? up_write+0x9c/0x1b0 ? mdev_device_remove_common+0x60/0x60 [mdev] mdev_device_remove_common+0x22/0x60 [mdev] mdev_device_remove_cb+0x17/0x20 [mdev] device_for_each_child+0x56/0x80 mdev_unregister_parent+0x5a/0x81 [mdev] intel_gvt_clean_device+0x2d/0xe0 [kvmgt] intel_gvt_driver_remove+0x2e/0xb0 [i915] i915_driver_remove+0xac/0x100 [i915] i915_pci_remove+0x1a/0x30 [i915] pci_device_remove+0x31/0xa0 device_release_driver_internal+0x1b8/0x230 unbind_store+0xd8/0x100 kernfs_fop_write_iter+0x156/0x210 vfs_write+0x236/0x4a0 ksys_write+0x61/0xd0 do_syscall_64+0x55/0x80 ? find_held_lock+0x2b/0x80 ? lock_release+0x13d/0x2d0 ? up_read+0x17/0x20 ? lock_is_held_type+0xe3/0x140 ? asm_exc_page_fault+0x22/0x30 ? lockdep_hardirqs_on+0x7d/0x100 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7fc9b2c9e0c4 Code: 15 71 7d 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 80 3d 3d 05 0e 00 00 74 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 48 83 ec 28 48 89 54 24 18 48 RSP: 002b:00007ffec29c81c8 EFLAGS: 00000202 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 000000000000000d RCX: 00007fc9b2c9e0c4 RDX: 000000000000000d RSI: 0000559f8b5f48a0 RDI: 0000000000000001 RBP: 0000559f8b5f48a0 R08: 0000559f8b5f3540 R09: 00007fc9b2d76d30 R10: 0000000000000000 R11: 0000000000000202 R12: 000000000000000d R13: 00007fc9b2d77780 R14: 000000000000000d R15: 00007fc9b2d72a00 Modules linked in: sunrpc intel_rapl_msr intel_rapl_common intel_pmc_core_pltdrv intel_pmc_core intel_tcc_cooling x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel ee1004 igbvf rapl vfat fat intel_cstate intel_uncore pktcdvd i2c_i801 pcspkr wmi_bmof i2c_smbus acpi_pad vfio_pci vfio_pci_core vfio_virqfd zram fuse dm_multipath kvmgt mdev vfio_iommu_type1 vfio kvm irqbypass i915 nvme e1000e igb nvme_core crct10dif_pclmul crc32_pclmul crc32c_intel polyval_clmulni polyval_generic serio_raw ghash_clmulni_intel sha512_ssse3 dca drm_buddy intel_gtt video wmi drm_display_helper ttm CR2: 0000000000000150 ---[ end trace 0000000000000000 ]--- Cc: Wang Zhi Cc: He Yu Cc: Alex Williamson Cc: stable@vger.kernel.org Reviewed-by: Zhi Wang Tested-by: Yu He Fixes: bc7b0be316ae ("drm/i915/gvt: Add basic debugfs infrastructure") Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20221219140357.769557-2-zhenyuw@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gvt/debugfs.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c index d7df27feee8c..e08ed0e9f165 100644 --- a/drivers/gpu/drm/i915/gvt/debugfs.c +++ b/drivers/gpu/drm/i915/gvt/debugfs.c @@ -175,8 +175,13 @@ void intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu) */ void intel_gvt_debugfs_remove_vgpu(struct intel_vgpu *vgpu) { - debugfs_remove_recursive(vgpu->debugfs); - vgpu->debugfs = NULL; + struct intel_gvt *gvt = vgpu->gvt; + struct drm_minor *minor = gvt->gt->i915->drm.primary; + + if (minor->debugfs_root && gvt->debugfs_root) { + debugfs_remove_recursive(vgpu->debugfs); + vgpu->debugfs = NULL; + } } /** From 569b4f8fb02e212353fbe7ae3d4dacd3016217d0 Mon Sep 17 00:00:00 2001 From: Dmitry Fomichev Date: Sat, 15 Oct 2022 23:41:26 -0400 Subject: [PATCH 148/543] virtio-blk: use a helper to handle request queuing errors [ Upstream commit 258896fcc786b4e7db238eba26f6dd080e0ff41e ] Define a new helper function, virtblk_fail_to_queue(), to clean up the error handling code in virtio_queue_rq(). Signed-off-by: Dmitry Fomichev Message-Id: <20221016034127.330942-2-dmitry.fomichev@wdc.com> Signed-off-by: Michael S. Tsirkin Stable-dep-of: a26116c1e740 ("virtio_blk: Fix signedness bug in virtblk_prep_rq()") Signed-off-by: Sasha Levin --- drivers/block/virtio_blk.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 19da5defd734..3efe3da5f8c2 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -315,6 +315,19 @@ static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx) virtqueue_notify(vq->vq); } +static blk_status_t virtblk_fail_to_queue(struct request *req, int rc) +{ + virtblk_cleanup_cmd(req); + switch (rc) { + case -ENOSPC: + return BLK_STS_DEV_RESOURCE; + case -ENOMEM: + return BLK_STS_RESOURCE; + default: + return BLK_STS_IOERR; + } +} + static blk_status_t virtblk_prep_rq(struct blk_mq_hw_ctx *hctx, struct virtio_blk *vblk, struct request *req, @@ -327,10 +340,8 @@ static blk_status_t virtblk_prep_rq(struct blk_mq_hw_ctx *hctx, return status; vbr->sg_table.nents = virtblk_map_data(hctx, req, vbr); - if (unlikely(vbr->sg_table.nents < 0)) { - virtblk_cleanup_cmd(req); - return BLK_STS_RESOURCE; - } + if (unlikely(vbr->sg_table.nents < 0)) + return virtblk_fail_to_queue(req, -ENOMEM); blk_mq_start_request(req); @@ -364,15 +375,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_stop_hw_queue(hctx); spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); virtblk_unmap_data(req, vbr); - virtblk_cleanup_cmd(req); - switch (err) { - case -ENOSPC: - return BLK_STS_DEV_RESOURCE; - case -ENOMEM: - return BLK_STS_RESOURCE; - default: - return BLK_STS_IOERR; - } + return virtblk_fail_to_queue(req, err); } if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq)) From bff553de2c372ef088666d89465cc60a3e2ef12d Mon Sep 17 00:00:00 2001 From: Rafael Mendonca Date: Fri, 21 Oct 2022 17:41:26 -0300 Subject: [PATCH 149/543] virtio_blk: Fix signedness bug in virtblk_prep_rq() [ Upstream commit a26116c1e74028914f281851488546c91cbae57d ] The virtblk_map_data() function returns negative error codes, however, the 'nents' field of vbr->sg_table is an unsigned int, which causes the error handling not to work correctly. Cc: stable@vger.kernel.org Fixes: 0e9911fa768f ("virtio-blk: support mq_ops->queue_rqs()") Signed-off-by: Rafael Mendonca Message-Id: <20221021204126.927603-1-rafaelmendsr@gmail.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella Reviewed-by: Suwan Kim Reviewed-by: Stefan Hajnoczi Acked-by: Jason Wang Signed-off-by: Sasha Levin --- drivers/block/virtio_blk.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 3efe3da5f8c2..a7697027ce43 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -334,14 +334,16 @@ static blk_status_t virtblk_prep_rq(struct blk_mq_hw_ctx *hctx, struct virtblk_req *vbr) { blk_status_t status; + int num; status = virtblk_setup_cmd(vblk->vdev, req, vbr); if (unlikely(status)) return status; - vbr->sg_table.nents = virtblk_map_data(hctx, req, vbr); - if (unlikely(vbr->sg_table.nents < 0)) + num = virtblk_map_data(hctx, req, vbr); + if (unlikely(num < 0)) return virtblk_fail_to_queue(req, -ENOMEM); + vbr->sg_table.nents = num; blk_mq_start_request(req); From 4ac1437d64efdd2788f8c511276243f594e946fd Mon Sep 17 00:00:00 2001 From: Dillon Varone Date: Tue, 8 Nov 2022 18:45:33 -0500 Subject: [PATCH 150/543] drm/amd/display: Add check for DET fetch latency hiding for dcn32 [ Upstream commit 6d4727c80947de0e6fad58b196a9d215e3b32608 ] [WHY?] Some configurations are constructed with very marginal DET buffers relative to the worst possible time required to fetch a swath. [HOW?] Add a check to see that the DET buffer allocated for each pipe can hide the latency for all pipes to fetch at least one swath. Reviewed-by: Alvin Lee Reviewed-by: Jun Lei Acked-by: Brian Chang Signed-off-by: Dillon Varone Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Stable-dep-of: f3c23bea598a ("drm/amd/display: Uninitialized variables causing 4k60 UCLK to stay at DPM1 and not DPM0") Signed-off-by: Sasha Levin --- .../dc/dml/dcn32/display_mode_vba_32.c | 39 +++++++++++ .../dc/dml/dcn32/display_mode_vba_util_32.c | 69 +++++++++++++++++++ .../dc/dml/dcn32/display_mode_vba_util_32.h | 18 +++++ .../drm/amd/display/dc/dml/display_mode_vba.h | 2 + 4 files changed, 128 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c index 9afd9ba23fb2..820042f6aaca 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c @@ -670,6 +670,25 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman v->cursor_bw[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0] * mode_lib->vba.CursorBPP[k][0] / 8 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k]; } + v->NotEnoughDETSwathFillLatencyHiding = dml32_CalculateDETSwathFillLatencyHiding( + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.ReturnBW, + v->UrgentLatency, + mode_lib->vba.SwathHeightY, + mode_lib->vba.SwathHeightC, + v->swath_width_luma_ub, + v->swath_width_chroma_ub, + v->BytePerPixelDETY, + v->BytePerPixelDETC, + mode_lib->vba.DETBufferSizeY, + mode_lib->vba.DETBufferSizeC, + mode_lib->vba.DPPPerPlane, + mode_lib->vba.HTotal, + mode_lib->vba.PixelClock, + mode_lib->vba.VRatio, + mode_lib->vba.VRatioChroma, + mode_lib->vba.UsesMALLForPStateChange); + for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) { v->MaxVStartupLines[k] = ((mode_lib->vba.Interlace[k] && !mode_lib->vba.ProgressiveToInterlaceUnitInOPP) ? @@ -1664,6 +1683,7 @@ static void mode_support_configuration(struct vba_vars_st *v, && mode_lib->vba.PTEBufferSizeNotExceeded[i][j] == true && mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] == true && mode_lib->vba.NonsupportedDSCInputBPC == false + && mode_lib->vba.NotEnoughDETSwathFillLatencyHidingPerState[i][j] == false && !mode_lib->vba.ExceededMALLSize && ((mode_lib->vba.HostVMEnable == false && !mode_lib->vba.ImmediateFlipRequiredFinal) @@ -3158,6 +3178,25 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l mode_lib->vba.UrgentBurstFactorChroma, mode_lib->vba.UrgentBurstFactorCursor); + mode_lib->vba.NotEnoughDETSwathFillLatencyHidingPerState[i][j] = dml32_CalculateDETSwathFillLatencyHiding( + mode_lib->vba.NumberOfActiveSurfaces, + mode_lib->vba.ReturnBWPerState[i][j], + mode_lib->vba.UrgLatency[i], + mode_lib->vba.SwathHeightYThisState, + mode_lib->vba.SwathHeightCThisState, + mode_lib->vba.swath_width_luma_ub_this_state, + mode_lib->vba.swath_width_chroma_ub_this_state, + mode_lib->vba.BytePerPixelInDETY, + mode_lib->vba.BytePerPixelInDETC, + mode_lib->vba.DETBufferSizeYThisState, + mode_lib->vba.DETBufferSizeCThisState, + mode_lib->vba.NoOfDPPThisState, + mode_lib->vba.HTotal, + mode_lib->vba.PixelClock, + mode_lib->vba.VRatio, + mode_lib->vba.VRatioChroma, + mode_lib->vba.UsesMALLForPStateChange); + v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.VMDataOnlyReturnBWPerState = dml32_get_return_bw_mbps_vm_only(&mode_lib->vba.soc, i, mode_lib->vba.DCFCLKState[i][j], mode_lib->vba.FabricClockPerState[i], mode_lib->vba.DRAMSpeedPerState[i]); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index debe46b24a3e..5af601cff1a0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -6228,3 +6228,72 @@ void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurf *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW); *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW; } + +bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces, + double ReturnBW, + double UrgentLatency, + unsigned int SwathHeightY[], + unsigned int SwathHeightC[], + unsigned int SwathWidthY[], + unsigned int SwathWidthC[], + double BytePerPixelInDETY[], + double BytePerPixelInDETC[], + unsigned int DETBufferSizeY[], + unsigned int DETBufferSizeC[], + unsigned int NumOfDPP[], + unsigned int HTotal[], + double PixelClock[], + double VRatioY[], + double VRatioC[], + enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[DC__NUM_DPP__MAX]) +{ + int k; + double SwathSizeAllSurfaces = 0; + double SwathSizeAllSurfacesInFetchTimeUs; + double DETSwathLatencyHidingUs; + double DETSwathLatencyHidingYUs; + double DETSwathLatencyHidingCUs; + double SwathSizePerSurfaceY[DC__NUM_DPP__MAX]; + double SwathSizePerSurfaceC[DC__NUM_DPP__MAX]; + bool NotEnoughDETSwathFillLatencyHiding = false; + + /* calculate sum of single swath size for all pipes in bytes*/ + for (k = 0; k < NumberOfActiveSurfaces; k++) { + SwathSizePerSurfaceY[k] += SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k]; + + if (SwathHeightC[k] != 0) + SwathSizePerSurfaceC[k] += SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k]; + else + SwathSizePerSurfaceC[k] = 0; + + SwathSizeAllSurfaces += SwathSizePerSurfaceY[k] + SwathSizePerSurfaceC[k]; + } + + SwathSizeAllSurfacesInFetchTimeUs = SwathSizeAllSurfaces / ReturnBW + UrgentLatency; + + /* ensure all DET - 1 swath can hide a fetch for all surfaces */ + for (k = 0; k < NumberOfActiveSurfaces; k++) { + double LineTime = HTotal[k] / PixelClock[k]; + + /* only care if surface is not phantom */ + if (UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { + DETSwathLatencyHidingYUs = (dml_floor(DETBufferSizeY[k] / BytePerPixelInDETY[k] / SwathWidthY[k], 1.0) - SwathHeightY[k]) / VRatioY[k] * LineTime; + + if (SwathHeightC[k] != 0) { + DETSwathLatencyHidingCUs = (dml_floor(DETBufferSizeC[k] / BytePerPixelInDETC[k] / SwathWidthC[k], 1.0) - SwathHeightC[k]) / VRatioC[k] * LineTime; + + DETSwathLatencyHidingUs = dml_min(DETSwathLatencyHidingYUs, DETSwathLatencyHidingCUs); + } else { + DETSwathLatencyHidingUs = DETSwathLatencyHidingYUs; + } + + /* DET must be able to hide time to fetch 1 swath for each surface */ + if (DETSwathLatencyHidingUs < SwathSizeAllSurfacesInFetchTimeUs) { + NotEnoughDETSwathFillLatencyHiding = true; + break; + } + } + } + + return NotEnoughDETSwathFillLatencyHiding; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h index 3989c2a28fae..779c6805f599 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h @@ -1141,4 +1141,22 @@ void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurf double *FractionOfUrgentBandwidth, bool *ImmediateFlipBandwidthSupport); +bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces, + double ReturnBW, + double UrgentLatency, + unsigned int SwathHeightY[], + unsigned int SwathHeightC[], + unsigned int SwathWidthY[], + unsigned int SwathWidthC[], + double BytePerPixelInDETY[], + double BytePerPixelInDETC[], + unsigned int DETBufferSizeY[], + unsigned int DETBufferSizeC[], + unsigned int NumOfDPP[], + unsigned int HTotal[], + double PixelClock[], + double VRatioY[], + double VRatioC[], + enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[DC__NUM_DPP__MAX]); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h index a0207a8f8756..2b34b02dbd45 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h @@ -1041,6 +1041,7 @@ struct vba_vars_st { double MinFullDETBufferingTime; double AverageReadBandwidthGBytePerSecond; bool FirstMainPlane; + bool NotEnoughDETSwathFillLatencyHiding; unsigned int ViewportWidthChroma[DC__NUM_DPP__MAX]; unsigned int ViewportHeightChroma[DC__NUM_DPP__MAX]; @@ -1224,6 +1225,7 @@ struct vba_vars_st { unsigned int BlockWidthC[DC__NUM_DPP__MAX]; unsigned int SubViewportLinesNeededInMALL[DC__NUM_DPP__MAX]; bool VActiveBandwithSupport[DC__VOLTAGE_STATES][2]; + bool NotEnoughDETSwathFillLatencyHidingPerState[DC__VOLTAGE_STATES][2]; struct dummy_vars dummy_vars; }; From d179f9d27f1e31fdcf6b02c4f1658dd69985f602 Mon Sep 17 00:00:00 2001 From: Samson Tam Date: Mon, 5 Dec 2022 11:08:40 -0500 Subject: [PATCH 151/543] drm/amd/display: Uninitialized variables causing 4k60 UCLK to stay at DPM1 and not DPM0 [ Upstream commit f3c23bea598ab7e8e4b8c5ca66598921310f718e ] [Why] SwathSizePerSurfaceY[] and SwathSizePerSurfaceC[] values are uninitialized because we are using += instead of = operator. [How] Assign values in loop with = operator. Acked-by: Aurabindo Pillai Signed-off-by: Samson Tam Reviewed-by: Aric Cyr Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x, 6.1.x Signed-off-by: Sasha Levin --- .../drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c index 5af601cff1a0..b53feeaf5cf1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c @@ -6257,12 +6257,12 @@ bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurface double SwathSizePerSurfaceC[DC__NUM_DPP__MAX]; bool NotEnoughDETSwathFillLatencyHiding = false; - /* calculate sum of single swath size for all pipes in bytes*/ + /* calculate sum of single swath size for all pipes in bytes */ for (k = 0; k < NumberOfActiveSurfaces; k++) { - SwathSizePerSurfaceY[k] += SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k]; + SwathSizePerSurfaceY[k] = SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k]; if (SwathHeightC[k] != 0) - SwathSizePerSurfaceC[k] += SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k]; + SwathSizePerSurfaceC[k] = SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k]; else SwathSizePerSurfaceC[k] = 0; From 53e9d6851b56626885476a2966194ba994f8bb4b Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sun, 1 Jan 2023 09:02:21 +0800 Subject: [PATCH 152/543] btrfs: handle case when repair happens with dev-replace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d73a27b86fc722c28a26ec64002e3a7dc86d1c07 ] [BUG] There is a bug report that a BUG_ON() in btrfs_repair_io_failure() (originally repair_io_failure() in v6.0 kernel) got triggered when replacing a unreliable disk: BTRFS warning (device sda1): csum failed root 257 ino 2397453 off 39624704 csum 0xb0d18c75 expected csum 0x4dae9c5e mirror 3 kernel BUG at fs/btrfs/extent_io.c:2380! invalid opcode: 0000 [#1] PREEMPT SMP NOPTI CPU: 9 PID: 3614331 Comm: kworker/u257:2 Tainted: G OE 6.0.0-5-amd64 #1 Debian 6.0.10-2 Hardware name: Micro-Star International Co., Ltd. MS-7C60/TRX40 PRO WIFI (MS-7C60), BIOS 2.70 07/01/2021 Workqueue: btrfs-endio btrfs_end_bio_work [btrfs] RIP: 0010:repair_io_failure+0x24a/0x260 [btrfs] Call Trace: clean_io_failure+0x14d/0x180 [btrfs] end_bio_extent_readpage+0x412/0x6e0 [btrfs] ? __switch_to+0x106/0x420 process_one_work+0x1c7/0x380 worker_thread+0x4d/0x380 ? rescuer_thread+0x3a0/0x3a0 kthread+0xe9/0x110 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x22/0x30 [CAUSE] Before the BUG_ON(), we got some read errors from the replace target first, note the mirror number (3, which is beyond RAID1 duplication, thus it's read from the replace target device). Then at the BUG_ON() location, we are trying to writeback the repaired sectors back the failed device. The check looks like this: ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical, &map_length, &bioc, mirror_num); if (ret) goto out_counter_dec; BUG_ON(mirror_num != bioc->mirror_num); But inside btrfs_map_block(), we can modify bioc->mirror_num especially for dev-replace: if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 && !need_full_stripe(op) && dev_replace->tgtdev != NULL) { ret = get_extra_mirror_from_replace(fs_info, logical, *length, dev_replace->srcdev->devid, &mirror_num, &physical_to_patch_in_first_stripe); patch_the_first_stripe_for_dev_replace = 1; } Thus if we're repairing the replace target device, we're going to trigger that BUG_ON(). But in reality, the read failure from the replace target device may be that, our replace hasn't reached the range we're reading, thus we're reading garbage, but with replace running, the range would be properly filled later. Thus in that case, we don't need to do anything but let the replace routine to handle it. [FIX] Instead of a BUG_ON(), just skip the repair if we're repairing the device replace target device. Reported-by: 小太 Link: https://lore.kernel.org/linux-btrfs/CACsxjPYyJGQZ+yvjzxA1Nn2LuqkYqTCcUH43S=+wXhyf8S00Ag@mail.gmail.com/ CC: stable@vger.kernel.org # 6.0+ Signed-off-by: Qu Wenruo Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/extent_io.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4dcf22e051ff..acb3c5c3b025 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -580,7 +580,16 @@ static int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start, &map_length, &bioc, mirror_num); if (ret) goto out_counter_dec; - BUG_ON(mirror_num != bioc->mirror_num); + /* + * This happens when dev-replace is also running, and the + * mirror_num indicates the dev-replace target. + * + * In this case, we don't need to do anything, as the read + * error just means the replace progress hasn't reached our + * read range, and later replace routine would handle it well. + */ + if (mirror_num != bioc->mirror_num) + goto out_counter_dec; } sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9; From 4cd431722018d06076776e2d57331bb010141d8b Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 31 Dec 2022 17:32:31 +0900 Subject: [PATCH 153/543] ksmbd: fix infinite loop in ksmbd_conn_handler_loop() commit 83dcedd5540d4ac61376ddff5362f7d9f866a6ec upstream. If kernel_recvmsg() return -EAGAIN in ksmbd_tcp_readv() and go round again, It will cause infinite loop issue. And all threads from next connections would be doing that. This patch add max retry count(2) to avoid it. kernel_recvmsg() will wait during 7sec timeout and try to retry two time if -EAGAIN is returned. And add flags of kvmalloc to __GFP_NOWARN and __GFP_NORETRY to disconnect immediately without retrying on memory alloation failure. Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") Cc: stable@vger.kernel.org Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-18259 Reviewed-by: Sergey Senozhatsky Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/ksmbd/connection.c | 7 +++++-- fs/ksmbd/transport_tcp.c | 5 ++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/ksmbd/connection.c b/fs/ksmbd/connection.c index 12be8386446a..fd0a288af299 100644 --- a/fs/ksmbd/connection.c +++ b/fs/ksmbd/connection.c @@ -316,9 +316,12 @@ int ksmbd_conn_handler_loop(void *p) /* 4 for rfc1002 length field */ size = pdu_size + 4; - conn->request_buf = kvmalloc(size, GFP_KERNEL); + conn->request_buf = kvmalloc(size, + GFP_KERNEL | + __GFP_NOWARN | + __GFP_NORETRY); if (!conn->request_buf) - continue; + break; memcpy(conn->request_buf, hdr_buf, sizeof(hdr_buf)); if (!ksmbd_smb_request(conn)) diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c index 63d55f543bd2..4c6bd0b69979 100644 --- a/fs/ksmbd/transport_tcp.c +++ b/fs/ksmbd/transport_tcp.c @@ -295,6 +295,7 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig, struct msghdr ksmbd_msg; struct kvec *iov; struct ksmbd_conn *conn = KSMBD_TRANS(t)->conn; + int max_retry = 2; iov = get_conn_iovec(t, nr_segs); if (!iov) @@ -321,9 +322,11 @@ static int ksmbd_tcp_readv(struct tcp_transport *t, struct kvec *iov_orig, } else if (conn->status == KSMBD_SESS_NEED_RECONNECT) { total_read = -EAGAIN; break; - } else if (length == -ERESTARTSYS || length == -EAGAIN) { + } else if ((length == -ERESTARTSYS || length == -EAGAIN) && + max_retry) { usleep_range(1000, 2000); length = 0; + max_retry--; continue; } else if (length <= 0) { total_read = -EAGAIN; From ad678f30ac0d4223e7d8b08a0fb31f6b20d635a0 Mon Sep 17 00:00:00 2001 From: Marios Makassikis Date: Fri, 23 Dec 2022 11:59:31 +0100 Subject: [PATCH 154/543] ksmbd: send proper error response in smb2_tree_connect() commit cdfb2fef522d0c3f9cf293db51de88e9b3d46846 upstream. Currently, smb2_tree_connect doesn't send an error response packet on error. This causes libsmb2 to skip the specific error code and fail with the following: smb2_service failed with : Failed to parse fixed part of command payload. Unexpected size of Error reply. Expected 9, got 8 Signed-off-by: Marios Makassikis Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/ksmbd/smb2pdu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index b2fc85d440d0..533742ebcb37 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -1926,13 +1926,13 @@ int smb2_tree_connect(struct ksmbd_work *work) if (conn->posix_ext_supported) status.tree_conn->posix_extensions = true; -out_err1: rsp->StructureSize = cpu_to_le16(16); + inc_rfc1001_len(work->response_buf, 16); +out_err1: rsp->Capabilities = 0; rsp->Reserved = 0; /* default manual caching */ rsp->ShareFlags = SMB2_SHAREFLAG_MANUAL_CACHING; - inc_rfc1001_len(work->response_buf, 16); if (!IS_ERR(treename)) kfree(treename); @@ -1965,6 +1965,9 @@ out_err1: rsp->hdr.Status = STATUS_ACCESS_DENIED; } + if (status.ret != KSMBD_TREE_CONN_STATUS_OK) + smb2_set_err_rsp(work); + return rc; } From 5e7d97dbae25ab4cb0ac1b1b98aebc4915689a86 Mon Sep 17 00:00:00 2001 From: William Liu Date: Fri, 30 Dec 2022 13:03:15 +0900 Subject: [PATCH 155/543] ksmbd: check nt_len to be at least CIFS_ENCPWD_SIZE in ksmbd_decode_ntlmssp_auth_blob MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 797805d81baa814f76cf7bdab35f86408a79d707 upstream. "nt_len - CIFS_ENCPWD_SIZE" is passed directly from ksmbd_decode_ntlmssp_auth_blob to ksmbd_auth_ntlmv2. Malicious requests can set nt_len to less than CIFS_ENCPWD_SIZE, which results in a negative number (or large unsigned value) used for a subsequent memcpy in ksmbd_auth_ntlvm2 and can cause a panic. Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3") Cc: stable@vger.kernel.org Signed-off-by: William Liu Signed-off-by: Hrvoje Mišetić Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/ksmbd/auth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ksmbd/auth.c b/fs/ksmbd/auth.c index 2a39ffb8423b..6e61b5bc7d86 100644 --- a/fs/ksmbd/auth.c +++ b/fs/ksmbd/auth.c @@ -322,7 +322,8 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob, dn_off = le32_to_cpu(authblob->DomainName.BufferOffset); dn_len = le16_to_cpu(authblob->DomainName.Length); - if (blob_len < (u64)dn_off + dn_len || blob_len < (u64)nt_off + nt_len) + if (blob_len < (u64)dn_off + dn_len || blob_len < (u64)nt_off + nt_len || + nt_len < CIFS_ENCPWD_SIZE) return -EINVAL; /* TODO : use domain name that imported from configuration file */ From c7229577d93d53870fd77e961143305aeec97a7b Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 19 Dec 2022 12:59:55 +0200 Subject: [PATCH 156/543] drm/i915/dsi: add support for ICL+ native MIPI GPIO sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 963bbdb32b47cfa67a449e715e1dcc525fbd01fc upstream. Starting from ICL, the default for MIPI GPIO sequences seems to be using native GPIOs i.e. GPIOs available in the GPU. These native GPIOs reuse many pins that quite frankly seem scary to poke based on the VBT sequences. We pretty much have to trust that the board is configured such that the relevant HPD, PP_CONTROL and GPIO bits aren't used for anything else. MIPI sequence v4 also adds a flag to fall back to non-native sequences. v5: - Wrap SHOTPLUG_CTL_DDI modification in spin_lock() in icp_irq_handler() too (Ville) - References instead of Closes issue 6131 because this does not fix everything v4: - Wrap SHOTPLUG_CTL_DDI modification in spin_lock_irq() (Ville) v3: - Fix -Wbitwise-conditional-parentheses (kernel test robot ) v2: - Fix HPD pin output set (impacts GPIOs 0 and 5) - Fix GPIO data output direction set (impacts GPIOs 4 and 9) - Reduce register accesses to single intel_de_rwm() References: https://gitlab.freedesktop.org/drm/intel/-/issues/6131 Cc: Ville Syrjälä Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20221219105955.4014451-1-jani.nikula@intel.com (cherry picked from commit f087cfe6fcff58044f7aa3b284965af47f472fb0) Cc: stable@vger.kernel.org # 6.1 Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 94 +++++++++++++++++++- drivers/gpu/drm/i915/i915_irq.c | 3 + drivers/gpu/drm/i915/i915_reg.h | 1 + 3 files changed, 95 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index fce69fa446d5..41f025f089d9 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -41,9 +41,11 @@ #include "i915_drv.h" #include "i915_reg.h" +#include "intel_de.h" #include "intel_display_types.h" #include "intel_dsi.h" #include "intel_dsi_vbt.h" +#include "intel_gmbus_regs.h" #include "vlv_dsi.h" #include "vlv_dsi_regs.h" #include "vlv_sideband.h" @@ -377,6 +379,85 @@ static void icl_exec_gpio(struct intel_connector *connector, drm_dbg_kms(&dev_priv->drm, "Skipping ICL GPIO element execution\n"); } +enum { + MIPI_RESET_1 = 0, + MIPI_AVDD_EN_1, + MIPI_BKLT_EN_1, + MIPI_AVEE_EN_1, + MIPI_VIO_EN_1, + MIPI_RESET_2, + MIPI_AVDD_EN_2, + MIPI_BKLT_EN_2, + MIPI_AVEE_EN_2, + MIPI_VIO_EN_2, +}; + +static void icl_native_gpio_set_value(struct drm_i915_private *dev_priv, + int gpio, bool value) +{ + int index; + + if (drm_WARN_ON(&dev_priv->drm, DISPLAY_VER(dev_priv) == 11 && gpio >= MIPI_RESET_2)) + return; + + switch (gpio) { + case MIPI_RESET_1: + case MIPI_RESET_2: + index = gpio == MIPI_RESET_1 ? HPD_PORT_A : HPD_PORT_B; + + /* + * Disable HPD to set the pin to output, and set output + * value. The HPD pin should not be enabled for DSI anyway, + * assuming the board design and VBT are sane, and the pin isn't + * used by a non-DSI encoder. + * + * The locking protects against concurrent SHOTPLUG_CTL_DDI + * modifications in irq setup and handling. + */ + spin_lock_irq(&dev_priv->irq_lock); + intel_de_rmw(dev_priv, SHOTPLUG_CTL_DDI, + SHOTPLUG_CTL_DDI_HPD_ENABLE(index) | + SHOTPLUG_CTL_DDI_HPD_OUTPUT_DATA(index), + value ? SHOTPLUG_CTL_DDI_HPD_OUTPUT_DATA(index) : 0); + spin_unlock_irq(&dev_priv->irq_lock); + break; + case MIPI_AVDD_EN_1: + case MIPI_AVDD_EN_2: + index = gpio == MIPI_AVDD_EN_1 ? 0 : 1; + + intel_de_rmw(dev_priv, PP_CONTROL(index), PANEL_POWER_ON, + value ? PANEL_POWER_ON : 0); + break; + case MIPI_BKLT_EN_1: + case MIPI_BKLT_EN_2: + index = gpio == MIPI_AVDD_EN_1 ? 0 : 1; + + intel_de_rmw(dev_priv, PP_CONTROL(index), EDP_BLC_ENABLE, + value ? EDP_BLC_ENABLE : 0); + break; + case MIPI_AVEE_EN_1: + case MIPI_AVEE_EN_2: + index = gpio == MIPI_AVEE_EN_1 ? 1 : 2; + + intel_de_rmw(dev_priv, GPIO(dev_priv, index), + GPIO_CLOCK_VAL_OUT, + GPIO_CLOCK_DIR_MASK | GPIO_CLOCK_DIR_OUT | + GPIO_CLOCK_VAL_MASK | (value ? GPIO_CLOCK_VAL_OUT : 0)); + break; + case MIPI_VIO_EN_1: + case MIPI_VIO_EN_2: + index = gpio == MIPI_VIO_EN_1 ? 1 : 2; + + intel_de_rmw(dev_priv, GPIO(dev_priv, index), + GPIO_DATA_VAL_OUT, + GPIO_DATA_DIR_MASK | GPIO_DATA_DIR_OUT | + GPIO_DATA_VAL_MASK | (value ? GPIO_DATA_VAL_OUT : 0)); + break; + default: + MISSING_CASE(gpio); + } +} + static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) { struct drm_device *dev = intel_dsi->base.base.dev; @@ -384,8 +465,7 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) struct intel_connector *connector = intel_dsi->attached_connector; u8 gpio_source, gpio_index = 0, gpio_number; bool value; - - drm_dbg_kms(&dev_priv->drm, "\n"); + bool native = DISPLAY_VER(dev_priv) >= 11; if (connector->panel.vbt.dsi.seq_version >= 3) gpio_index = *data++; @@ -398,10 +478,18 @@ static const u8 *mipi_exec_gpio(struct intel_dsi *intel_dsi, const u8 *data) else gpio_source = 0; + if (connector->panel.vbt.dsi.seq_version >= 4 && *data & BIT(1)) + native = false; + /* pull up/down */ value = *data++ & 1; - if (DISPLAY_VER(dev_priv) >= 11) + drm_dbg_kms(&dev_priv->drm, "GPIO index %u, number %u, source %u, native %s, set to %s\n", + gpio_index, gpio_number, gpio_source, str_yes_no(native), str_on_off(value)); + + if (native) + icl_native_gpio_set_value(dev_priv, gpio_number, value); + else if (DISPLAY_VER(dev_priv) >= 11) icl_exec_gpio(connector, gpio_source, gpio_index, value); else if (IS_VALLEYVIEW(dev_priv)) vlv_exec_gpio(connector, gpio_source, gpio_number, value); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 86a42d9e8041..f93ffa6626a5 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1981,8 +1981,11 @@ static void icp_irq_handler(struct drm_i915_private *dev_priv, u32 pch_iir) if (ddi_hotplug_trigger) { u32 dig_hotplug_reg; + /* Locking due to DSI native GPIO sequences */ + spin_lock(&dev_priv->irq_lock); dig_hotplug_reg = intel_uncore_read(&dev_priv->uncore, SHOTPLUG_CTL_DDI); intel_uncore_write(&dev_priv->uncore, SHOTPLUG_CTL_DDI, dig_hotplug_reg); + spin_unlock(&dev_priv->irq_lock); intel_get_hpd_pins(dev_priv, &pin_mask, &long_mask, ddi_hotplug_trigger, dig_hotplug_reg, diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index da35bb2db26b..64eacd11b8bf 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6035,6 +6035,7 @@ #define SHOTPLUG_CTL_DDI _MMIO(0xc4030) #define SHOTPLUG_CTL_DDI_HPD_ENABLE(hpd_pin) (0x8 << (_HPD_PIN_DDI(hpd_pin) * 4)) +#define SHOTPLUG_CTL_DDI_HPD_OUTPUT_DATA(hpd_pin) (0x4 << (_HPD_PIN_DDI(hpd_pin) * 4)) #define SHOTPLUG_CTL_DDI_HPD_STATUS_MASK(hpd_pin) (0x3 << (_HPD_PIN_DDI(hpd_pin) * 4)) #define SHOTPLUG_CTL_DDI_HPD_NO_DETECT(hpd_pin) (0x0 << (_HPD_PIN_DDI(hpd_pin) * 4)) #define SHOTPLUG_CTL_DDI_HPD_SHORT_DETECT(hpd_pin) (0x1 << (_HPD_PIN_DDI(hpd_pin) * 4)) From 0c84b7de26588f4032992ee2a1df6c3d367be829 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 20 Dec 2022 16:01:05 +0200 Subject: [PATCH 157/543] drm/i915/dsi: fix MIPI_BKLT_EN_1 native GPIO index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6217e9f05a74df48c77ee68993d587cdfdb1feb7 upstream. Due to copy-paste fail, MIPI_BKLT_EN_1 would always use PPS index 1, never 0. Fix the sloppiest commit in recent memory. Fixes: 963bbdb32b47 ("drm/i915/dsi: add support for ICL+ native MIPI GPIO sequence") Reported-by: Ville Syrjälä Signed-off-by: Jani Nikula Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20221220140105.313333-1-jani.nikula@intel.com (cherry picked from commit a561933c571798868b5fa42198427a7e6df56c09) Cc: stable@vger.kernel.org # 6.1 Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/intel_dsi_vbt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c index 41f025f089d9..2cbc1292ab38 100644 --- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c @@ -430,7 +430,7 @@ static void icl_native_gpio_set_value(struct drm_i915_private *dev_priv, break; case MIPI_BKLT_EN_1: case MIPI_BKLT_EN_2: - index = gpio == MIPI_AVDD_EN_1 ? 0 : 1; + index = gpio == MIPI_BKLT_EN_1 ? 0 : 1; intel_de_rmw(dev_priv, PP_CONTROL(index), EDP_BLC_ENABLE, value ? EDP_BLC_ENABLE : 0); From 15f818d4b68273244f95f47c75de2603e46bf0e2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 20 Oct 2022 10:39:10 +0200 Subject: [PATCH 158/543] efi: random: combine bootloader provided RNG seed with RNG protocol output commit 196dff2712ca5a2e651977bb2fe6b05474111a83 upstream. Instead of blindly creating the EFI random seed configuration table if the RNG protocol is implemented and works, check whether such a EFI configuration table was provided by an earlier boot stage and if so, concatenate the existing and the new seeds, leaving it up to the core code to mix it in and credit it the way it sees fit. This can be used for, e.g., systemd-boot, to pass an additional seed to Linux in a way that can be consumed by the kernel very early. In that case, the following definitions should be used to pass the seed to the EFI stub: struct linux_efi_random_seed { u32 size; // of the 'seed' array in bytes u8 seed[]; }; The memory for the struct must be allocated as EFI_ACPI_RECLAIM_MEMORY pool memory, and the address of the struct in memory should be installed as a EFI configuration table using the following GUID: LINUX_EFI_RANDOM_SEED_TABLE_GUID 1ce1e5bc-7ceb-42f2-81e5-8aadf180f57b Note that doing so is safe even on kernels that were built without this patch applied, but the seed will simply be overwritten with a seed derived from the EFI RNG protocol, if available. The recommended seed size is 32 bytes, and seeds larger than 512 bytes are considered corrupted and ignored entirely. In order to preserve forward secrecy, seeds from previous bootloaders are memzero'd out, and in order to preserve memory, those older seeds are also freed from memory. Freeing from memory without first memzeroing is not safe to do, as it's possible that nothing else will ever overwrite those pages used by EFI. Reviewed-by: Jason A. Donenfeld [ardb: incorporate Jason's followup changes to extend the maximum seed size on the consumer end, memzero() it and drop a needless printk] Signed-off-by: Ard Biesheuvel Signed-off-by: Jason A. Donenfeld Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/efi.c | 4 +-- drivers/firmware/efi/libstub/efistub.h | 2 ++ drivers/firmware/efi/libstub/random.c | 42 ++++++++++++++++++++++---- include/linux/efi.h | 2 -- 4 files changed, 40 insertions(+), 10 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index a46df5d1d094..f12cc29bd4b8 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -611,7 +611,7 @@ int __init efi_config_parse_tables(const efi_config_table_t *config_tables, seed = early_memremap(efi_rng_seed, sizeof(*seed)); if (seed != NULL) { - size = min(seed->size, EFI_RANDOM_SEED_SIZE); + size = min_t(u32, seed->size, SZ_1K); // sanity check early_memunmap(seed, sizeof(*seed)); } else { pr_err("Could not map UEFI random seed!\n"); @@ -620,8 +620,8 @@ int __init efi_config_parse_tables(const efi_config_table_t *config_tables, seed = early_memremap(efi_rng_seed, sizeof(*seed) + size); if (seed != NULL) { - pr_notice("seeding entropy pool\n"); add_bootloader_randomness(seed->bits, size); + memzero_explicit(seed->bits, size); early_memunmap(seed, sizeof(*seed) + size); } else { pr_err("Could not map UEFI random seed!\n"); diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index eb03d5a9aac8..900df67a2078 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -882,6 +882,8 @@ efi_status_t efi_get_random_bytes(unsigned long size, u8 *out); efi_status_t efi_random_alloc(unsigned long size, unsigned long align, unsigned long *addr, unsigned long random_seed); +efi_status_t efi_random_get_seed(void); + efi_status_t check_platform_features(void); void *get_efi_config_table(efi_guid_t guid); diff --git a/drivers/firmware/efi/libstub/random.c b/drivers/firmware/efi/libstub/random.c index 33ab56769595..f85d2c066877 100644 --- a/drivers/firmware/efi/libstub/random.c +++ b/drivers/firmware/efi/libstub/random.c @@ -67,27 +67,43 @@ efi_status_t efi_random_get_seed(void) efi_guid_t rng_proto = EFI_RNG_PROTOCOL_GUID; efi_guid_t rng_algo_raw = EFI_RNG_ALGORITHM_RAW; efi_guid_t rng_table_guid = LINUX_EFI_RANDOM_SEED_TABLE_GUID; + struct linux_efi_random_seed *prev_seed, *seed = NULL; + int prev_seed_size = 0, seed_size = EFI_RANDOM_SEED_SIZE; efi_rng_protocol_t *rng = NULL; - struct linux_efi_random_seed *seed = NULL; efi_status_t status; status = efi_bs_call(locate_protocol, &rng_proto, NULL, (void **)&rng); if (status != EFI_SUCCESS) return status; + /* + * Check whether a seed was provided by a prior boot stage. In that + * case, instead of overwriting it, let's create a new buffer that can + * hold both, and concatenate the existing and the new seeds. + * Note that we should read the seed size with caution, in case the + * table got corrupted in memory somehow. + */ + prev_seed = get_efi_config_table(LINUX_EFI_RANDOM_SEED_TABLE_GUID); + if (prev_seed && prev_seed->size <= 512U) { + prev_seed_size = prev_seed->size; + seed_size += prev_seed_size; + } + /* * Use EFI_ACPI_RECLAIM_MEMORY here so that it is guaranteed that the * allocation will survive a kexec reboot (although we refresh the seed * beforehand) */ status = efi_bs_call(allocate_pool, EFI_ACPI_RECLAIM_MEMORY, - sizeof(*seed) + EFI_RANDOM_SEED_SIZE, + struct_size(seed, bits, seed_size), (void **)&seed); - if (status != EFI_SUCCESS) - return status; + if (status != EFI_SUCCESS) { + efi_warn("Failed to allocate memory for RNG seed.\n"); + goto err_warn; + } status = efi_call_proto(rng, get_rng, &rng_algo_raw, - EFI_RANDOM_SEED_SIZE, seed->bits); + EFI_RANDOM_SEED_SIZE, seed->bits); if (status == EFI_UNSUPPORTED) /* @@ -100,14 +116,28 @@ efi_status_t efi_random_get_seed(void) if (status != EFI_SUCCESS) goto err_freepool; - seed->size = EFI_RANDOM_SEED_SIZE; + seed->size = seed_size; + if (prev_seed_size) + memcpy(seed->bits + EFI_RANDOM_SEED_SIZE, prev_seed->bits, + prev_seed_size); + status = efi_bs_call(install_configuration_table, &rng_table_guid, seed); if (status != EFI_SUCCESS) goto err_freepool; + if (prev_seed_size) { + /* wipe and free the old seed if we managed to install the new one */ + memzero_explicit(prev_seed->bits, prev_seed_size); + efi_bs_call(free_pool, prev_seed); + } return EFI_SUCCESS; err_freepool: + memzero_explicit(seed, struct_size(seed, bits, seed_size)); efi_bs_call(free_pool, seed); + efi_warn("Failed to obtain seed from EFI_RNG_PROTOCOL\n"); +err_warn: + if (prev_seed) + efi_warn("Retaining bootloader-supplied seed only"); return status; } diff --git a/include/linux/efi.h b/include/linux/efi.h index 7603fc58c47c..4aa1dbc7b064 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1170,8 +1170,6 @@ void efi_check_for_embedded_firmwares(void); static inline void efi_check_for_embedded_firmwares(void) { } #endif -efi_status_t efi_random_get_seed(void); - #define arch_efi_call_virt(p, f, args...) ((p)->f(args)) /* From 177055b94fb5918f32c0f5ec05891dc73e64959c Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Mon, 10 Oct 2022 11:32:37 +0800 Subject: [PATCH 159/543] wifi: ath11k: Send PME message during wakeup from D3cold commit 3f9b09ccf7d5f23066b02881a737bee42def9d1a upstream. We are seeing system stuck on some specific platforms due to WLAN chip fails to wakeup from D3cold state. With this flag, firmware will send PME message during wakeup and this issue is gone. Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3 Signed-off-by: Baochen Qiang Signed-off-by: Kalle Valo Cc: "Limonciello, Mario" Link: https://lore.kernel.org/r/20221010033237.415478-1-quic_bqiang@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath11k/qmi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/ath/ath11k/qmi.c b/drivers/net/wireless/ath/ath11k/qmi.c index 8358fe08c234..7cc5fa325152 100644 --- a/drivers/net/wireless/ath/ath11k/qmi.c +++ b/drivers/net/wireless/ath/ath11k/qmi.c @@ -19,6 +19,7 @@ #define SLEEP_CLOCK_SELECT_INTERNAL_BIT 0x02 #define HOST_CSTATE_BIT 0x04 #define PLATFORM_CAP_PCIE_GLOBAL_RESET 0x08 +#define PLATFORM_CAP_PCIE_PME_D3COLD 0x10 #define FW_BUILD_ID_MASK "QC_IMAGE_VERSION_STRING=" @@ -1752,6 +1753,8 @@ static int ath11k_qmi_host_cap_send(struct ath11k_base *ab) if (ab->hw_params.global_reset) req.nm_modem |= PLATFORM_CAP_PCIE_GLOBAL_RESET; + req.nm_modem |= PLATFORM_CAP_PCIE_PME_D3COLD; + ath11k_dbg(ab, ATH11K_DBG_QMI, "qmi host cap request\n"); ret = qmi_txn_init(&ab->qmi.handle, &txn, From 90ca7a874a9093e500cc6147cecd85ad2e6a2852 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 12 Jan 2023 12:02:59 +0100 Subject: [PATCH 160/543] Linux 6.1.5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link: https://lore.kernel.org/r/20230110180018.288460217@linuxfoundation.org Tested-by: Ronald Warsow Tested-by: Holger Hoffstätte Tested-by: Florian Fainelli Tested-by: Shuah Khan Tested-by: Takeshi Ogasawara Tested-by: Fenil Jain Tested-by: Rudi Heitbaum Tested-by: Jon Hunter Tested-by: Ron Economos Tested-by: Sudip Mukherjee Tested-by: Salvatore Bonaccorso Tested-by: Linux Kernel Functional Testing Tested-by: Allen Pais Tested-by: Justin M. Forbes Tested-by: Conor Dooley Tested-by: Guenter Roeck Tested-by: Bagas Sanjaya Tested-by: Kelsey Steele Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 56afd1509c74..ddbd2fc917c5 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 4 +SUBLEVEL = 5 EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth From 763b92568726b55569ea4896fe36e3c174cc3b0d Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 11 Dec 2022 19:50:20 +0100 Subject: [PATCH 161/543] parisc: Align parisc MADV_XXX constants with all other architectures commit 71bdea6f798b425bc0003780b13e3fdecb16a010 upstream. Adjust some MADV_XXX constants to be in sync what their values are on all other platforms. There is currently no reason to have an own numbering on parisc, but it requires workarounds in many userspace sources (e.g. glibc, qemu, ...) - which are often forgotten and thus introduce bugs and different behaviour on parisc. A wrapper avoids an ABI breakage for existing userspace applications by translating any old values to the new ones, so this change allows us to move over all programs to the new ABI over time. Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/uapi/asm/mman.h | 29 +++++++++++------------ arch/parisc/kernel/sys_parisc.c | 28 ++++++++++++++++++++++ arch/parisc/kernel/syscalls/syscall.tbl | 2 +- tools/arch/parisc/include/uapi/asm/mman.h | 12 +++++----- tools/perf/bench/bench.h | 12 ---------- 5 files changed, 49 insertions(+), 34 deletions(-) diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h index 22133a6a506e..68c44f99bc93 100644 --- a/arch/parisc/include/uapi/asm/mman.h +++ b/arch/parisc/include/uapi/asm/mman.h @@ -49,6 +49,19 @@ #define MADV_DONTFORK 10 /* don't inherit across fork */ #define MADV_DOFORK 11 /* do inherit across fork */ +#define MADV_MERGEABLE 12 /* KSM may merge identical pages */ +#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ + +#define MADV_HUGEPAGE 14 /* Worth backing with hugepages */ +#define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */ + +#define MADV_DONTDUMP 16 /* Explicity exclude from the core dump, + overrides the coredump filter bits */ +#define MADV_DODUMP 17 /* Clear the MADV_NODUMP flag */ + +#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */ +#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ + #define MADV_COLD 20 /* deactivate these pages */ #define MADV_PAGEOUT 21 /* reclaim these pages */ @@ -57,27 +70,13 @@ #define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */ -#define MADV_MERGEABLE 65 /* KSM may merge identical pages */ -#define MADV_UNMERGEABLE 66 /* KSM may not merge identical pages */ - -#define MADV_HUGEPAGE 67 /* Worth backing with hugepages */ -#define MADV_NOHUGEPAGE 68 /* Not worth backing with hugepages */ - -#define MADV_DONTDUMP 69 /* Explicity exclude from the core dump, - overrides the coredump filter bits */ -#define MADV_DODUMP 70 /* Clear the MADV_NODUMP flag */ - -#define MADV_WIPEONFORK 71 /* Zero memory on fork, child only */ -#define MADV_KEEPONFORK 72 /* Undo MADV_WIPEONFORK */ - -#define MADV_COLLAPSE 73 /* Synchronous hugepage collapse */ +#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ #define MADV_HWPOISON 100 /* poison a page for testing */ #define MADV_SOFT_OFFLINE 101 /* soft offline page for testing */ /* compatibility flags */ #define MAP_FILE 0 -#define MAP_VARIABLE 0 #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index 848b0702005d..09a34b07f02e 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -465,3 +465,31 @@ asmlinkage long parisc_inotify_init1(int flags) flags = FIX_O_NONBLOCK(flags); return sys_inotify_init1(flags); } + +/* + * madvise() wrapper + * + * Up to kernel v6.1 parisc has different values than all other + * platforms for the MADV_xxx flags listed below. + * To keep binary compatibility with existing userspace programs + * translate the former values to the new values. + * + * XXX: Remove this wrapper in year 2025 (or later) + */ + +asmlinkage notrace long parisc_madvise(unsigned long start, size_t len_in, int behavior) +{ + switch (behavior) { + case 65: behavior = MADV_MERGEABLE; break; + case 66: behavior = MADV_UNMERGEABLE; break; + case 67: behavior = MADV_HUGEPAGE; break; + case 68: behavior = MADV_NOHUGEPAGE; break; + case 69: behavior = MADV_DONTDUMP; break; + case 70: behavior = MADV_DODUMP; break; + case 71: behavior = MADV_WIPEONFORK; break; + case 72: behavior = MADV_KEEPONFORK; break; + case 73: behavior = MADV_COLLAPSE; break; + } + + return sys_madvise(start, len_in, behavior); +} diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 8a99c998da9b..0e42fceb2d5e 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -131,7 +131,7 @@ 116 common sysinfo sys_sysinfo compat_sys_sysinfo 117 common shutdown sys_shutdown 118 common fsync sys_fsync -119 common madvise sys_madvise +119 common madvise parisc_madvise 120 common clone sys_clone_wrapper 121 common setdomainname sys_setdomainname 122 common sendfile sys_sendfile compat_sys_sendfile diff --git a/tools/arch/parisc/include/uapi/asm/mman.h b/tools/arch/parisc/include/uapi/asm/mman.h index 506c06a6536f..4cc88a642e10 100644 --- a/tools/arch/parisc/include/uapi/asm/mman.h +++ b/tools/arch/parisc/include/uapi/asm/mman.h @@ -1,20 +1,20 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef TOOLS_ARCH_PARISC_UAPI_ASM_MMAN_FIX_H #define TOOLS_ARCH_PARISC_UAPI_ASM_MMAN_FIX_H -#define MADV_DODUMP 70 +#define MADV_DODUMP 17 #define MADV_DOFORK 11 -#define MADV_DONTDUMP 69 +#define MADV_DONTDUMP 16 #define MADV_DONTFORK 10 #define MADV_DONTNEED 4 #define MADV_FREE 8 -#define MADV_HUGEPAGE 67 -#define MADV_MERGEABLE 65 -#define MADV_NOHUGEPAGE 68 +#define MADV_HUGEPAGE 14 +#define MADV_MERGEABLE 12 +#define MADV_NOHUGEPAGE 15 #define MADV_NORMAL 0 #define MADV_RANDOM 1 #define MADV_REMOVE 9 #define MADV_SEQUENTIAL 2 -#define MADV_UNMERGEABLE 66 +#define MADV_UNMERGEABLE 13 #define MADV_WILLNEED 3 #define MAP_ANONYMOUS 0x10 #define MAP_DENYWRITE 0x0800 diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 6cefb4315d75..a5d49b3b6a09 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -10,25 +10,13 @@ extern struct timeval bench__start, bench__end, bench__runtime; * The madvise transparent hugepage constants were added in glibc * 2.13. For compatibility with older versions of glibc, define these * tokens if they are not already defined. - * - * PA-RISC uses different madvise values from other architectures and - * needs to be special-cased. */ -#ifdef __hppa__ -# ifndef MADV_HUGEPAGE -# define MADV_HUGEPAGE 67 -# endif -# ifndef MADV_NOHUGEPAGE -# define MADV_NOHUGEPAGE 68 -# endif -#else # ifndef MADV_HUGEPAGE # define MADV_HUGEPAGE 14 # endif # ifndef MADV_NOHUGEPAGE # define MADV_NOHUGEPAGE 15 # endif -#endif int bench_numa(int argc, const char **argv); int bench_sched_messaging(int argc, const char **argv); From f28418b06f6b1e83c81369ef1705ee53da106ae6 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Tue, 15 Nov 2022 15:09:27 -0800 Subject: [PATCH 162/543] x86/fpu: Take task_struct* in copy_sigframe_from_user_to_xstate() commit 6a877d2450ace4f27c012519e5a1ae818f931983 upstream. This will allow copy_sigframe_from_user_to_xstate() to grab the address of thread_struct's pkru value in a later patch. Signed-off-by: Kyle Huey Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20221115230932.7126-2-khuey%40kylehuey.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/fpu/signal.c | 2 +- arch/x86/kernel/fpu/xstate.c | 4 ++-- arch/x86/kernel/fpu/xstate.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 91d4b6de58ab..558076dbde5b 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -396,7 +396,7 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx, fpregs = &fpu->fpstate->regs; if (use_xsave() && !fx_only) { - if (copy_sigframe_from_user_to_xstate(fpu->fpstate, buf_fx)) + if (copy_sigframe_from_user_to_xstate(tsk, buf_fx)) return false; } else { if (__copy_from_user(&fpregs->fxsave, buf_fx, diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index c2dde46a538e..c88c60bac3ae 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1278,10 +1278,10 @@ int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf) * XSAVE[S] format and copy to the target thread. This is called from the * sigreturn() and rt_sigreturn() system calls. */ -int copy_sigframe_from_user_to_xstate(struct fpstate *fpstate, +int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void __user *ubuf) { - return copy_uabi_to_xstate(fpstate, NULL, ubuf); + return copy_uabi_to_xstate(tsk->thread.fpu.fpstate, NULL, ubuf); } static bool validate_independent_components(u64 mask) diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 5ad47031383b..f08ee2722e74 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -47,7 +47,7 @@ extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, enum xstate_copy_mode mode); extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf); -extern int copy_sigframe_from_user_to_xstate(struct fpstate *fpstate, const void __user *ubuf); +extern int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void __user *ubuf); extern void fpu__init_cpu_xstate(void); From a442736b704d4194ae68f75f4ee2e64cf2b8a142 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Tue, 15 Nov 2022 15:09:28 -0800 Subject: [PATCH 163/543] x86/fpu: Add a pkru argument to copy_uabi_from_kernel_to_xstate(). commit 1c813ce0305571e1b2e4cc4acca451da9e6ad18f upstream. Both KVM (through KVM_SET_XSTATE) and ptrace (through PTRACE_SETREGSET with NT_X86_XSTATE) ultimately call copy_uabi_from_kernel_to_xstate(), but the canonical locations for the current PKRU value for KVM guests and processes in a ptrace stop are different (in the kvm_vcpu_arch and the thread_state structs respectively). In preparation for eventually handling PKRU in copy_uabi_to_xstate, pass in a pointer to the PKRU location. Signed-off-by: Kyle Huey Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20221115230932.7126-3-khuey%40kylehuey.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/fpu/core.c | 2 +- arch/x86/kernel/fpu/regset.c | 2 +- arch/x86/kernel/fpu/xstate.c | 2 +- arch/x86/kernel/fpu/xstate.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index d00db56a8868..db3300a617b7 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -406,7 +406,7 @@ int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, if (ustate->xsave.header.xfeatures & ~xcr0) return -EINVAL; - ret = copy_uabi_from_kernel_to_xstate(kstate, ustate); + ret = copy_uabi_from_kernel_to_xstate(kstate, ustate, vpkru); if (ret) return ret; diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 75ffaef8c299..6d056b68f4ed 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -167,7 +167,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, } fpu_force_restore(fpu); - ret = copy_uabi_from_kernel_to_xstate(fpu->fpstate, kbuf ?: tmpbuf); + ret = copy_uabi_from_kernel_to_xstate(fpu->fpstate, kbuf ?: tmpbuf, &target->thread.pkru); out: vfree(tmpbuf); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index c88c60bac3ae..d6f414af00cb 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1268,7 +1268,7 @@ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf, * Convert from a ptrace standard-format kernel buffer to kernel XSAVE[S] * format and copy to the target thread. Used by ptrace and KVM. */ -int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf) +int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru) { return copy_uabi_to_xstate(fpstate, kbuf, NULL); } diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index f08ee2722e74..a4ecb04d8d64 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -46,7 +46,7 @@ extern void __copy_xstate_to_uabi_buf(struct membuf to, struct fpstate *fpstate, u32 pkru_val, enum xstate_copy_mode copy_mode); extern void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk, enum xstate_copy_mode mode); -extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf); +extern int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru); extern int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void __user *ubuf); From bfa72faf69e82f7d64fefe0ca162f803d98e6679 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Tue, 15 Nov 2022 15:09:29 -0800 Subject: [PATCH 164/543] x86/fpu: Add a pkru argument to copy_uabi_to_xstate() commit 2c87767c35ee9744f666ccec869d5fe742c3de0a upstream. In preparation for moving PKRU handling code out of fpu_copy_uabi_to_guest_fpstate() and into copy_uabi_to_xstate(), add an argument that copy_uabi_from_kernel_to_xstate() can use to pass the canonical location of the PKRU value. For copy_sigframe_from_user_to_xstate() the kernel will actually restore the PKRU value from the fpstate, but pass in the thread_struct's pkru location anyways for consistency. Signed-off-by: Kyle Huey Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20221115230932.7126-4-khuey%40kylehuey.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/fpu/xstate.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index d6f414af00cb..d657c8b1fb08 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1200,8 +1200,18 @@ static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size, } +/** + * copy_uabi_to_xstate - Copy a UABI format buffer to the kernel xstate + * @fpstate: The fpstate buffer to copy to + * @kbuf: The UABI format buffer, if it comes from the kernel + * @ubuf: The UABI format buffer, if it comes from userspace + * @pkru: unused + * + * Converts from the UABI format into the kernel internal hardware + * dependent format. + */ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf, - const void __user *ubuf) + const void __user *ubuf, u32 *pkru) { struct xregs_state *xsave = &fpstate->regs.xsave; unsigned int offset, size; @@ -1270,7 +1280,7 @@ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf, */ int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u32 *pkru) { - return copy_uabi_to_xstate(fpstate, kbuf, NULL); + return copy_uabi_to_xstate(fpstate, kbuf, NULL, pkru); } /* @@ -1281,7 +1291,7 @@ int copy_uabi_from_kernel_to_xstate(struct fpstate *fpstate, const void *kbuf, u int copy_sigframe_from_user_to_xstate(struct task_struct *tsk, const void __user *ubuf) { - return copy_uabi_to_xstate(tsk->thread.fpu.fpstate, NULL, ubuf); + return copy_uabi_to_xstate(tsk->thread.fpu.fpstate, NULL, ubuf, &tsk->thread.pkru); } static bool validate_independent_components(u64 mask) From 587478470b0a632c706090a8b0cbf59de0e98b48 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Tue, 15 Nov 2022 15:09:30 -0800 Subject: [PATCH 165/543] x86/fpu: Allow PKRU to be (once again) written by ptrace. commit 4a804c4f8356393d6b5eff7600f07615d7869c13 upstream. Move KVM's PKRU handling code in fpu_copy_uabi_to_guest_fpstate() to copy_uabi_to_xstate() so that it is shared with other APIs that write the XSTATE such as PTRACE_SETREGSET with NT_X86_XSTATE. This restores the pre-5.14 behavior of ptrace. The regression can be seen by running gdb and executing `p $pkru`, `set $pkru = 42`, and `p $pkru`. On affected kernels (5.14+) the write to the PKRU register (which gdb performs through ptrace) is ignored. [ dhansen: removed stable@ tag for now. The ABI was broken for long enough that this is not urgent material. Let's let it stew in tip for a few weeks before it's submitted to stable because there are so many ABIs potentially affected. ] Fixes: e84ba47e313d ("x86/fpu: Hook up PKRU into ptrace()") Signed-off-by: Kyle Huey Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20221115230932.7126-5-khuey%40kylehuey.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/fpu/core.c | 13 +------------ arch/x86/kernel/fpu/xstate.c | 21 ++++++++++++++++++++- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index db3300a617b7..4bca73e065c6 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -391,8 +391,6 @@ int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, { struct fpstate *kstate = gfpu->fpstate; const union fpregs_state *ustate = buf; - struct pkru_state *xpkru; - int ret; if (!cpu_feature_enabled(X86_FEATURE_XSAVE)) { if (ustate->xsave.header.xfeatures & ~XFEATURE_MASK_FPSSE) @@ -406,16 +404,7 @@ int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, if (ustate->xsave.header.xfeatures & ~xcr0) return -EINVAL; - ret = copy_uabi_from_kernel_to_xstate(kstate, ustate, vpkru); - if (ret) - return ret; - - /* Retrieve PKRU if not in init state */ - if (kstate->regs.xsave.header.xfeatures & XFEATURE_MASK_PKRU) { - xpkru = get_xsave_addr(&kstate->regs.xsave, XFEATURE_PKRU); - *vpkru = xpkru->pkru; - } - return 0; + return copy_uabi_from_kernel_to_xstate(kstate, ustate, vpkru); } EXPORT_SYMBOL_GPL(fpu_copy_uabi_to_guest_fpstate); #endif /* CONFIG_KVM */ diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index d657c8b1fb08..a8cf604d8a25 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1205,10 +1205,22 @@ static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size, * @fpstate: The fpstate buffer to copy to * @kbuf: The UABI format buffer, if it comes from the kernel * @ubuf: The UABI format buffer, if it comes from userspace - * @pkru: unused + * @pkru: The location to write the PKRU value to * * Converts from the UABI format into the kernel internal hardware * dependent format. + * + * This function ultimately has three different callers with distinct PKRU + * behavior. + * 1. When called from sigreturn the PKRU register will be restored from + * @fpstate via an XRSTOR. Correctly copying the UABI format buffer to + * @fpstate is sufficient to cover this case, but the caller will also + * pass a pointer to the thread_struct's pkru field in @pkru and updating + * it is harmless. + * 2. When called from ptrace the PKRU register will be restored from the + * thread_struct's pkru field. A pointer to that is passed in @pkru. + * 3. When called from KVM the PKRU register will be restored from the vcpu's + * pkru field. A pointer to that is passed in @pkru. */ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf, const void __user *ubuf, u32 *pkru) @@ -1260,6 +1272,13 @@ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf, } } + if (hdr.xfeatures & XFEATURE_MASK_PKRU) { + struct pkru_state *xpkru; + + xpkru = __raw_xsave_addr(xsave, XFEATURE_PKRU); + *pkru = xpkru->pkru; + } + /* * The state that came in from userspace was user-state only. * Mask all the user states out of 'xfeatures': From d2602da3da87a1832f708c8639b0afbfb5704cd4 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Tue, 15 Nov 2022 15:09:31 -0800 Subject: [PATCH 166/543] x86/fpu: Emulate XRSTOR's behavior if the xfeatures PKRU bit is not set commit d7e5aceace514a2b1b3ca3dc44f93f1704766ca7 upstream. The hardware XRSTOR instruction resets the PKRU register to its hardware init value (namely 0) if the PKRU bit is not set in the xfeatures mask. Emulating that here restores the pre-5.14 behavior for PTRACE_SET_REGSET with NT_X86_XSTATE, and makes sigreturn (which still uses XRSTOR) and ptrace behave identically. KVM has never used XRSTOR and never had this behavior, so KVM opts-out of this emulation by passing a NULL pkru pointer to copy_uabi_to_xstate(). Fixes: e84ba47e313d ("x86/fpu: Hook up PKRU into ptrace()") Signed-off-by: Kyle Huey Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20221115230932.7126-6-khuey%40kylehuey.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/fpu/core.c | 8 ++++++++ arch/x86/kernel/fpu/xstate.c | 15 ++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 4bca73e065c6..9baa89a8877d 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -404,6 +404,14 @@ int fpu_copy_uabi_to_guest_fpstate(struct fpu_guest *gfpu, const void *buf, if (ustate->xsave.header.xfeatures & ~xcr0) return -EINVAL; + /* + * Nullify @vpkru to preserve its current value if PKRU's bit isn't set + * in the header. KVM's odd ABI is to leave PKRU untouched in this + * case (all other components are eventually re-initialized). + */ + if (!(ustate->xsave.header.xfeatures & XFEATURE_MASK_PKRU)) + vpkru = NULL; + return copy_uabi_from_kernel_to_xstate(kstate, ustate, vpkru); } EXPORT_SYMBOL_GPL(fpu_copy_uabi_to_guest_fpstate); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index a8cf604d8a25..714166cc25f2 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1219,8 +1219,14 @@ static int copy_from_buffer(void *dst, unsigned int offset, unsigned int size, * it is harmless. * 2. When called from ptrace the PKRU register will be restored from the * thread_struct's pkru field. A pointer to that is passed in @pkru. + * The kernel will restore it manually, so the XRSTOR behavior that resets + * the PKRU register to the hardware init value (0) if the corresponding + * xfeatures bit is not set is emulated here. * 3. When called from KVM the PKRU register will be restored from the vcpu's - * pkru field. A pointer to that is passed in @pkru. + * pkru field. A pointer to that is passed in @pkru. KVM hasn't used + * XRSTOR and hasn't had the PKRU resetting behavior described above. To + * preserve that KVM behavior, it passes NULL for @pkru if the xfeatures + * bit is not set. */ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf, const void __user *ubuf, u32 *pkru) @@ -1277,6 +1283,13 @@ static int copy_uabi_to_xstate(struct fpstate *fpstate, const void *kbuf, xpkru = __raw_xsave_addr(xsave, XFEATURE_PKRU); *pkru = xpkru->pkru; + } else { + /* + * KVM may pass NULL here to indicate that it does not need + * PKRU updated. + */ + if (pkru) + *pkru = 0; } /* From 29fbaa434c223f29917e8de054cc48a96e6c0ed4 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Tue, 15 Nov 2022 15:09:32 -0800 Subject: [PATCH 167/543] selftests/vm/pkeys: Add a regression test for setting PKRU through ptrace commit 6ea25770b043c7997ab21d1ce95ba5de4d3d85d9 upstream. This tests PTRACE_SETREGSET with NT_X86_XSTATE modifying PKRU directly and removing the PKRU bit from XSTATE_BV. Signed-off-by: Kyle Huey Signed-off-by: Dave Hansen Link: https://lore.kernel.org/all/20221115230932.7126-7-khuey%40kylehuey.com Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/vm/pkey-x86.h | 12 ++ tools/testing/selftests/vm/protection_keys.c | 131 ++++++++++++++++++- 2 files changed, 141 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h index b078ce9c6d2a..72c14cd3ddc7 100644 --- a/tools/testing/selftests/vm/pkey-x86.h +++ b/tools/testing/selftests/vm/pkey-x86.h @@ -104,6 +104,18 @@ static inline int cpu_has_pkeys(void) return 1; } +static inline int cpu_max_xsave_size(void) +{ + unsigned long XSTATE_CPUID = 0xd; + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + + __cpuid_count(XSTATE_CPUID, 0, eax, ebx, ecx, edx); + return ecx; +} + static inline u32 pkey_bit_position(int pkey) { return pkey * PKEY_BITS_PER_PKEY; diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c index 291bc1e07842..95f403a0c46d 100644 --- a/tools/testing/selftests/vm/protection_keys.c +++ b/tools/testing/selftests/vm/protection_keys.c @@ -18,12 +18,13 @@ * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks * * Compile like this: - * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm - * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm + * gcc -mxsave -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm + * gcc -mxsave -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm */ #define _GNU_SOURCE #define __SANE_USERSPACE_TYPES__ #include +#include #include #include #include @@ -1550,6 +1551,129 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) do_not_expect_pkey_fault("plain read on recently PROT_EXEC area"); } +#if defined(__i386__) || defined(__x86_64__) +void test_ptrace_modifies_pkru(int *ptr, u16 pkey) +{ + u32 new_pkru; + pid_t child; + int status, ret; + int pkey_offset = pkey_reg_xstate_offset(); + size_t xsave_size = cpu_max_xsave_size(); + void *xsave; + u32 *pkey_register; + u64 *xstate_bv; + struct iovec iov; + + new_pkru = ~read_pkey_reg(); + /* Don't make PROT_EXEC mappings inaccessible */ + new_pkru &= ~3; + + child = fork(); + pkey_assert(child >= 0); + dprintf3("[%d] fork() ret: %d\n", getpid(), child); + if (!child) { + ptrace(PTRACE_TRACEME, 0, 0, 0); + /* Stop and allow the tracer to modify PKRU directly */ + raise(SIGSTOP); + + /* + * need __read_pkey_reg() version so we do not do shadow_pkey_reg + * checking + */ + if (__read_pkey_reg() != new_pkru) + exit(1); + + /* Stop and allow the tracer to clear XSTATE_BV for PKRU */ + raise(SIGSTOP); + + if (__read_pkey_reg() != 0) + exit(1); + + /* Stop and allow the tracer to examine PKRU */ + raise(SIGSTOP); + + exit(0); + } + + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + xsave = (void *)malloc(xsave_size); + pkey_assert(xsave > 0); + + /* Modify the PKRU register directly */ + iov.iov_base = xsave; + iov.iov_len = xsave_size; + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + + pkey_register = (u32 *)(xsave + pkey_offset); + pkey_assert(*pkey_register == read_pkey_reg()); + + *pkey_register = new_pkru; + + ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + + /* Test that the modification is visible in ptrace before any execution */ + memset(xsave, 0xCC, xsave_size); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + pkey_assert(*pkey_register == new_pkru); + + /* Execute the tracee */ + ret = ptrace(PTRACE_CONT, child, 0, 0); + pkey_assert(ret == 0); + + /* Test that the tracee saw the PKRU value change */ + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + /* Test that the modification is visible in ptrace after execution */ + memset(xsave, 0xCC, xsave_size); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + pkey_assert(*pkey_register == new_pkru); + + /* Clear the PKRU bit from XSTATE_BV */ + xstate_bv = (u64 *)(xsave + 512); + *xstate_bv &= ~(1 << 9); + + ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + + /* Test that the modification is visible in ptrace before any execution */ + memset(xsave, 0xCC, xsave_size); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + pkey_assert(*pkey_register == 0); + + ret = ptrace(PTRACE_CONT, child, 0, 0); + pkey_assert(ret == 0); + + /* Test that the tracee saw the PKRU value go to 0 */ + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + /* Test that the modification is visible in ptrace after execution */ + memset(xsave, 0xCC, xsave_size); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov); + pkey_assert(ret == 0); + pkey_assert(*pkey_register == 0); + + ret = ptrace(PTRACE_CONT, child, 0, 0); + pkey_assert(ret == 0); + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFEXITED(status)); + pkey_assert(WEXITSTATUS(status) == 0); + free(xsave); +} +#endif + void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) { int size = PAGE_SIZE; @@ -1585,6 +1709,9 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = { test_pkey_syscalls_bad_args, test_pkey_alloc_exhaust, test_pkey_alloc_free_attach_pkey0, +#if defined(__i386__) || defined(__x86_64__) + test_ptrace_modifies_pkru, +#endif }; void run_tests_once(void) From 8cc0e63ba86c9f4e732de9f42642231c8e3a31d2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 6 Jan 2023 12:43:37 -0500 Subject: [PATCH 168/543] Revert "SUNRPC: Use RMW bitops in single-threaded hot paths" commit 7827c81f0248e3c2f40d438b020f3d222f002171 upstream. The premise that "Once an svc thread is scheduled and executing an RPC, no other processes will touch svc_rqst::rq_flags" is false. svc_xprt_enqueue() examines the RQ_BUSY flag in scheduled nfsd threads when determining which thread to wake up next. Found via KCSAN. Fixes: 28df0988815f ("SUNRPC: Use RMW bitops in single-threaded hot paths") Reviewed-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4proc.c | 7 +++---- fs/nfsd/nfs4xdr.c | 2 +- net/sunrpc/auth_gss/svcauth_gss.c | 4 ++-- net/sunrpc/svc.c | 6 +++--- net/sunrpc/svc_xprt.c | 2 +- net/sunrpc/svcsock.c | 8 ++++---- net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +- 7 files changed, 15 insertions(+), 16 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 34d1cd5883fb..c7329523a10f 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -928,7 +928,7 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, * the client wants us to do more in this compound: */ if (!nfsd4_last_compound_op(rqstp)) - __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); /* check stateid */ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, @@ -2615,12 +2615,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) cstate->minorversion = args->minorversion; fh_init(current_fh, NFS4_FHSIZE); fh_init(save_fh, NFS4_FHSIZE); - /* * Don't use the deferral mechanism for NFSv4; compounds make it * too hard to avoid non-idempotency problems. */ - __clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); + clear_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); /* * According to RFC3010, this takes precedence over all other errors. @@ -2742,7 +2741,7 @@ encode_op: out: cstate->status = status; /* Reset deferral mechanism for RPC deferrals */ - __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); + set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); return rpc_success; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 78849646fe83..8377e14b8fba 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2464,7 +2464,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE; if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack) - __clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags); + clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags); return true; } diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 9a5db285d4ae..bdc34ea0d939 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -900,7 +900,7 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g * rejecting the server-computed MIC in this somewhat rare case, * do not use splice with the GSS integrity service. */ - __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); /* Did we already verify the signature on the original pass through? */ if (rqstp->rq_deferred) @@ -972,7 +972,7 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs int pad, remaining_len, offset; u32 rseqno; - __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); priv_len = svc_getnl(&buf->head[0]); if (rqstp->rq_deferred) { diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 149171774bc6..24577d1b9907 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1244,10 +1244,10 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) goto err_short_len; /* Will be turned off by GSS integrity and privacy services */ - __set_bit(RQ_SPLICE_OK, &rqstp->rq_flags); + set_bit(RQ_SPLICE_OK, &rqstp->rq_flags); /* Will be turned off only when NFSv4 Sessions are used */ - __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); - __clear_bit(RQ_DROPME, &rqstp->rq_flags); + set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags); + clear_bit(RQ_DROPME, &rqstp->rq_flags); svc_putu32(resv, rqstp->rq_xid); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 2106003645a7..c2ce12538008 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1238,7 +1238,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req) trace_svc_defer(rqstp); svc_xprt_get(rqstp->rq_xprt); dr->xprt = rqstp->rq_xprt; - __set_bit(RQ_DROPME, &rqstp->rq_flags); + set_bit(RQ_DROPME, &rqstp->rq_flags); dr->handle.revisit = svc_revisit; return &dr->handle; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 2fc98fea59b4..e833103f4629 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -298,9 +298,9 @@ static void svc_sock_setbufsize(struct svc_sock *svsk, unsigned int nreqs) static void svc_sock_secure_port(struct svc_rqst *rqstp) { if (svc_port_is_privileged(svc_addr(rqstp))) - __set_bit(RQ_SECURE, &rqstp->rq_flags); + set_bit(RQ_SECURE, &rqstp->rq_flags); else - __clear_bit(RQ_SECURE, &rqstp->rq_flags); + clear_bit(RQ_SECURE, &rqstp->rq_flags); } /* @@ -1008,9 +1008,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_xprt_ctxt = NULL; rqstp->rq_prot = IPPROTO_TCP; if (test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags)) - __set_bit(RQ_LOCAL, &rqstp->rq_flags); + set_bit(RQ_LOCAL, &rqstp->rq_flags); else - __clear_bit(RQ_LOCAL, &rqstp->rq_flags); + clear_bit(RQ_LOCAL, &rqstp->rq_flags); p = (__be32 *)rqstp->rq_arg.head[0].iov_base; calldir = p[1]; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 199fa012f18a..94b20fb47135 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -602,7 +602,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt) static void svc_rdma_secure_port(struct svc_rqst *rqstp) { - __set_bit(RQ_SECURE, &rqstp->rq_flags); + set_bit(RQ_SECURE, &rqstp->rq_flags); } static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt) From cd2e80a3ac7005f964680ae63946cdde4e970e0a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 9 Jan 2023 17:04:49 -0600 Subject: [PATCH 169/543] gcc: disable -Warray-bounds for gcc-11 too commit 5a41237ad1d4b62008f93163af1d9b1da90729d8 upstream. We had already disabled this warning for gcc-12 due to bugs in the value range analysis, but it turns out we end up having some similar problems with gcc-11.3 too, so let's disable it there too. Older gcc versions end up being increasingly less relevant, and hopefully clang and newer version of gcc (ie gcc-13) end up working reliably enough that we still get the build coverage even when we disable this for some versions. Link: https://lore.kernel.org/all/20221227002941.GA2691687@roeck-us.net/ Link: https://lore.kernel.org/all/D8BDBF66-E44C-45D4-9758-BAAA4F0C1998@kernel.org/ Cc: Kees Cook Cc: Vlastimil Babka Cc: Guenter Roeck Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- init/Kconfig | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/init/Kconfig b/init/Kconfig index 94125d3b6893..0c214af99085 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -892,13 +892,17 @@ config CC_IMPLICIT_FALLTHROUGH default "-Wimplicit-fallthrough=5" if CC_IS_GCC && $(cc-option,-Wimplicit-fallthrough=5) default "-Wimplicit-fallthrough" if CC_IS_CLANG && $(cc-option,-Wunreachable-code-fallthrough) -# Currently, disable gcc-12 array-bounds globally. +# Currently, disable gcc-11,12 array-bounds globally. # We may want to target only particular configurations some day. +config GCC11_NO_ARRAY_BOUNDS + def_bool y + config GCC12_NO_ARRAY_BOUNDS def_bool y config CC_NO_ARRAY_BOUNDS bool + default y if CC_IS_GCC && GCC_VERSION >= 110000 && GCC_VERSION < 120000 && GCC11_NO_ARRAY_BOUNDS default y if CC_IS_GCC && GCC_VERSION >= 120000 && GCC_VERSION < 130000 && GCC12_NO_ARRAY_BOUNDS # From e8988e878af693ac13b0fa80ba2e72d22d68f2dd Mon Sep 17 00:00:00 2001 From: Frederick Lawler Date: Mon, 9 Jan 2023 10:39:06 -0600 Subject: [PATCH 170/543] net: sched: disallow noqueue for qdisc classes commit 96398560f26aa07e8f2969d73c8197e6a6d10407 upstream. While experimenting with applying noqueue to a classful queue discipline, we discovered a NULL pointer dereference in the __dev_queue_xmit() path that generates a kernel OOPS: # dev=enp0s5 # tc qdisc replace dev $dev root handle 1: htb default 1 # tc class add dev $dev parent 1: classid 1:1 htb rate 10mbit # tc qdisc add dev $dev parent 1:1 handle 10: noqueue # ping -I $dev -w 1 -c 1 1.1.1.1 [ 2.172856] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 2.173217] #PF: supervisor instruction fetch in kernel mode ... [ 2.178451] Call Trace: [ 2.178577] [ 2.178686] htb_enqueue+0x1c8/0x370 [ 2.178880] dev_qdisc_enqueue+0x15/0x90 [ 2.179093] __dev_queue_xmit+0x798/0xd00 [ 2.179305] ? _raw_write_lock_bh+0xe/0x30 [ 2.179522] ? __local_bh_enable_ip+0x32/0x70 [ 2.179759] ? ___neigh_create+0x610/0x840 [ 2.179968] ? eth_header+0x21/0xc0 [ 2.180144] ip_finish_output2+0x15e/0x4f0 [ 2.180348] ? dst_output+0x30/0x30 [ 2.180525] ip_push_pending_frames+0x9d/0xb0 [ 2.180739] raw_sendmsg+0x601/0xcb0 [ 2.180916] ? _raw_spin_trylock+0xe/0x50 [ 2.181112] ? _raw_spin_unlock_irqrestore+0x16/0x30 [ 2.181354] ? get_page_from_freelist+0xcd6/0xdf0 [ 2.181594] ? sock_sendmsg+0x56/0x60 [ 2.181781] sock_sendmsg+0x56/0x60 [ 2.181958] __sys_sendto+0xf7/0x160 [ 2.182139] ? handle_mm_fault+0x6e/0x1d0 [ 2.182366] ? do_user_addr_fault+0x1e1/0x660 [ 2.182627] __x64_sys_sendto+0x1b/0x30 [ 2.182881] do_syscall_64+0x38/0x90 [ 2.183085] entry_SYSCALL_64_after_hwframe+0x63/0xcd ... [ 2.187402] Previously in commit d66d6c3152e8 ("net: sched: register noqueue qdisc"), NULL was set for the noqueue discipline on noqueue init so that __dev_queue_xmit() falls through for the noqueue case. This also sets a bypass of the enqueue NULL check in the register_qdisc() function for the struct noqueue_disc_ops. Classful queue disciplines make it past the NULL check in __dev_queue_xmit() because the discipline is set to htb (in this case), and then in the call to __dev_xmit_skb(), it calls into htb_enqueue() which grabs a leaf node for a class and then calls qdisc_enqueue() by passing in a queue discipline which assumes ->enqueue() is not set to NULL. Fix this by not allowing classes to be assigned to the noqueue discipline. Linux TC Notes states that classes cannot be set to the noqueue discipline. [1] Let's enforce that here. Links: 1. https://linux-tc-notes.sourceforge.net/tc/doc/sch_noqueue.txt Fixes: d66d6c3152e8 ("net: sched: register noqueue qdisc") Cc: stable@vger.kernel.org Signed-off-by: Frederick Lawler Reviewed-by: Jakub Sitnicki Link: https://lore.kernel.org/r/20230109163906.706000-1-fred@cloudflare.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_api.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 4a27dfb1ba0f..c82532e20699 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1132,6 +1132,11 @@ skip: return -ENOENT; } + if (new && new->ops == &noqueue_qdisc_ops) { + NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class"); + return -EINVAL; + } + err = cops->graft(parent, cl, new, &old, extack); if (err) return err; From d6ad4bd1d896ae1daffd7628cd50f124280fb8b1 Mon Sep 17 00:00:00 2001 From: Clement Lecigne Date: Fri, 13 Jan 2023 13:07:45 +0100 Subject: [PATCH 171/543] ALSA: pcm: Move rwsem lock inside snd_ctl_elem_read to prevent UAF commit 56b88b50565cd8b946a2d00b0c83927b7ebb055e upstream. Takes rwsem lock inside snd_ctl_elem_read instead of snd_ctl_elem_read_user like it was done for write in commit 1fa4445f9adf1 ("ALSA: control - introduce snd_ctl_notify_one() helper"). Doing this way we are also fixing the following locking issue happening in the compat path which can be easily triggered and turned into an use-after-free. 64-bits: snd_ctl_ioctl snd_ctl_elem_read_user [takes controls_rwsem] snd_ctl_elem_read [lock properly held, all good] [drops controls_rwsem] 32-bits: snd_ctl_ioctl_compat snd_ctl_elem_write_read_compat ctl_elem_write_read snd_ctl_elem_read [missing lock, not good] CVE-2023-0266 was assigned for this issue. Cc: stable@kernel.org # 5.13+ Signed-off-by: Clement Lecigne Reviewed-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20230113120745.25464-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/control.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/sound/core/control.c b/sound/core/control.c index 50e7ba66f187..82aa1af1d1d8 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -1203,14 +1203,19 @@ static int snd_ctl_elem_read(struct snd_card *card, const u32 pattern = 0xdeadbeef; int ret; + down_read(&card->controls_rwsem); kctl = snd_ctl_find_id(card, &control->id); - if (kctl == NULL) - return -ENOENT; + if (kctl == NULL) { + ret = -ENOENT; + goto unlock; + } index_offset = snd_ctl_get_ioff(kctl, &control->id); vd = &kctl->vd[index_offset]; - if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_READ) || kctl->get == NULL) - return -EPERM; + if (!(vd->access & SNDRV_CTL_ELEM_ACCESS_READ) || kctl->get == NULL) { + ret = -EPERM; + goto unlock; + } snd_ctl_build_ioff(&control->id, kctl, index_offset); @@ -1220,7 +1225,7 @@ static int snd_ctl_elem_read(struct snd_card *card, info.id = control->id; ret = __snd_ctl_elem_info(card, kctl, &info, NULL); if (ret < 0) - return ret; + goto unlock; #endif if (!snd_ctl_skip_validation(&info)) @@ -1230,7 +1235,7 @@ static int snd_ctl_elem_read(struct snd_card *card, ret = kctl->get(kctl, control); snd_power_unref(card); if (ret < 0) - return ret; + goto unlock; if (!snd_ctl_skip_validation(&info) && sanity_check_elem_value(card, control, &info, pattern) < 0) { dev_err(card->dev, @@ -1238,8 +1243,11 @@ static int snd_ctl_elem_read(struct snd_card *card, control->id.iface, control->id.device, control->id.subdevice, control->id.name, control->id.index); - return -EINVAL; + ret = -EINVAL; + goto unlock; } +unlock: + up_read(&card->controls_rwsem); return ret; } @@ -1253,9 +1261,7 @@ static int snd_ctl_elem_read_user(struct snd_card *card, if (IS_ERR(control)) return PTR_ERR(control); - down_read(&card->controls_rwsem); result = snd_ctl_elem_read(card, control); - up_read(&card->controls_rwsem); if (result < 0) goto error; From 0e19738afc84fc2f8ed1ad1baa0373fcb8ebca36 Mon Sep 17 00:00:00 2001 From: Adrian Chan Date: Mon, 9 Jan 2023 16:05:20 -0500 Subject: [PATCH 172/543] ALSA: hda/hdmi: Add a HP device 0x8715 to force connect list commit de1ccb9e61728dd941fe0e955a7a129418657267 upstream. Add the 'HP Engage Flex Mini' device to the force connect list to enable audio through HDMI. Signed-off-by: Adrian Chan Cc: Link: https://lore.kernel.org/r/20230109210520.16060-1-adchan@google.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 386dd9d9143f..9ea633fe9339 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1981,6 +1981,7 @@ static const struct snd_pci_quirk force_connect_list[] = { SND_PCI_QUIRK(0x103c, 0x870f, "HP", 1), SND_PCI_QUIRK(0x103c, 0x871a, "HP", 1), SND_PCI_QUIRK(0x103c, 0x8711, "HP", 1), + SND_PCI_QUIRK(0x103c, 0x8715, "HP", 1), SND_PCI_QUIRK(0x1462, 0xec94, "MS-7C94", 1), SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", 1), {} From 57f6bea5218c37426ccf68b37fa66a0f476ef869 Mon Sep 17 00:00:00 2001 From: Jeremy Szu Date: Thu, 5 Jan 2023 12:41:53 +0800 Subject: [PATCH 173/543] ALSA: hda/realtek: fix mute/micmute LEDs don't work for a HP platform commit 9c694fbfe6f36017b060ad74c7565cb379852e40 upstream. There is a HP platform uses ALC236 codec which using GPIO2 to control mute LED and GPIO1 to control micmute LED. Thus, add a quirk to make them work. Signed-off-by: Jeremy Szu Cc: Link: https://lore.kernel.org/r/20230105044154.8242-1-jeremy.szu@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 3794b522c222..9865cc5dd56c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9406,6 +9406,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8b5d, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8b5e, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8bf0, "HP", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), From 8107550bfe1189d7459292a66aab30a74c349cda Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 5 Jan 2023 10:35:30 +0100 Subject: [PATCH 174/543] ALSA: hda: cs35l41: Don't return -EINVAL from system suspend/resume commit 15a59cb0a3d6ddf2cb79f8dc3081b3130aad3767 upstream. The recent commit to support the system suspend for CS35L41 caused a regression on the models with CS35L41_EXT_BOOST_NO_VSPK_SWITC boost type, as the suspend/resume callbacks just return -EINVAL. This is eventually handled as a fatal error and blocks the whole system suspend/resume. For avoiding the problem, this patch corrects the return code from cs35l41_system_suspend() and _resume() to 0, and replace dev_err() with dev_err_once() for stop spamming too much. Fixes: 88672826e2a4 ("ALSA: hda: cs35l41: Support System Suspend") Cc: Link: https://lore.kernel.org/all/e6751ac2-34f3-d13f-13db-8174fade8308@pm.me Link: https://lore.kernel.org/r/20230105093531.16960-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/cs35l41_hda.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c index e5f0549bf06d..c64420d389b0 100644 --- a/sound/pci/hda/cs35l41_hda.c +++ b/sound/pci/hda/cs35l41_hda.c @@ -598,8 +598,8 @@ static int cs35l41_system_suspend(struct device *dev) dev_dbg(cs35l41->dev, "System Suspend\n"); if (cs35l41->hw_cfg.bst_type == CS35L41_EXT_BOOST_NO_VSPK_SWITCH) { - dev_err(cs35l41->dev, "System Suspend not supported\n"); - return -EINVAL; + dev_err_once(cs35l41->dev, "System Suspend not supported\n"); + return 0; /* don't block the whole system suspend */ } ret = pm_runtime_force_suspend(dev); @@ -624,8 +624,8 @@ static int cs35l41_system_resume(struct device *dev) dev_dbg(cs35l41->dev, "System Resume\n"); if (cs35l41->hw_cfg.bst_type == CS35L41_EXT_BOOST_NO_VSPK_SWITCH) { - dev_err(cs35l41->dev, "System Resume not supported\n"); - return -EINVAL; + dev_err_once(cs35l41->dev, "System Resume not supported\n"); + return 0; /* don't block the whole system resume */ } if (cs35l41->reset_gpio) { From a6d58eff4f8367af588c49cbceed28147b1030c7 Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Tue, 3 Jan 2023 17:53:32 +0800 Subject: [PATCH 175/543] ALSA: hda - Enable headset mic on another Dell laptop with ALC3254 commit a5751933a7f6abbdad90d98f25a25bb4b133a9e6 upstream. There is another Dell Latitude laptop (1028:0c03) with Realtek codec ALC3254 which needs the ALC269_FIXUP_DELL4_MIC_NO_PRESENCE instead of the default matched ALC269_FIXUP_DELL1_MIC_NO_PRESENCE. Apply correct fixup for this particular model to enable headset mic. Signed-off-by: Chris Chiu Cc: Link: https://lore.kernel.org/r/20230103095332.730677-1-chris.chiu@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 9865cc5dd56c..764eb07bbaff 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9239,6 +9239,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0b1a, "Dell Precision 5570", ALC289_FIXUP_DUAL_SPK), SND_PCI_QUIRK(0x1028, 0x0b37, "Dell Inspiron 16 Plus 7620 2-in-1", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS), SND_PCI_QUIRK(0x1028, 0x0b71, "Dell Inspiron 16 Plus 7620", ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS), + SND_PCI_QUIRK(0x1028, 0x0c03, "Dell Precision 5340", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x0c19, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x0c1a, "Dell Precision 3340", ALC236_FIXUP_DELL_DUAL_CODECS), SND_PCI_QUIRK(0x1028, 0x0c1b, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS), From cb9b3951d6366c30f0085c9830381b1334cdcf62 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 5 Jan 2023 10:35:31 +0100 Subject: [PATCH 176/543] ALSA: hda: cs35l41: Check runtime suspend capability at runtime_idle commit ae50e2ab122cef68f46b7799fb9deffe3334f5e2 upstream. The runtime PM core checks with runtime_idle callback whether it can goes to the runtime suspend or not, and we can put the boost type check there instead of runtime_suspend and _resume calls. This will reduce the unnecessary runtime_suspend() calls. Fixes: 1873ebd30cc8 ("ALSA: hda: cs35l41: Support Hibernation during Suspend") Cc: Link: https://lore.kernel.org/r/20230105093531.16960-2-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/cs35l41_hda.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c index c64420d389b0..a5b10a6a33a5 100644 --- a/sound/pci/hda/cs35l41_hda.c +++ b/sound/pci/hda/cs35l41_hda.c @@ -647,6 +647,15 @@ static int cs35l41_system_resume(struct device *dev) return ret; } +static int cs35l41_runtime_idle(struct device *dev) +{ + struct cs35l41_hda *cs35l41 = dev_get_drvdata(dev); + + if (cs35l41->hw_cfg.bst_type == CS35L41_EXT_BOOST_NO_VSPK_SWITCH) + return -EBUSY; /* suspend not supported yet on this model */ + return 0; +} + static int cs35l41_runtime_suspend(struct device *dev) { struct cs35l41_hda *cs35l41 = dev_get_drvdata(dev); @@ -1536,7 +1545,8 @@ void cs35l41_hda_remove(struct device *dev) EXPORT_SYMBOL_NS_GPL(cs35l41_hda_remove, SND_HDA_SCODEC_CS35L41); const struct dev_pm_ops cs35l41_hda_pm_ops = { - RUNTIME_PM_OPS(cs35l41_runtime_suspend, cs35l41_runtime_resume, NULL) + RUNTIME_PM_OPS(cs35l41_runtime_suspend, cs35l41_runtime_resume, + cs35l41_runtime_idle) SYSTEM_SLEEP_PM_OPS(cs35l41_system_suspend, cs35l41_system_resume) }; EXPORT_SYMBOL_NS_GPL(cs35l41_hda_pm_ops, SND_HDA_SCODEC_CS35L41); From 38f3ee12661fdc2805e06942e4e3d604e03cd9cf Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 14 Jan 2023 10:33:46 +0100 Subject: [PATCH 177/543] Linux 6.1.6 Link: https://lore.kernel.org/r/20230112135326.981869724@linuxfoundation.org Tested-by: Conor Dooley Tested-by: Ronald Warsow Tested-by: Shuah Khan Tested-by: Kelsey Steele Tested-by: Takeshi Ogasawara Tested-by: Guenter Roeck Tested-by: Bagas Sanjaya Tested-by: Ron Economos Tested-by: Sudip Mukherjee Tested-by: Jon Hunter Tested-by: Linux Kernel Functional Testing Tested-by: Allen Pais Tested-by: Rudi Heitbaum Tested-by: Salvatore Bonaccorso Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ddbd2fc917c5..19e8c6dec6e5 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 5 +SUBLEVEL = 6 EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth From 76ef74d4a379faa451003621a84e3498044e7aa3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 11 Jan 2023 17:07:33 +0100 Subject: [PATCH 178/543] netfilter: nft_payload: incorrect arithmetics when fetching VLAN header bits commit 696e1a48b1a1b01edad542a1ef293665864a4dd0 upstream. If the offset + length goes over the ethernet + vlan header, then the length is adjusted to copy the bytes that are within the boundaries of the vlan_ethhdr scratchpad area. The remaining bytes beyond ethernet + vlan header are copied directly from the skbuff data area. Fix incorrect arithmetic operator: subtract, not add, the size of the vlan header in case of double-tagged packets to adjust the length accordingly to address CVE-2023-0179. Reported-by: Davide Ornaghi Fixes: f6ae9f120dad ("netfilter: nft_payload: add C-VLAN support") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nft_payload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 4edd899aeb9b..d7de2ecb287e 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -62,7 +62,7 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len) return false; if (offset + len > VLAN_ETH_HLEN + vlan_hlen) - ethlen -= offset + len - VLAN_ETH_HLEN + vlan_hlen; + ethlen -= offset + len - VLAN_ETH_HLEN - vlan_hlen; memcpy(dst_u8, vlanh + offset - vlan_hlen, ethlen); From 303a04b6562ec7c96eb91a6648a7e7f7e3e10d41 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 4 Jan 2023 16:09:44 +0100 Subject: [PATCH 179/543] Revert "ALSA: usb-audio: Drop superfluous interface setup at parsing" commit 16f1f838442dc6430d32d51ddda347b8421ec34b upstream. This reverts commit ac5e2fb425e1121ceef2b9d1b3ffccc195d55707. The commit caused a regression on Behringer UMC404HD (and likely others). As the change was meant only as a minor optimization, it's better to revert it to address the regression. Reported-and-tested-by: Michael Ralston Cc: Link: https://lore.kernel.org/r/CAC2975JXkS1A5Tj9b02G_sy25ZWN-ys+tc9wmkoS=qPgKCogSg@mail.gmail.com Link: https://lore.kernel.org/r/20230104150944.24918-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/stream.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/usb/stream.c b/sound/usb/stream.c index f75601ca2d52..f10f4e6d3fb8 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -1222,6 +1222,12 @@ static int __snd_usb_parse_audio_interface(struct snd_usb_audio *chip, if (err < 0) return err; } + + /* try to set the interface... */ + usb_set_interface(chip->dev, iface_no, 0); + snd_usb_init_pitch(chip, fp); + snd_usb_init_sample_rate(chip, fp, fp->rate_max); + usb_set_interface(chip->dev, iface_no, altno); } return 0; } From e59e115d67d66d690904708bfef80c400c16a28b Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Mon, 9 Jan 2023 16:12:49 +0100 Subject: [PATCH 180/543] ALSA: control-led: use strscpy in set_led_id() commit 70051cffb31b5ee09096351c3b41fcae6f89de31 upstream. The use of strncpy() in the set_led_id() was incorrect. The len variable should use 'min(sizeof(buf2) - 1, count)' expression. Use strscpy() function to simplify things and handle the error gracefully. Fixes: a135dfb5de15 ("ALSA: led control - add sysfs kcontrol LED marking layer") Reported-by: yang.yang29@zte.com.cn Link: https://lore.kernel.org/alsa-devel/202301091945513559977@zte.com.cn/ Cc: Signed-off-by: Jaroslav Kysela Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/control_led.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/core/control_led.c b/sound/core/control_led.c index f975cc85772b..3cadd40100f3 100644 --- a/sound/core/control_led.c +++ b/sound/core/control_led.c @@ -530,12 +530,11 @@ static ssize_t set_led_id(struct snd_ctl_led_card *led_card, const char *buf, si bool attach) { char buf2[256], *s, *os; - size_t len = max(sizeof(s) - 1, count); struct snd_ctl_elem_id id; int err; - strncpy(buf2, buf, len); - buf2[len] = '\0'; + if (strscpy(buf2, buf, sizeof(buf2)) < 0) + return -E2BIG; memset(&id, 0, sizeof(id)); id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; s = buf2; From e1e0a181aea375edfae2f9a59070f95d904980d1 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Mon, 9 Jan 2023 15:11:33 +0100 Subject: [PATCH 181/543] ALSA: usb-audio: Always initialize fixed_rate in snd_usb_find_implicit_fb_sync_format() commit 291e9da91403e0e628d7692b5ed505100e7b7706 upstream. Handle the fallback code path, too. Fixes: fd28941cff1c ("ALSA: usb-audio: Add new quirk FIXED_RATE for JBL Quantum810 Wireless") BugLink: https://lore.kernel.org/alsa-devel/Y7frf3N%2FxzvESEsN@kili/ Reported-by: Dan Carpenter Cc: Signed-off-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20230109141133.335543-1-perex@perex.cz Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/implicit.c | 3 ++- sound/usb/pcm.c | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/usb/implicit.c b/sound/usb/implicit.c index 41ac7185b42b..4727043fd745 100644 --- a/sound/usb/implicit.c +++ b/sound/usb/implicit.c @@ -471,7 +471,7 @@ snd_usb_find_implicit_fb_sync_format(struct snd_usb_audio *chip, subs = find_matching_substream(chip, stream, target->sync_ep, target->fmt_type); if (!subs) - return sync_fmt; + goto end; high_score = 0; list_for_each_entry(fp, &subs->fmt_list, list) { @@ -485,6 +485,7 @@ snd_usb_find_implicit_fb_sync_format(struct snd_usb_audio *chip, } } + end: if (fixed_rate) *fixed_rate = snd_usb_pcm_has_fixed_rate(subs); return sync_fmt; diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 99a66d0ef5b2..d2c652aa1385 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -163,6 +163,8 @@ bool snd_usb_pcm_has_fixed_rate(struct snd_usb_substream *subs) struct snd_usb_audio *chip = subs->stream->chip; int rate = -1; + if (!subs) + return false; if (!(chip->quirk_flags & QUIRK_FLAG_FIXED_RATE)) return false; list_for_each_entry(fp, &subs->fmt_list, list) { From 779c7c7ea8e011f8ba404dbc73964ce75ef421ba Mon Sep 17 00:00:00 2001 From: Yuchi Yang Date: Fri, 30 Dec 2022 15:22:25 +0800 Subject: [PATCH 182/543] ALSA: hda/realtek - Turn on power early commit 1f680609bf1beac20e2a31ddcb1b88874123c39f upstream. Turn on power early to avoid wrong state for power relation register. This can earlier update JD state when resume back. Signed-off-by: Yuchi Yang Cc: Link: https://lore.kernel.org/r/e35d8f4fa18f4448a2315cc7d4a3715f@realtek.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 764eb07bbaff..8362eb4642d8 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3564,6 +3564,15 @@ static void alc256_init(struct hda_codec *codec) hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp_pin_sense; + if (spec->ultra_low_power) { + alc_update_coef_idx(codec, 0x03, 1<<1, 1<<1); + alc_update_coef_idx(codec, 0x08, 3<<2, 3<<2); + alc_update_coef_idx(codec, 0x08, 7<<4, 0); + alc_update_coef_idx(codec, 0x3b, 1<<15, 0); + alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6); + msleep(30); + } + if (!hp_pin) hp_pin = 0x21; @@ -3575,14 +3584,6 @@ static void alc256_init(struct hda_codec *codec) msleep(2); alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x1); /* Low power */ - if (spec->ultra_low_power) { - alc_update_coef_idx(codec, 0x03, 1<<1, 1<<1); - alc_update_coef_idx(codec, 0x08, 3<<2, 3<<2); - alc_update_coef_idx(codec, 0x08, 7<<4, 0); - alc_update_coef_idx(codec, 0x3b, 1<<15, 0); - alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6); - msleep(30); - } snd_hda_codec_write(codec, hp_pin, 0, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); @@ -3713,6 +3714,13 @@ static void alc225_init(struct hda_codec *codec) hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp1_pin_sense, hp2_pin_sense; + if (spec->ultra_low_power) { + alc_update_coef_idx(codec, 0x08, 0x0f << 2, 3<<2); + alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6); + alc_update_coef_idx(codec, 0x33, 1<<11, 0); + msleep(30); + } + if (spec->codec_variant != ALC269_TYPE_ALC287 && spec->codec_variant != ALC269_TYPE_ALC245) /* required only at boot or S3 and S4 resume time */ @@ -3734,12 +3742,6 @@ static void alc225_init(struct hda_codec *codec) msleep(2); alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x1); /* Low power */ - if (spec->ultra_low_power) { - alc_update_coef_idx(codec, 0x08, 0x0f << 2, 3<<2); - alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6); - alc_update_coef_idx(codec, 0x33, 1<<11, 0); - msleep(30); - } if (hp1_pin_sense || spec->ultra_low_power) snd_hda_codec_write(codec, hp_pin, 0, From 82972d60f46f3241e3aa04a12c05dca0c2a1e166 Mon Sep 17 00:00:00 2001 From: Luka Guzenko Date: Tue, 10 Jan 2023 21:25:14 +0100 Subject: [PATCH 183/543] ALSA: hda/realtek: Enable mute/micmute LEDs on HP Spectre x360 13-aw0xxx commit ca88eeb308a221c2dcd4a64031d2e5fcd3db9eaa upstream. The HP Spectre x360 13-aw0xxx devices use the ALC285 codec with GPIO 0x04 controlling the micmute LED and COEF 0x0b index 8 controlling the mute LED. A quirk was added to make these work as well as a fixup. Signed-off-by: Luka Guzenko Cc: Link: https://lore.kernel.org/r/20230110202514.2792-1-l.guzenko@web.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8362eb4642d8..6fab7c8fc19a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4646,6 +4646,16 @@ static void alc285_fixup_hp_coef_micmute_led(struct hda_codec *codec, } } +static void alc285_fixup_hp_gpio_micmute_led(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) + spec->micmute_led_polarity = 1; + alc_fixup_hp_gpio_led(codec, action, 0, 0x04); +} + static void alc236_fixup_hp_coef_micmute_led(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -4667,6 +4677,13 @@ static void alc285_fixup_hp_mute_led(struct hda_codec *codec, alc285_fixup_hp_coef_micmute_led(codec, fix, action); } +static void alc285_fixup_hp_spectre_x360_mute_led(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + alc285_fixup_hp_mute_led_coefbit(codec, fix, action); + alc285_fixup_hp_gpio_micmute_led(codec, fix, action); +} + static void alc236_fixup_hp_mute_led(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -7108,6 +7125,7 @@ enum { ALC285_FIXUP_ASUS_G533Z_PINS, ALC285_FIXUP_HP_GPIO_LED, ALC285_FIXUP_HP_MUTE_LED, + ALC285_FIXUP_HP_SPECTRE_X360_MUTE_LED, ALC236_FIXUP_HP_GPIO_LED, ALC236_FIXUP_HP_MUTE_LED, ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF, @@ -8488,6 +8506,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_hp_mute_led, }, + [ALC285_FIXUP_HP_SPECTRE_X360_MUTE_LED] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_hp_spectre_x360_mute_led, + }, [ALC236_FIXUP_HP_GPIO_LED] = { .type = HDA_FIXUP_FUNC, .v.func = alc236_fixup_hp_gpio_led, @@ -9330,6 +9352,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO), SND_PCI_QUIRK(0x103c, 0x86e7, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), SND_PCI_QUIRK(0x103c, 0x86e8, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), + SND_PCI_QUIRK(0x103c, 0x86f9, "HP Spectre x360 13-aw0xxx", ALC285_FIXUP_HP_SPECTRE_X360_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8716, "HP Elite Dragonfly G2 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8720, "HP EliteBook x360 1040 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8724, "HP EliteBook 850 G7", ALC285_FIXUP_HP_GPIO_LED), From d778e68faa6a8f42a0bf19c1d9dd9d19b21d5c0a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 22 Oct 2022 04:17:53 -0400 Subject: [PATCH 184/543] KVM: x86: Do not return host topology information from KVM_GET_SUPPORTED_CPUID commit 45e966fcca03ecdcccac7cb236e16eea38cc18af upstream. Passing the host topology to the guest is almost certainly wrong and will confuse the scheduler. In addition, several fields of these CPUID leaves vary on each processor; it is simply impossible to return the right values from KVM_GET_SUPPORTED_CPUID in such a way that they can be passed to KVM_SET_CPUID2. The values that will most likely prevent confusion are all zeroes. Userspace will have to override it anyway if it wishes to present a specific topology to the guest. Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- Documentation/virt/kvm/api.rst | 14 ++++++++++++++ arch/x86/kvm/cpuid.c | 34 +++++++++++++++++----------------- 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 896914e3a847..b8ec88ef2efa 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -8248,6 +8248,20 @@ CPU[EAX=1]:ECX[24] (TSC_DEADLINE) is not reported by ``KVM_GET_SUPPORTED_CPUID`` It can be enabled if ``KVM_CAP_TSC_DEADLINE_TIMER`` is present and the kernel has enabled in-kernel emulation of the local APIC. +CPU topology +~~~~~~~~~~~~ + +Several CPUID values include topology information for the host CPU: +0x0b and 0x1f for Intel systems, 0x8000001e for AMD systems. Different +versions of KVM return different values for this information and userspace +should not rely on it. Currently they return all zeroes. + +If userspace wishes to set up a guest topology, it should be careful that +the values of these three leaves differ for each CPU. In particular, +the APIC ID is found in EDX for all subleaves of 0x0b and 0x1f, and in EAX +for 0x8000001e; the latter also encodes the core id and node id in bits +7:0 of EBX and ECX respectively. + Obsolete ioctls and capabilities ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 62bc7a01cecc..6047dbe04880 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -759,15 +759,21 @@ struct kvm_cpuid_array { int nent; }; -static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array, - u32 function, u32 index) +static struct kvm_cpuid_entry2 *get_next_cpuid(struct kvm_cpuid_array *array) { - struct kvm_cpuid_entry2 *entry; - if (array->nent >= array->maxnent) return NULL; - entry = &array->entries[array->nent++]; + return &array->entries[array->nent++]; +} + +static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array, + u32 function, u32 index) +{ + struct kvm_cpuid_entry2 *entry = get_next_cpuid(array); + + if (!entry) + return NULL; memset(entry, 0, sizeof(*entry)); entry->function = function; @@ -945,22 +951,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->edx = edx.full; break; } - /* - * Per Intel's SDM, the 0x1f is a superset of 0xb, - * thus they can be handled by common code. - */ case 0x1f: case 0xb: /* - * Populate entries until the level type (ECX[15:8]) of the - * previous entry is zero. Note, CPUID EAX.{0x1f,0xb}.0 is - * the starting entry, filled by the primary do_host_cpuid(). + * No topology; a valid topology is indicated by the presence + * of subleaf 1. */ - for (i = 1; entry->ecx & 0xff00; ++i) { - entry = do_host_cpuid(array, function, i); - if (!entry) - goto out; - } + entry->eax = entry->ebx = entry->ecx = 0; break; case 0xd: { u64 permitted_xcr0 = kvm_caps.supported_xcr0 & xstate_get_guest_group_perm(); @@ -1193,6 +1190,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->ebx = entry->ecx = entry->edx = 0; break; case 0x8000001e: + /* Do not return host topology information. */ + entry->eax = entry->ebx = entry->ecx = 0; + entry->edx = 0; /* reserved */ break; case 0x8000001F: if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) { From 9a1195c584321d49dd11aad90ce855994b2f4455 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2022 14:03:52 +0000 Subject: [PATCH 185/543] KVM: arm64: Fix S1PTW handling on RO memslots commit 406504c7b0405d74d74c15a667cd4c4620c3e7a9 upstream. A recent development on the EFI front has resulted in guests having their page tables baked in the firmware binary, and mapped into the IPA space as part of a read-only memslot. Not only is this legitimate, but it also results in added security, so thumbs up. It is possible to take an S1PTW translation fault if the S1 PTs are unmapped at stage-2. However, KVM unconditionally treats S1PTW as a write to correctly handle hardware AF/DB updates to the S1 PTs. Furthermore, KVM injects an exception into the guest for S1PTW writes. In the aforementioned case this results in the guest taking an abort it won't recover from, as the S1 PTs mapping the vectors suffer from the same problem. So clearly our handling is... wrong. Instead, switch to a two-pronged approach: - On S1PTW translation fault, handle the fault as a read - On S1PTW permission fault, handle the fault as a write This is of no consequence to SW that *writes* to its PTs (the write will trigger a non-S1PTW fault), and SW that uses RO PTs will not use HW-assisted AF/DB anyway, as that'd be wrong. Only in the case described in c4ad98e4b72c ("KVM: arm64: Assume write fault on S1PTW permission fault on instruction fetch") do we end-up with two back-to-back faults (page being evicted and faulted back). I don't think this is a case worth optimising for. Fixes: c4ad98e4b72c ("KVM: arm64: Assume write fault on S1PTW permission fault on instruction fetch") Reviewed-by: Oliver Upton Reviewed-by: Ard Biesheuvel Regression-tested-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/kvm_emulate.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 9bdba47f7e14..0d40c48d8132 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -373,8 +373,26 @@ static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu) { - if (kvm_vcpu_abt_iss1tw(vcpu)) - return true; + if (kvm_vcpu_abt_iss1tw(vcpu)) { + /* + * Only a permission fault on a S1PTW should be + * considered as a write. Otherwise, page tables baked + * in a read-only memslot will result in an exception + * being delivered in the guest. + * + * The drawback is that we end-up faulting twice if the + * guest is using any of HW AF/DB: a translation fault + * to map the page containing the PT (read only at + * first), then a permission fault to allow the flags + * to be set. + */ + switch (kvm_vcpu_trap_get_fault_type(vcpu)) { + case ESR_ELx_FSC_PERM: + return true; + default: + return false; + } + } if (kvm_vcpu_trap_is_iabt(vcpu)) return false; From 5b66b27b9deee212e2404bf4da801b94f4a84589 Mon Sep 17 00:00:00 2001 From: Ding Hui Date: Tue, 27 Dec 2022 23:09:36 +0800 Subject: [PATCH 186/543] efi: fix userspace infinite retry read efivars after EFI runtime services page fault commit e006ac3003080177cf0b673441a4241f77aaecce upstream. After [1][2], if we catch exceptions due to EFI runtime service, we will clear EFI_RUNTIME_SERVICES bit to disable EFI runtime service, then the subsequent routine which invoke the EFI runtime service should fail. But the userspace cat efivars through /sys/firmware/efi/efivars/ will stuck and infinite loop calling read() due to efivarfs_file_read() return -EINTR. The -EINTR is converted from EFI_ABORTED by efi_status_to_err(), and is an improper return value in this situation, so let virt_efi_xxx() return EFI_DEVICE_ERROR and converted to -EIO to invoker. Cc: Fixes: 3425d934fc03 ("efi/x86: Handle page faults occurring while running EFI runtime services") Fixes: 23715a26c8d8 ("arm64: efi: Recover from synchronous exceptions occurring in firmware") Signed-off-by: Ding Hui Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/runtime-wrappers.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index f3e54f6616f0..60075e0e4943 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -62,6 +62,7 @@ struct efi_runtime_work efi_rts_work; \ if (!efi_enabled(EFI_RUNTIME_SERVICES)) { \ pr_warn_once("EFI Runtime Services are disabled!\n"); \ + efi_rts_work.status = EFI_DEVICE_ERROR; \ goto exit; \ } \ \ From e2cee489f24feaf922cdffdbe56605337dfc8f1a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 9 Jan 2023 10:44:31 +0100 Subject: [PATCH 187/543] efi: tpm: Avoid READ_ONCE() for accessing the event log commit d3f450533bbcb6dd4d7d59cadc9b61b7321e4ac1 upstream. Nathan reports that recent kernels built with LTO will crash when doing EFI boot using Fedora's GRUB and SHIM. The culprit turns out to be a misaligned load from the TPM event log, which is annotated with READ_ONCE(), and under LTO, this gets translated into a LDAR instruction which does not tolerate misaligned accesses. Interestingly, this does not happen when booting the same kernel straight from the UEFI shell, and so the fact that the event log may appear misaligned in memory may be caused by a bug in GRUB or SHIM. However, using READ_ONCE() to access firmware tables is slightly unusual in any case, and here, we only need to ensure that 'event' is not dereferenced again after it gets unmapped, but this is already taken care of by the implicit barrier() semantics of the early_memunmap() call. Cc: Cc: Peter Jones Cc: Jarkko Sakkinen Cc: Matthew Garrett Reported-by: Nathan Chancellor Tested-by: Nathan Chancellor Link: https://github.com/ClangBuiltLinux/linux/issues/1782 Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- include/linux/tpm_eventlog.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h index 20c0ff54b7a0..7d68a5cc5881 100644 --- a/include/linux/tpm_eventlog.h +++ b/include/linux/tpm_eventlog.h @@ -198,8 +198,8 @@ static __always_inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *ev * The loop below will unmap these fields if the log is larger than * one page, so save them here for reference: */ - count = READ_ONCE(event->count); - event_type = READ_ONCE(event->event_type); + count = event->count; + event_type = event->event_type; /* Verify that it's the log header */ if (event_header->pcr_idx != 0 || From 78c8b66a697abed34c34a37eaa7fc3865b4f3ec4 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 4 Jan 2023 10:47:39 -0700 Subject: [PATCH 188/543] docs: Fix the docs build with Sphinx 6.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0283189e8f3d0917e2ac399688df85211f48447b upstream. Sphinx 6.0 removed the execfile_() function, which we use as part of the configuration process. They *did* warn us... Just open-code the functionality as is done in Sphinx itself. Tested (using SPHINX_CONF, since this code is only executed with an alternative config file) on various Sphinx versions from 2.5 through 6.0. Reported-by: Martin Liška Cc: stable@vger.kernel.org Signed-off-by: Jonathan Corbet Signed-off-by: Greg Kroah-Hartman --- Documentation/sphinx/load_config.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/sphinx/load_config.py b/Documentation/sphinx/load_config.py index eeb394b39e2c..8b416bfd75ac 100644 --- a/Documentation/sphinx/load_config.py +++ b/Documentation/sphinx/load_config.py @@ -3,7 +3,7 @@ import os import sys -from sphinx.util.pycompat import execfile_ +from sphinx.util.osutil import fs_encoding # ------------------------------------------------------------------------------ def loadConfig(namespace): @@ -48,7 +48,9 @@ def loadConfig(namespace): sys.stdout.write("load additional sphinx-config: %s\n" % config_file) config = namespace.copy() config['__file__'] = config_file - execfile_(config_file, config) + with open(config_file, 'rb') as f: + code = compile(f.read(), fs_encoding, 'exec') + exec(code, config) del config['__file__'] namespace.update(config) else: From 4ad6c063541665c407d17e1faf2fe4f04e947dcc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 9 Jan 2023 14:46:10 -0700 Subject: [PATCH 189/543] io_uring/poll: add hash if ready poll request can't complete inline commit febb985c06cb6f5fac63598c0bffd4fd823d110d upstream. If we don't, then we may lose access to it completely, leading to a request leak. This will eventually stall the ring exit process as well. Cc: stable@vger.kernel.org Fixes: 49f1c68e048f ("io_uring: optimise submission side poll_refs") Reported-and-tested-by: syzbot+6c95df01470a47fc3af4@syzkaller.appspotmail.com Link: https://lore.kernel.org/io-uring/0000000000009f829805f1ce87b2@google.com/ Suggested-by: Pavel Begunkov Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/poll.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/io_uring/poll.c b/io_uring/poll.c index fded1445a803..df42fd8a6ab0 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -549,6 +549,14 @@ static bool io_poll_can_finish_inline(struct io_kiocb *req, return pt->owning || io_poll_get_ownership(req); } +static void io_poll_add_hash(struct io_kiocb *req) +{ + if (req->flags & REQ_F_HASH_LOCKED) + io_poll_req_insert_locked(req); + else + io_poll_req_insert(req); +} + /* * Returns 0 when it's handed over for polling. The caller owns the requests if * it returns non-zero, but otherwise should not touch it. Negative values @@ -607,18 +615,17 @@ static int __io_arm_poll_handler(struct io_kiocb *req, if (mask && ((poll->events & (EPOLLET|EPOLLONESHOT)) == (EPOLLET|EPOLLONESHOT))) { - if (!io_poll_can_finish_inline(req, ipt)) + if (!io_poll_can_finish_inline(req, ipt)) { + io_poll_add_hash(req); return 0; + } io_poll_remove_entries(req); ipt->result_mask = mask; /* no one else has access to the req, forget about the ref */ return 1; } - if (req->flags & REQ_F_HASH_LOCKED) - io_poll_req_insert_locked(req); - else - io_poll_req_insert(req); + io_poll_add_hash(req); if (mask && (poll->events & EPOLLET) && io_poll_can_finish_inline(req, ipt)) { From 87b3a402bbc535dfdd86f02944ee2aaa4c62597c Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 22 Dec 2022 18:12:49 +0000 Subject: [PATCH 190/543] arm64: mte: Fix double-freeing of the temporary tag storage during coredump commit 736eedc974eaafbf4360e0ea85fc892cea72a223 upstream. Commit 16decce22efa ("arm64: mte: Fix the stack frame size warning in mte_dump_tag_range()") moved the temporary tag storage array from the stack to slab but it also introduced an error in double freeing this object. Remove the in-loop freeing. Fixes: 16decce22efa ("arm64: mte: Fix the stack frame size warning in mte_dump_tag_range()") Cc: # 5.18.x Signed-off-by: Catalin Marinas Reported-by: Seth Jenkins Cc: Will Deacon Link: https://lore.kernel.org/r/20221222181251.1345752-2-catalin.marinas@arm.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/elfcore.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c index 27ef7ad3ffd2..4e3f84799669 100644 --- a/arch/arm64/kernel/elfcore.c +++ b/arch/arm64/kernel/elfcore.c @@ -65,7 +65,6 @@ static int mte_dump_tag_range(struct coredump_params *cprm, mte_save_page_tags(page_address(page), tags); put_page(page); if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE)) { - mte_free_tag_storage(tags); ret = 0; break; } From f5731a7924866282efc133c2b2f9d2a97a3a3dd5 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 22 Dec 2022 18:12:51 +0000 Subject: [PATCH 191/543] arm64: mte: Avoid the racy walk of the vma list during core dump commit 4f4c549feb4ecca95ae9abb88887b941d196f83a upstream. The MTE coredump code in arch/arm64/kernel/elfcore.c iterates over the vma list without the mmap_lock held. This can race with another process or userfaultfd concurrently modifying the vma list. Change the for_each_mte_vma macro and its callers to instead use the vma snapshot taken by dump_vma_snapshot() and stored in the cprm object. Fixes: 6dd8b1a0b6cb ("arm64: mte: Dump the MTE tags in the core file") Cc: # 5.18.x Signed-off-by: Catalin Marinas Reported-by: Seth Jenkins Suggested-by: Seth Jenkins Cc: Will Deacon Link: https://lore.kernel.org/r/20221222181251.1345752-4-catalin.marinas@arm.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/elfcore.c | 56 +++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 30 deletions(-) diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c index 4e3f84799669..4077a625342b 100644 --- a/arch/arm64/kernel/elfcore.c +++ b/arch/arm64/kernel/elfcore.c @@ -8,28 +8,27 @@ #include #include -#define for_each_mte_vma(vmi, vma) \ +#define for_each_mte_vma(cprm, i, m) \ if (system_supports_mte()) \ - for_each_vma(vmi, vma) \ - if (vma->vm_flags & VM_MTE) + for (i = 0, m = cprm->vma_meta; \ + i < cprm->vma_count; \ + i++, m = cprm->vma_meta + i) \ + if (m->flags & VM_MTE) -static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma) +static unsigned long mte_vma_tag_dump_size(struct core_vma_metadata *m) { - if (vma->vm_flags & VM_DONTDUMP) - return 0; - - return vma_pages(vma) * MTE_PAGE_TAG_STORAGE; + return (m->dump_size >> PAGE_SHIFT) * MTE_PAGE_TAG_STORAGE; } /* Derived from dump_user_range(); start/end must be page-aligned */ static int mte_dump_tag_range(struct coredump_params *cprm, - unsigned long start, unsigned long end) + unsigned long start, unsigned long len) { int ret = 1; unsigned long addr; void *tags = NULL; - for (addr = start; addr < end; addr += PAGE_SIZE) { + for (addr = start; addr < start + len; addr += PAGE_SIZE) { struct page *page = get_dump_page(addr); /* @@ -78,11 +77,11 @@ static int mte_dump_tag_range(struct coredump_params *cprm, Elf_Half elf_core_extra_phdrs(void) { - struct vm_area_struct *vma; + int i; + struct core_vma_metadata *m; int vma_count = 0; - VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(vmi, vma) + for_each_mte_vma(cprm, i, m) vma_count++; return vma_count; @@ -90,18 +89,18 @@ Elf_Half elf_core_extra_phdrs(void) int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) { - struct vm_area_struct *vma; - VMA_ITERATOR(vmi, current->mm, 0); + int i; + struct core_vma_metadata *m; - for_each_mte_vma(vmi, vma) { + for_each_mte_vma(cprm, i, m) { struct elf_phdr phdr; phdr.p_type = PT_AARCH64_MEMTAG_MTE; phdr.p_offset = offset; - phdr.p_vaddr = vma->vm_start; + phdr.p_vaddr = m->start; phdr.p_paddr = 0; - phdr.p_filesz = mte_vma_tag_dump_size(vma); - phdr.p_memsz = vma->vm_end - vma->vm_start; + phdr.p_filesz = mte_vma_tag_dump_size(m); + phdr.p_memsz = m->end - m->start; offset += phdr.p_filesz; phdr.p_flags = 0; phdr.p_align = 0; @@ -115,26 +114,23 @@ int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) size_t elf_core_extra_data_size(void) { - struct vm_area_struct *vma; + int i; + struct core_vma_metadata *m; size_t data_size = 0; - VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(vmi, vma) - data_size += mte_vma_tag_dump_size(vma); + for_each_mte_vma(cprm, i, m) + data_size += mte_vma_tag_dump_size(m); return data_size; } int elf_core_write_extra_data(struct coredump_params *cprm) { - struct vm_area_struct *vma; - VMA_ITERATOR(vmi, current->mm, 0); + int i; + struct core_vma_metadata *m; - for_each_mte_vma(vmi, vma) { - if (vma->vm_flags & VM_DONTDUMP) - continue; - - if (!mte_dump_tag_range(cprm, vma->vm_start, vma->vm_end)) + for_each_mte_vma(cprm, i, m) { + if (!mte_dump_tag_range(cprm, m->start, m->dump_size)) return 0; } From 8f282a84f31c9f70350ce809eda132fbe99cf5d5 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 4 Jan 2023 15:16:26 +0000 Subject: [PATCH 192/543] arm64: cmpxchg_double*: hazard against entire exchange variable commit 031af50045ea97ed4386eb3751ca2c134d0fc911 upstream. The inline assembly for arm64's cmpxchg_double*() implementations use a +Q constraint to hazard against other accesses to the memory location being exchanged. However, the pointer passed to the constraint is a pointer to unsigned long, and thus the hazard only applies to the first 8 bytes of the location. GCC can take advantage of this, assuming that other portions of the location are unchanged, leading to a number of potential problems. This is similar to what we fixed back in commit: fee960bed5e857eb ("arm64: xchg: hazard against entire exchange variable") ... but we forgot to adjust cmpxchg_double*() similarly at the same time. The same problem applies, as demonstrated with the following test: | struct big { | u64 lo, hi; | } __aligned(128); | | unsigned long foo(struct big *b) | { | u64 hi_old, hi_new; | | hi_old = b->hi; | cmpxchg_double_local(&b->lo, &b->hi, 0x12, 0x34, 0x56, 0x78); | hi_new = b->hi; | | return hi_old ^ hi_new; | } ... which GCC 12.1.0 compiles as: | 0000000000000000 : | 0: d503233f paciasp | 4: aa0003e4 mov x4, x0 | 8: 1400000e b 40 | c: d2800240 mov x0, #0x12 // #18 | 10: d2800681 mov x1, #0x34 // #52 | 14: aa0003e5 mov x5, x0 | 18: aa0103e6 mov x6, x1 | 1c: d2800ac2 mov x2, #0x56 // #86 | 20: d2800f03 mov x3, #0x78 // #120 | 24: 48207c82 casp x0, x1, x2, x3, [x4] | 28: ca050000 eor x0, x0, x5 | 2c: ca060021 eor x1, x1, x6 | 30: aa010000 orr x0, x0, x1 | 34: d2800000 mov x0, #0x0 // #0 <--- BANG | 38: d50323bf autiasp | 3c: d65f03c0 ret | 40: d2800240 mov x0, #0x12 // #18 | 44: d2800681 mov x1, #0x34 // #52 | 48: d2800ac2 mov x2, #0x56 // #86 | 4c: d2800f03 mov x3, #0x78 // #120 | 50: f9800091 prfm pstl1strm, [x4] | 54: c87f1885 ldxp x5, x6, [x4] | 58: ca0000a5 eor x5, x5, x0 | 5c: ca0100c6 eor x6, x6, x1 | 60: aa0600a6 orr x6, x5, x6 | 64: b5000066 cbnz x6, 70 | 68: c8250c82 stxp w5, x2, x3, [x4] | 6c: 35ffff45 cbnz w5, 54 | 70: d2800000 mov x0, #0x0 // #0 <--- BANG | 74: d50323bf autiasp | 78: d65f03c0 ret Notice that at the lines with "BANG" comments, GCC has assumed that the higher 8 bytes are unchanged by the cmpxchg_double() call, and that `hi_old ^ hi_new` can be reduced to a constant zero, for both LSE and LL/SC versions of cmpxchg_double(). This patch fixes the issue by passing a pointer to __uint128_t into the +Q constraint, ensuring that the compiler hazards against the entire 16 bytes being modified. With this change, GCC 12.1.0 compiles the above test as: | 0000000000000000 : | 0: f9400407 ldr x7, [x0, #8] | 4: d503233f paciasp | 8: aa0003e4 mov x4, x0 | c: 1400000f b 48 | 10: d2800240 mov x0, #0x12 // #18 | 14: d2800681 mov x1, #0x34 // #52 | 18: aa0003e5 mov x5, x0 | 1c: aa0103e6 mov x6, x1 | 20: d2800ac2 mov x2, #0x56 // #86 | 24: d2800f03 mov x3, #0x78 // #120 | 28: 48207c82 casp x0, x1, x2, x3, [x4] | 2c: ca050000 eor x0, x0, x5 | 30: ca060021 eor x1, x1, x6 | 34: aa010000 orr x0, x0, x1 | 38: f9400480 ldr x0, [x4, #8] | 3c: d50323bf autiasp | 40: ca0000e0 eor x0, x7, x0 | 44: d65f03c0 ret | 48: d2800240 mov x0, #0x12 // #18 | 4c: d2800681 mov x1, #0x34 // #52 | 50: d2800ac2 mov x2, #0x56 // #86 | 54: d2800f03 mov x3, #0x78 // #120 | 58: f9800091 prfm pstl1strm, [x4] | 5c: c87f1885 ldxp x5, x6, [x4] | 60: ca0000a5 eor x5, x5, x0 | 64: ca0100c6 eor x6, x6, x1 | 68: aa0600a6 orr x6, x5, x6 | 6c: b5000066 cbnz x6, 78 | 70: c8250c82 stxp w5, x2, x3, [x4] | 74: 35ffff45 cbnz w5, 5c | 78: f9400480 ldr x0, [x4, #8] | 7c: d50323bf autiasp | 80: ca0000e0 eor x0, x7, x0 | 84: d65f03c0 ret ... sampling the high 8 bytes before and after the cmpxchg, and performing an EOR, as we'd expect. For backporting, I've tested this atop linux-4.9.y with GCC 5.5.0. Note that linux-4.9.y is oldest currently supported stable release, and mandates GCC 5.1+. Unfortunately I couldn't get a GCC 5.1 binary to run on my machines due to library incompatibilities. I've also used a standalone test to check that we can use a __uint128_t pointer in a +Q constraint at least as far back as GCC 4.8.5 and LLVM 3.9.1. Fixes: 5284e1b4bc8a ("arm64: xchg: Implement cmpxchg_double") Fixes: e9a4b795652f ("arm64: cmpxchg_dbl: patch in lse instructions when supported by the CPU") Reported-by: Boqun Feng Link: https://lore.kernel.org/lkml/Y6DEfQXymYVgL3oJ@boqun-archlinux/ Reported-by: Peter Zijlstra Link: https://lore.kernel.org/lkml/Y6GXoO4qmH9OIZ5Q@hirez.programming.kicks-ass.net/ Signed-off-by: Mark Rutland Cc: stable@vger.kernel.org Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Steve Capper Cc: Will Deacon Link: https://lore.kernel.org/r/20230104151626.3262137-1-mark.rutland@arm.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/atomic_ll_sc.h | 2 +- arch/arm64/include/asm/atomic_lse.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index 0890e4f568fb..cbb3d961123b 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -315,7 +315,7 @@ __ll_sc__cmpxchg_double##name(unsigned long old1, \ " cbnz %w0, 1b\n" \ " " #mb "\n" \ "2:" \ - : "=&r" (tmp), "=&r" (ret), "+Q" (*(unsigned long *)ptr) \ + : "=&r" (tmp), "=&r" (ret), "+Q" (*(__uint128_t *)ptr) \ : "r" (old1), "r" (old2), "r" (new1), "r" (new2) \ : cl); \ \ diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 52075e93de6c..a94d6dacc029 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -311,7 +311,7 @@ __lse__cmpxchg_double##name(unsigned long old1, \ " eor %[old2], %[old2], %[oldval2]\n" \ " orr %[old1], %[old1], %[old2]" \ : [old1] "+&r" (x0), [old2] "+&r" (x1), \ - [v] "+Q" (*(unsigned long *)ptr) \ + [v] "+Q" (*(__uint128_t *)ptr) \ : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \ [oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \ : cl); \ From 0bb6742bf81a09fb7c9c23a6f59dc0d3242eaac6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 10 Jan 2023 16:30:28 +0100 Subject: [PATCH 193/543] ACPI: Fix selecting wrong ACPI fwnode for the iGPU on some Dell laptops commit f64e4275ef7407d5c3eca20436519bbd1f796e40 upstream. The Dell Latitude E6430 both with and without the optional NVidia dGPU has a bug in its ACPI tables which is causing Linux to assign the wrong ACPI fwnode / companion to the pci_device for the i915 iGPU. Specifically under the PCI root bridge there are these 2 ACPI Device()s : Scope (_SB.PCI0) { Device (GFX0) { Name (_ADR, 0x00020000) // _ADR: Address } ... Device (VID) { Name (_ADR, 0x00020000) // _ADR: Address ... Method (_DOS, 1, NotSerialized) // _DOS: Disable Output Switching { VDP8 = Arg0 VDP1 (One, VDP8) } Method (_DOD, 0, NotSerialized) // _DOD: Display Output Devices { ... } ... } } The non-functional GFX0 ACPI device is a problem, because this gets returned as ACPI companion-device by acpi_find_child_device() for the iGPU. This is a long standing problem and the i915 driver does use the ACPI companion for some things, but works fine without it. However since commit 63f534b8bad9 ("ACPI: PCI: Rework acpi_get_pci_dev()") acpi_get_pci_dev() relies on the physical-node pointer in the acpi_device and that is set on the wrong acpi_device because of the wrong acpi_find_child_device() return. This breaks the ACPI video code, leading to non working backlight control in some cases. Add a type.backlight flag, mark ACPI video bus devices with this and make find_child_checks() return a higher score for children with this flag set, so that it picks the right companion-device. Fixes: 63f534b8bad9 ("ACPI: PCI: Rework acpi_get_pci_dev()") Co-developed-by: Rafael J. Wysocki Signed-off-by: Hans de Goede Cc: 6.1+ # 6.1+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/glue.c | 14 ++++++++++++-- drivers/acpi/scan.c | 7 +++++-- include/acpi/acpi_bus.h | 3 ++- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 204fe94c7e45..a194f30876c5 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -75,7 +75,8 @@ static struct acpi_bus_type *acpi_get_bus_type(struct device *dev) } #define FIND_CHILD_MIN_SCORE 1 -#define FIND_CHILD_MAX_SCORE 2 +#define FIND_CHILD_MID_SCORE 2 +#define FIND_CHILD_MAX_SCORE 3 static int match_any(struct acpi_device *adev, void *not_used) { @@ -96,8 +97,17 @@ static int find_child_checks(struct acpi_device *adev, bool check_children) return -ENODEV; status = acpi_evaluate_integer(adev->handle, "_STA", NULL, &sta); - if (status == AE_NOT_FOUND) + if (status == AE_NOT_FOUND) { + /* + * Special case: backlight device objects without _STA are + * preferred to other objects with the same _ADR value, because + * it is more likely that they are actually useful. + */ + if (adev->pnp.type.backlight) + return FIND_CHILD_MID_SCORE; + return FIND_CHILD_MIN_SCORE; + } if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED)) return -ENODEV; diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index b47e93a24a9a..dbfa58e799e2 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1370,9 +1370,12 @@ static void acpi_set_pnp_ids(acpi_handle handle, struct acpi_device_pnp *pnp, * Some devices don't reliably have _HIDs & _CIDs, so add * synthetic HIDs to make sure drivers can find them. */ - if (acpi_is_video_device(handle)) + if (acpi_is_video_device(handle)) { acpi_add_id(pnp, ACPI_VIDEO_HID); - else if (acpi_bay_match(handle)) + pnp->type.backlight = 1; + break; + } + if (acpi_bay_match(handle)) acpi_add_id(pnp, ACPI_BAY_HID); else if (acpi_dock_match(handle)) acpi_add_id(pnp, ACPI_DOCK_HID); diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index c09d72986968..ab2d6266038a 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -230,7 +230,8 @@ struct acpi_pnp_type { u32 hardware_id:1; u32 bus_address:1; u32 platform_id:1; - u32 reserved:29; + u32 backlight:1; + u32 reserved:28; }; struct acpi_device_pnp { From 2da1ba6f56f66202769761cf9308c0b664431474 Mon Sep 17 00:00:00 2001 From: Noor Azura Ahmad Tarmizi Date: Wed, 11 Jan 2023 13:02:00 +0800 Subject: [PATCH 194/543] net: stmmac: add aux timestamps fifo clearance wait commit ae9dcb91c6069e20b3b9505d79cbc89fd6e086f5 upstream. Add timeout polling wait for auxiliary timestamps snapshot FIFO clear bit (ATSFC) to clear. This is to ensure no residue fifo value is being read erroneously. Fixes: f4da56529da6 ("net: stmmac: Add support for external trigger timestamping") Cc: # 5.10.x Signed-off-by: Noor Azura Ahmad Tarmizi Link: https://lore.kernel.org/r/20230111050200.2130-1-noor.azura.ahmad.tarmizi@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 4d11980dcd64..9c91a3dc8e38 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -219,7 +219,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp, } writel(acr_value, ptpaddr + PTP_ACR); mutex_unlock(&priv->aux_ts_lock); - ret = 0; + /* wait for auxts fifo clear to finish */ + ret = readl_poll_timeout(ptpaddr + PTP_ACR, acr_value, + !(acr_value & PTP_ACR_ATSFC), + 10, 10000); break; default: From 90e585b6ab23d047e8c09716487a7de0d3f4b3d4 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 10 Jan 2023 20:56:59 +0200 Subject: [PATCH 195/543] perf auxtrace: Fix address filter duplicate symbol selection commit cf129830ee820f7fc90b98df193cd49d49344d09 upstream. When a match has been made to the nth duplicate symbol, return success not error. Example: Before: $ cat file.c cat: file.c: No such file or directory $ cat file1.c #include static void func(void) { printf("First func\n"); } void other(void); int main() { func(); other(); return 0; } $ cat file2.c #include static void func(void) { printf("Second func\n"); } void other(void) { func(); } $ gcc -Wall -Wextra -o test file1.c file2.c $ perf record -e intel_pt//u --filter 'filter func @ ./test' -- ./test Multiple symbols with name 'func' #1 0x1149 l func which is near main #2 0x1179 l func which is near other Disambiguate symbol name by inserting #n after the name e.g. func #2 Or select a global symbol by inserting #0 or #g or #G Failed to parse address filter: 'filter func @ ./test' Filter format is: filter|start|stop|tracestop [/ ] [@] Where multiple filters are separated by space or comma. $ perf record -e intel_pt//u --filter 'filter func #2 @ ./test' -- ./test Failed to parse address filter: 'filter func #2 @ ./test' Filter format is: filter|start|stop|tracestop [/ ] [@] Where multiple filters are separated by space or comma. After: $ perf record -e intel_pt//u --filter 'filter func #2 @ ./test' -- ./test First func Second func [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.016 MB perf.data ] $ perf script --itrace=b -Ftime,flags,ip,sym,addr --ns 1231062.526977619: tr strt 0 [unknown] => 558495708179 func 1231062.526977619: tr end call 558495708188 func => 558495708050 _init 1231062.526979286: tr strt 0 [unknown] => 55849570818d func 1231062.526979286: tr end return 55849570818f func => 55849570819d other Fixes: 1b36c03e356936d6 ("perf record: Add support for using symbols in address filters") Reported-by: Dmitrii Dolgov <9erthalion6@gmail.com> Signed-off-by: Adrian Hunter Tested-by: Dmitry Dolgov <9erthalion6@gmail.com> Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230110185659.15979-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/auxtrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 46ada5ec3f9a..47062f459ccd 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -2610,7 +2610,7 @@ static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start, *size = sym->start - *start; if (idx > 0) { if (*size) - return 1; + return 0; } else if (dso_sym_match(sym, sym_name, &cnt, idx)) { print_duplicate_syms(dso, sym_name); return -EINVAL; From ab4b5a2154bc008db41c21fda63d1fc17d716271 Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Mon, 14 Nov 2022 11:40:08 +0100 Subject: [PATCH 196/543] s390/kexec: fix ipl report address for kdump commit c2337a40e04dde1692b5b0a46ecc59f89aaba8a1 upstream. This commit addresses the following erroneous situation with file-based kdump executed on a system with a valid IPL report. On s390, a kdump kernel, its initrd and IPL report if present are loaded into a special and reserved on boot memory region - crashkernel. When a system crashes and kdump was activated before, the purgatory code is entered first which swaps the crashkernel and [0 - crashkernel size] memory regions. Only after that the kdump kernel is entered. For this reason, the pointer to an IPL report in lowcore must point to the IPL report after the swap and not to the address of the IPL report that was located in crashkernel memory region before the swap. Failing to do so, makes the kdump's decompressor try to read memory from the crashkernel memory region which already contains the production's kernel memory. The situation described above caused spontaneous kdump failures/hangs on systems where the Secure IPL is activated because on such systems an IPL report is always present. In that case kdump's decompressor tried to parse an IPL report which frequently lead to illegal memory accesses because an IPL report contains addresses to various data. Cc: Fixes: 99feaa717e55 ("s390/kexec_file: Create ipl report and pass to next kernel") Reviewed-by: Vasily Gorbik Signed-off-by: Alexander Egorenkov Signed-off-by: Heiko Carstens Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/machine_kexec_file.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index fc6d5f58debe..2df94d32140c 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -187,8 +187,6 @@ static int kexec_file_add_ipl_report(struct kimage *image, data->memsz = ALIGN(data->memsz, PAGE_SIZE); buf.mem = data->memsz; - if (image->type == KEXEC_TYPE_CRASH) - buf.mem += crashk_res.start; ptr = (void *)ipl_cert_list_addr; end = ptr + ipl_cert_list_size; @@ -225,6 +223,9 @@ static int kexec_file_add_ipl_report(struct kimage *image, data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr); *lc_ipl_parmblock_ptr = (__u32)buf.mem; + if (image->type == KEXEC_TYPE_CRASH) + buf.mem += crashk_res.start; + ret = kexec_add_buffer(&buf); out: return ret; From 20fa7855af45e533cddab564203921e2a6e981ef Mon Sep 17 00:00:00 2001 From: "Ivan T. Ivanov" Date: Fri, 6 Jan 2023 15:19:05 +0200 Subject: [PATCH 197/543] brcmfmac: Prefer DT board type over DMI board type commit a5a36720c3f650f859f5e9535dd62d06f13f4f3b upstream. The introduction of support for Apple board types inadvertently changed the precedence order, causing hybrid SMBIOS+DT platforms to look up the firmware using the DMI information instead of the device tree compatible to generate the board type. Revert back to the old behavior, as affected platforms use firmwares named after the DT compatible. Fixes: 7682de8b3351 ("wifi: brcmfmac: of: Fetch Apple properties") [1] https://bugzilla.opensuse.org/show_bug.cgi?id=1206697#c13 Cc: stable@vger.kernel.org Signed-off-by: Ivan T. Ivanov Reviewed-by: Hector Martin Reviewed-by: Arend van Spriel Tested-by: Peter Robinson Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c index a83699de01ec..fdd0c9abc1a1 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c @@ -79,7 +79,8 @@ void brcmf_of_probe(struct device *dev, enum brcmf_bus_type bus_type, /* Apple ARM64 platforms have their own idea of board type, passed in * via the device tree. They also have an antenna SKU parameter */ - if (!of_property_read_string(np, "brcm,board-type", &prop)) + err = of_property_read_string(np, "brcm,board-type", &prop); + if (!err) settings->board_type = prop; if (!of_property_read_string(np, "apple,antenna-sku", &prop)) @@ -87,7 +88,7 @@ void brcmf_of_probe(struct device *dev, enum brcmf_bus_type bus_type, /* Set board-type to the first string of the machine compatible prop */ root = of_find_node_by_path("/"); - if (root && !settings->board_type) { + if (root && err) { char *board_type; const char *tmp; From 5fc5cdee8ef2db8e89451a4310be10c17fae1d4d Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 30 Dec 2022 22:15:45 -0800 Subject: [PATCH 198/543] ASoC: qcom: lpass-cpu: Fix fallback SD line index handling commit 000bca8d706d1bf7cca01af75787247c5a2fdedf upstream. These indices should reference the ID placed within the dai_driver array, not the indices of the array itself. This fixes commit 4ff028f6c108 ("ASoC: qcom: lpass-cpu: Make I2S SD lines configurable"), which among others, broke IPQ8064 audio (sound/soc/qcom/lpass-ipq806x.c) because it uses ID 4 but we'd stop initializing the mi2s_playback_sd_mode and mi2s_capture_sd_mode arrays at ID 0. Fixes: 4ff028f6c108 ("ASoC: qcom: lpass-cpu: Make I2S SD lines configurable") Cc: Signed-off-by: Brian Norris Reviewed-by: Stephan Gerhold Link: https://lore.kernel.org/r/20221231061545.2110253-1-computersforpeace@gmail.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/qcom/lpass-cpu.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/soc/qcom/lpass-cpu.c b/sound/soc/qcom/lpass-cpu.c index 54353842dc07..dbdaaa85ce48 100644 --- a/sound/soc/qcom/lpass-cpu.c +++ b/sound/soc/qcom/lpass-cpu.c @@ -1037,10 +1037,11 @@ static void of_lpass_cpu_parse_dai_data(struct device *dev, struct lpass_data *data) { struct device_node *node; - int ret, id; + int ret, i, id; /* Allow all channels by default for backwards compatibility */ - for (id = 0; id < data->variant->num_dai; id++) { + for (i = 0; i < data->variant->num_dai; i++) { + id = data->variant->dai_driver[i].id; data->mi2s_playback_sd_mode[id] = LPAIF_I2SCTL_MODE_8CH; data->mi2s_capture_sd_mode[id] = LPAIF_I2SCTL_MODE_8CH; } From 80a82f6eb3b75c8bb02ada8f7f55efca08b74eec Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 22 Dec 2022 18:12:50 +0000 Subject: [PATCH 199/543] elfcore: Add a cprm parameter to elf_core_extra_{phdrs,data_size} commit 19e183b54528f11fafeca60fc6d0821e29ff281e upstream. A subsequent fix for arm64 will use this parameter to parse the vma information from the snapshot created by dump_vma_snapshot() rather than traversing the vma list without the mmap_lock. Fixes: 6dd8b1a0b6cb ("arm64: mte: Dump the MTE tags in the core file") Cc: # 5.18.x Signed-off-by: Catalin Marinas Reported-by: Seth Jenkins Suggested-by: Seth Jenkins Cc: Will Deacon Cc: Eric Biederman Cc: Kees Cook Link: https://lore.kernel.org/r/20221222181251.1345752-3-catalin.marinas@arm.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/elfcore.c | 4 ++-- arch/ia64/kernel/elfcore.c | 4 ++-- arch/x86/um/elfcore.c | 4 ++-- fs/binfmt_elf.c | 4 ++-- fs/binfmt_elf_fdpic.c | 4 ++-- include/linux/elfcore.h | 8 ++++---- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c index 4077a625342b..662a61e5e75e 100644 --- a/arch/arm64/kernel/elfcore.c +++ b/arch/arm64/kernel/elfcore.c @@ -75,7 +75,7 @@ static int mte_dump_tag_range(struct coredump_params *cprm, return ret; } -Elf_Half elf_core_extra_phdrs(void) +Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm) { int i; struct core_vma_metadata *m; @@ -112,7 +112,7 @@ int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) return 1; } -size_t elf_core_extra_data_size(void) +size_t elf_core_extra_data_size(struct coredump_params *cprm) { int i; struct core_vma_metadata *m; diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c index 94680521fbf9..8895df121540 100644 --- a/arch/ia64/kernel/elfcore.c +++ b/arch/ia64/kernel/elfcore.c @@ -7,7 +7,7 @@ #include -Elf64_Half elf_core_extra_phdrs(void) +Elf64_Half elf_core_extra_phdrs(struct coredump_params *cprm) { return GATE_EHDR->e_phnum; } @@ -60,7 +60,7 @@ int elf_core_write_extra_data(struct coredump_params *cprm) return 1; } -size_t elf_core_extra_data_size(void) +size_t elf_core_extra_data_size(struct coredump_params *cprm) { const struct elf_phdr *const gate_phdrs = (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); diff --git a/arch/x86/um/elfcore.c b/arch/x86/um/elfcore.c index 48a3eb09d951..650cdbbdaf45 100644 --- a/arch/x86/um/elfcore.c +++ b/arch/x86/um/elfcore.c @@ -7,7 +7,7 @@ #include -Elf32_Half elf_core_extra_phdrs(void) +Elf32_Half elf_core_extra_phdrs(struct coredump_params *cprm) { return vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0; } @@ -60,7 +60,7 @@ int elf_core_write_extra_data(struct coredump_params *cprm) return 1; } -size_t elf_core_extra_data_size(void) +size_t elf_core_extra_data_size(struct coredump_params *cprm) { if ( vsyscall_ehdr ) { const struct elfhdr *const ehdrp = diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 6a11025e5850..444302afc673 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -2209,7 +2209,7 @@ static int elf_core_dump(struct coredump_params *cprm) * The number of segs are recored into ELF header as 16bit value. * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here. */ - segs = cprm->vma_count + elf_core_extra_phdrs(); + segs = cprm->vma_count + elf_core_extra_phdrs(cprm); /* for notes section */ segs++; @@ -2249,7 +2249,7 @@ static int elf_core_dump(struct coredump_params *cprm) dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); offset += cprm->vma_data_size; - offset += elf_core_extra_data_size(); + offset += elf_core_extra_data_size(cprm); e_shoff = offset; if (e_phnum == PN_XNUM) { diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 9ce5e1f41c26..069f12cc7634 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1509,7 +1509,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) tmp->next = thread_list; thread_list = tmp; - segs = cprm->vma_count + elf_core_extra_phdrs(); + segs = cprm->vma_count + elf_core_extra_phdrs(cprm); /* for notes section */ segs++; @@ -1555,7 +1555,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); offset += cprm->vma_data_size; - offset += elf_core_extra_data_size(); + offset += elf_core_extra_data_size(cprm); e_shoff = offset; if (e_phnum == PN_XNUM) { diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 346a8b56cdc8..79e26b18bf0e 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -114,14 +114,14 @@ static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_reg * Dumping its extra ELF program headers includes all the other information * a debugger needs to easily find how the gate DSO was being used. */ -extern Elf_Half elf_core_extra_phdrs(void); +extern Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm); extern int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset); extern int elf_core_write_extra_data(struct coredump_params *cprm); -extern size_t elf_core_extra_data_size(void); +extern size_t elf_core_extra_data_size(struct coredump_params *cprm); #else -static inline Elf_Half elf_core_extra_phdrs(void) +static inline Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm) { return 0; } @@ -136,7 +136,7 @@ static inline int elf_core_write_extra_data(struct coredump_params *cprm) return 1; } -static inline size_t elf_core_extra_data_size(void) +static inline size_t elf_core_extra_data_size(struct coredump_params *cprm) { return 0; } From ba518c5b6429ed7273e26bff318b92bd27857e4b Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Tue, 10 Jan 2023 23:10:29 +0800 Subject: [PATCH 200/543] cpufreq: amd-pstate: fix kernel hang issue while amd-pstate unregistering commit 4f3085f87b51a551a0647f218d4f324796ecb703 upstream. In the amd_pstate_adjust_perf(), there is one cpufreq_cpu_get() call to increase increments the kobject reference count of policy and make it as busy. Therefore, a corresponding call to cpufreq_cpu_put() is needed to decrement the kobject reference count back, it will resolve the kernel hang issue when unregistering the amd-pstate driver and register the `amd_pstate_epp` driver instance. Fixes: 1d215f0319 ("cpufreq: amd-pstate: Add fast switch function for AMD P-State") Acked-by: Huang Rui Reviewed-by: Mario Limonciello Tested-by: Wyes Karny Signed-off-by: Perry Yuan Cc: 5.17+ # 5.17+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/amd-pstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 204e39006dda..c17bd845f5fc 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -307,6 +307,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu, max_perf = min_perf; amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true); + cpufreq_cpu_put(policy); } static int amd_get_min_freq(struct amd_cpudata *cpudata) From 45a584f139e881366b04072ad86a961673047792 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 5 Jan 2023 15:44:20 +0100 Subject: [PATCH 201/543] s390/cpum_sf: add READ_ONCE() semantics to compare and swap loops commit 82d3edb50a11bf3c5ef63294d5358ba230181413 upstream. The current cmpxchg_double() loops within the perf hw sampling code do not have READ_ONCE() semantics to read the old value from memory. This allows the compiler to generate code which reads the "old" value several times from memory, which again allows for inconsistencies. For example: /* Reset trailer (using compare-double-and-swap) */ do { te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK; te_flags |= SDB_TE_ALERT_REQ_MASK; } while (!cmpxchg_double(&te->flags, &te->overflow, te->flags, te->overflow, te_flags, 0ULL)); The compiler could generate code where te->flags used within the cmpxchg_double() call may be refetched from memory and which is not necessarily identical to the previous read version which was used to generate te_flags. Which in turn means that an incorrect update could happen. Fix this by adding READ_ONCE() semantics to all cmpxchg_double() loops. Given that READ_ONCE() cannot generate code on s390 which atomically reads 16 bytes, use a private compare-and-swap-double implementation to achieve that. Also replace cmpxchg_double() with the private implementation to be able to re-use the old value within the loops. As a side effect this converts the whole code to only use bit fields to read and modify bits within the hws trailer header. Reported-by: Alexander Gordeev Acked-by: Alexander Gordeev Acked-by: Hendrik Brueckner Reviewed-by: Thomas Richter Cc: Link: https://lore.kernel.org/linux-s390/Y71QJBhNTIatvxUT@osiris/T/#ma14e2a5f7aa8ed4b94b6f9576799b3ad9c60f333 Signed-off-by: Heiko Carstens Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/cpu_mf.h | 31 +++++----- arch/s390/kernel/perf_cpum_sf.c | 101 ++++++++++++++++++++------------ 2 files changed, 77 insertions(+), 55 deletions(-) diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index feaba12dbecb..efa103b52a1a 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -131,19 +131,21 @@ struct hws_combined_entry { struct hws_diag_entry diag; /* Diagnostic-sampling data entry */ } __packed; -struct hws_trailer_entry { - union { - struct { - unsigned int f:1; /* 0 - Block Full Indicator */ - unsigned int a:1; /* 1 - Alert request control */ - unsigned int t:1; /* 2 - Timestamp format */ - unsigned int :29; /* 3 - 31: Reserved */ - unsigned int bsdes:16; /* 32-47: size of basic SDE */ - unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */ - }; - unsigned long long flags; /* 0 - 63: All indicators */ +union hws_trailer_header { + struct { + unsigned int f:1; /* 0 - Block Full Indicator */ + unsigned int a:1; /* 1 - Alert request control */ + unsigned int t:1; /* 2 - Timestamp format */ + unsigned int :29; /* 3 - 31: Reserved */ + unsigned int bsdes:16; /* 32-47: size of basic SDE */ + unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */ + unsigned long long overflow; /* 64 - Overflow Count */ }; - unsigned long long overflow; /* 64 - sample Overflow count */ + __uint128_t val; +}; + +struct hws_trailer_entry { + union hws_trailer_header header; /* 0 - 15 Flags + Overflow Count */ unsigned char timestamp[16]; /* 16 - 31 timestamp */ unsigned long long reserved1; /* 32 -Reserved */ unsigned long long reserved2; /* */ @@ -290,14 +292,11 @@ static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi, return USEC_PER_SEC * qsi->cpu_speed / rate; } -#define SDB_TE_ALERT_REQ_MASK 0x4000000000000000UL -#define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL - /* Return TOD timestamp contained in an trailer entry */ static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te) { /* TOD in STCKE format */ - if (te->t) + if (te->header.t) return *((unsigned long long *) &te->timestamp[1]); /* TOD in STCK format */ diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 332a49965130..ce886a03545a 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -163,14 +163,15 @@ static void free_sampling_buffer(struct sf_buffer *sfb) static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags) { - unsigned long sdb, *trailer; + struct hws_trailer_entry *te; + unsigned long sdb; /* Allocate and initialize sample-data-block */ sdb = get_zeroed_page(gfp_flags); if (!sdb) return -ENOMEM; - trailer = trailer_entry_ptr(sdb); - *trailer = SDB_TE_ALERT_REQ_MASK; + te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb); + te->header.a = 1; /* Link SDB into the sample-data-block-table */ *sdbt = sdb; @@ -1206,7 +1207,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, "%s: Found unknown" " sampling data entry: te->f %i" " basic.def %#4x (%p)\n", __func__, - te->f, sample->def, sample); + te->header.f, sample->def, sample); /* Sample slot is not yet written or other record. * * This condition can occur if the buffer was reused @@ -1217,7 +1218,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, * that are not full. Stop processing if the first * invalid format was detected. */ - if (!te->f) + if (!te->header.f) break; } @@ -1227,6 +1228,16 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, } } +static inline __uint128_t __cdsg(__uint128_t *ptr, __uint128_t old, __uint128_t new) +{ + asm volatile( + " cdsg %[old],%[new],%[ptr]\n" + : [old] "+d" (old), [ptr] "+QS" (*ptr) + : [new] "d" (new) + : "memory", "cc"); + return old; +} + /* hw_perf_event_update() - Process sampling buffer * @event: The perf event * @flush_all: Flag to also flush partially filled sample-data-blocks @@ -1243,10 +1254,11 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, */ static void hw_perf_event_update(struct perf_event *event, int flush_all) { + unsigned long long event_overflow, sampl_overflow, num_sdb; + union hws_trailer_header old, prev, new; struct hw_perf_event *hwc = &event->hw; struct hws_trailer_entry *te; unsigned long *sdbt; - unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags; int done; /* @@ -1266,25 +1278,25 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); /* Leave loop if no more work to do (block full indicator) */ - if (!te->f) { + if (!te->header.f) { done = 1; if (!flush_all) break; } /* Check the sample overflow count */ - if (te->overflow) + if (te->header.overflow) /* Account sample overflows and, if a particular limit * is reached, extend the sampling buffer. * For details, see sfb_account_overflows(). */ - sampl_overflow += te->overflow; + sampl_overflow += te->header.overflow; /* Timestamps are valid for full sample-data-blocks only */ debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx " "overflow %llu timestamp %#llx\n", - __func__, (unsigned long)sdbt, te->overflow, - (te->f) ? trailer_timestamp(te) : 0ULL); + __func__, (unsigned long)sdbt, te->header.overflow, + (te->header.f) ? trailer_timestamp(te) : 0ULL); /* Collect all samples from a single sample-data-block and * flag if an (perf) event overflow happened. If so, the PMU @@ -1294,12 +1306,16 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) num_sdb++; /* Reset trailer (using compare-double-and-swap) */ + /* READ_ONCE() 16 byte header */ + prev.val = __cdsg(&te->header.val, 0, 0); do { - te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK; - te_flags |= SDB_TE_ALERT_REQ_MASK; - } while (!cmpxchg_double(&te->flags, &te->overflow, - te->flags, te->overflow, - te_flags, 0ULL)); + old.val = prev.val; + new.val = prev.val; + new.f = 0; + new.a = 1; + new.overflow = 0; + prev.val = __cdsg(&te->header.val, old.val, new.val); + } while (prev.val != old.val); /* Advance to next sample-data-block */ sdbt++; @@ -1384,7 +1400,7 @@ static void aux_output_end(struct perf_output_handle *handle) range_scan = AUX_SDB_NUM_ALERT(aux); for (i = 0, idx = aux->head; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); - if (!(te->flags & SDB_TE_BUFFER_FULL_MASK)) + if (!te->header.f) break; } /* i is num of SDBs which are full */ @@ -1392,7 +1408,7 @@ static void aux_output_end(struct perf_output_handle *handle) /* Remove alert indicators in the buffer */ te = aux_sdb_trailer(aux, aux->alert_mark); - te->flags &= ~SDB_TE_ALERT_REQ_MASK; + te->header.a = 0; debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n", __func__, i, range_scan, aux->head); @@ -1437,9 +1453,9 @@ static int aux_output_begin(struct perf_output_handle *handle, idx = aux->empty_mark + 1; for (i = 0; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); - te->flags &= ~(SDB_TE_BUFFER_FULL_MASK | - SDB_TE_ALERT_REQ_MASK); - te->overflow = 0; + te->header.f = 0; + te->header.a = 0; + te->header.overflow = 0; } /* Save the position of empty SDBs */ aux->empty_mark = aux->head + range - 1; @@ -1448,7 +1464,7 @@ static int aux_output_begin(struct perf_output_handle *handle, /* Set alert indicator */ aux->alert_mark = aux->head + range/2 - 1; te = aux_sdb_trailer(aux, aux->alert_mark); - te->flags = te->flags | SDB_TE_ALERT_REQ_MASK; + te->header.a = 1; /* Reset hardware buffer head */ head = AUX_SDB_INDEX(aux, aux->head); @@ -1475,14 +1491,17 @@ static int aux_output_begin(struct perf_output_handle *handle, static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, unsigned long long *overflow) { - unsigned long long orig_overflow, orig_flags, new_flags; + union hws_trailer_header old, prev, new; struct hws_trailer_entry *te; te = aux_sdb_trailer(aux, alert_index); + /* READ_ONCE() 16 byte header */ + prev.val = __cdsg(&te->header.val, 0, 0); do { - orig_flags = te->flags; - *overflow = orig_overflow = te->overflow; - if (orig_flags & SDB_TE_BUFFER_FULL_MASK) { + old.val = prev.val; + new.val = prev.val; + *overflow = old.overflow; + if (old.f) { /* * SDB is already set by hardware. * Abort and try to set somewhere @@ -1490,10 +1509,10 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, */ return false; } - new_flags = orig_flags | SDB_TE_ALERT_REQ_MASK; - } while (!cmpxchg_double(&te->flags, &te->overflow, - orig_flags, orig_overflow, - new_flags, 0ULL)); + new.a = 1; + new.overflow = 0; + prev.val = __cdsg(&te->header.val, old.val, new.val); + } while (prev.val != old.val); return true; } @@ -1522,8 +1541,9 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, unsigned long long *overflow) { - unsigned long long orig_overflow, orig_flags, new_flags; unsigned long i, range_scan, idx, idx_old; + union hws_trailer_header old, prev, new; + unsigned long long orig_overflow; struct hws_trailer_entry *te; debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld " @@ -1554,17 +1574,20 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, idx_old = idx = aux->empty_mark + 1; for (i = 0; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); + /* READ_ONCE() 16 byte header */ + prev.val = __cdsg(&te->header.val, 0, 0); do { - orig_flags = te->flags; - orig_overflow = te->overflow; - new_flags = orig_flags & ~SDB_TE_BUFFER_FULL_MASK; + old.val = prev.val; + new.val = prev.val; + orig_overflow = old.overflow; + new.f = 0; + new.overflow = 0; if (idx == aux->alert_mark) - new_flags |= SDB_TE_ALERT_REQ_MASK; + new.a = 1; else - new_flags &= ~SDB_TE_ALERT_REQ_MASK; - } while (!cmpxchg_double(&te->flags, &te->overflow, - orig_flags, orig_overflow, - new_flags, 0ULL)); + new.a = 0; + prev.val = __cdsg(&te->header.val, old.val, new.val); + } while (prev.val != old.val); *overflow += orig_overflow; } From b6ac9ded42aeafb1d307ffc9da8e1965117e9060 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 9 Jan 2023 11:51:20 +0100 Subject: [PATCH 202/543] s390/percpu: add READ_ONCE() to arch_this_cpu_to_op_simple() commit e3f360db08d55a14112bd27454e616a24296a8b0 upstream. Make sure that *ptr__ within arch_this_cpu_to_op_simple() is only dereferenced once by using READ_ONCE(). Otherwise the compiler could generate incorrect code. Cc: Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index cb5fc0690435..081837b391e3 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -31,7 +31,7 @@ pcp_op_T__ *ptr__; \ preempt_disable_notrace(); \ ptr__ = raw_cpu_ptr(&(pcp)); \ - prev__ = *ptr__; \ + prev__ = READ_ONCE(*ptr__); \ do { \ old__ = prev__; \ new__ = old__ op (val); \ From adc48e5e408afbb01d261bd303fd9fbbbaa3e317 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 16 Dec 2022 15:33:55 -0800 Subject: [PATCH 203/543] drm/virtio: Fix GEM handle creation UAF commit 52531258318ed59a2dc5a43df2eaf0eb1d65438e upstream. Userspace can guess the handle value and try to race GEM object creation with handle close, resulting in a use-after-free if we dereference the object after dropping the handle's reference. For that reason, dropping the handle's reference must be done *after* we are done dereferencing the object. Signed-off-by: Rob Clark Reviewed-by: Chia-I Wu Fixes: 62fb7a5e1096 ("virtio-gpu: add 3d/virgl support") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Osipenko Link: https://patchwork.freedesktop.org/patch/msgid/20221216233355.542197-2-robdclark@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/virtio/virtgpu_ioctl.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index 5d05093014ac..9f4a90493aea 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -358,10 +358,18 @@ static int virtio_gpu_resource_create_ioctl(struct drm_device *dev, void *data, drm_gem_object_release(obj); return ret; } - drm_gem_object_put(obj); rc->res_handle = qobj->hw_res_handle; /* similiar to a VM address */ rc->bo_handle = handle; + + /* + * The handle owns the reference now. But we must drop our + * remaining reference *after* we no longer need to dereference + * the obj. Otherwise userspace could guess the handle and + * race closing it from another thread. + */ + drm_gem_object_put(obj); + return 0; } @@ -723,11 +731,18 @@ static int virtio_gpu_resource_create_blob_ioctl(struct drm_device *dev, drm_gem_object_release(obj); return ret; } - drm_gem_object_put(obj); rc_blob->res_handle = bo->hw_res_handle; rc_blob->bo_handle = handle; + /* + * The handle owns the reference now. But we must drop our + * remaining reference *after* we no longer need to dereference + * the obj. Otherwise userspace could guess the handle and + * race closing it from another thread. + */ + drm_gem_object_put(obj); + return 0; } From b8e6fc445990dce368950bd9553b31f46b50285e Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Tue, 10 Jan 2023 11:33:44 +0800 Subject: [PATCH 204/543] drm/amd/pm/smu13: BACO is supported when it's in BACO state commit 972fb53d3605eb6cdf0d6ae9a52e910626a91ff7 upstream. This leverages the logic in smu11. No need to talk to SMU to check BACO enablement as it's in BACO state already. Signed-off-by: Guchun Chen Reviewed-by: Kenneth Feng Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0, 6.1 Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 8e4830a311bd..9b96902adad3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -2249,6 +2249,10 @@ bool smu_v13_0_baco_is_support(struct smu_context *smu) !smu_baco->platform_support) return false; + /* return true if ASIC is in BACO state already */ + if (smu_v13_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER) + return true; + if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) && !smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT)) return false; From 42526442fe3ed9c2487a2a475cb4a6f463ce2eaf Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Thu, 12 Jan 2023 04:00:27 -0800 Subject: [PATCH 205/543] drm: Optimize drm buddy top-down allocation method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5640e81607152d7f2d2558227c0f6cb78b8f39cf upstream. We are observing performance drop in many usecases which include games, 3D benchmark applications,etc.. To solve this problem, We are strictly not allowing top down flag enabled allocations to steal the memory space from cpu visible region. The idea is, we are sorting each order list entries in ascending order and compare the last entry of each order list in the freelist and return the max block. This patch improves the 3D benchmark scores and solves fragmentation issues. All drm buddy selftests are verfied. drm_buddy: pass:6 fail:0 skip:0 total:6 Signed-off-by: Arunpravin Paneer Selvam Acked-by: Christian König Acked-by: Alex Deucher Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20230112120027.3072-1-Arunpravin.PaneerSelvam@amd.com Signed-off-by: Christian König CC: Cc: stable@vger.kernel.org # 5.18+ Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_buddy.c | 81 ++++++++++++++++++++++++------------- 1 file changed, 54 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index 11bb59399471..3d1f50f481cf 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -38,6 +38,25 @@ static void drm_block_free(struct drm_buddy *mm, kmem_cache_free(slab_blocks, block); } +static void list_insert_sorted(struct drm_buddy *mm, + struct drm_buddy_block *block) +{ + struct drm_buddy_block *node; + struct list_head *head; + + head = &mm->free_list[drm_buddy_block_order(block)]; + if (list_empty(head)) { + list_add(&block->link, head); + return; + } + + list_for_each_entry(node, head, link) + if (drm_buddy_block_offset(block) < drm_buddy_block_offset(node)) + break; + + __list_add(&block->link, node->link.prev, &node->link); +} + static void mark_allocated(struct drm_buddy_block *block) { block->header &= ~DRM_BUDDY_HEADER_STATE; @@ -52,8 +71,7 @@ static void mark_free(struct drm_buddy *mm, block->header &= ~DRM_BUDDY_HEADER_STATE; block->header |= DRM_BUDDY_FREE; - list_add(&block->link, - &mm->free_list[drm_buddy_block_order(block)]); + list_insert_sorted(mm, block); } static void mark_split(struct drm_buddy_block *block) @@ -387,20 +405,26 @@ err_undo: } static struct drm_buddy_block * -get_maxblock(struct list_head *head) +get_maxblock(struct drm_buddy *mm, unsigned int order) { struct drm_buddy_block *max_block = NULL, *node; + unsigned int i; - max_block = list_first_entry_or_null(head, - struct drm_buddy_block, - link); - if (!max_block) - return NULL; + for (i = order; i <= mm->max_order; ++i) { + if (!list_empty(&mm->free_list[i])) { + node = list_last_entry(&mm->free_list[i], + struct drm_buddy_block, + link); + if (!max_block) { + max_block = node; + continue; + } - list_for_each_entry(node, head, link) { - if (drm_buddy_block_offset(node) > - drm_buddy_block_offset(max_block)) - max_block = node; + if (drm_buddy_block_offset(node) > + drm_buddy_block_offset(max_block)) { + max_block = node; + } + } } return max_block; @@ -412,20 +436,23 @@ alloc_from_freelist(struct drm_buddy *mm, unsigned long flags) { struct drm_buddy_block *block = NULL; - unsigned int i; + unsigned int tmp; int err; - for (i = order; i <= mm->max_order; ++i) { - if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) { - block = get_maxblock(&mm->free_list[i]); - if (block) - break; - } else { - block = list_first_entry_or_null(&mm->free_list[i], - struct drm_buddy_block, - link); - if (block) - break; + if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) { + block = get_maxblock(mm, order); + if (block) + /* Store the obtained block order */ + tmp = drm_buddy_block_order(block); + } else { + for (tmp = order; tmp <= mm->max_order; ++tmp) { + if (!list_empty(&mm->free_list[tmp])) { + block = list_last_entry(&mm->free_list[tmp], + struct drm_buddy_block, + link); + if (block) + break; + } } } @@ -434,18 +461,18 @@ alloc_from_freelist(struct drm_buddy *mm, BUG_ON(!drm_buddy_block_is_free(block)); - while (i != order) { + while (tmp != order) { err = split_block(mm, block); if (unlikely(err)) goto err_undo; block = block->right; - i--; + tmp--; } return block; err_undo: - if (i != order) + if (tmp != order) __drm_buddy_free(mm, block); return ERR_PTR(err); } From 4009502c091c1543ae8708a12d1a97583ae411ac Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 12 Dec 2022 17:13:38 +0100 Subject: [PATCH 206/543] drm/i915/gt: Reset twice commit d3de5616d36462a646f5b360ba82d3b09ff668eb upstream. After applying an engine reset, on some platforms like Jasperlake, we occasionally detect that the engine state is not cleared until shortly after the resume. As we try to resume the engine with volatile internal state, the first request fails with a spurious CS event (it looks like it reports a lite-restore to the hung context, instead of the expected idle->active context switch). Signed-off-by: Chris Wilson Cc: stable@vger.kernel.org Cc: Mika Kuoppala Signed-off-by: Andi Shyti Reviewed-by: Gwan-gyeong Mun Link: https://patchwork.freedesktop.org/patch/msgid/20221212161338.1007659-1-andi.shyti@linux.intel.com (cherry picked from commit 3db9d590557da3aa2c952f2fecd3e9b703dad790) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gt/intel_reset.c | 34 ++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index b36674356986..10b930eaa8cb 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -278,6 +278,7 @@ out: static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) { struct intel_uncore *uncore = gt->uncore; + int loops = 2; int err; /* @@ -285,18 +286,39 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) * for fifo space for the write or forcewake the chip for * the read */ - intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask); + do { + intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask); - /* Wait for the device to ack the reset requests */ - err = __intel_wait_for_register_fw(uncore, - GEN6_GDRST, hw_domain_mask, 0, - 500, 0, - NULL); + /* + * Wait for the device to ack the reset requests. + * + * On some platforms, e.g. Jasperlake, we see that the + * engine register state is not cleared until shortly after + * GDRST reports completion, causing a failure as we try + * to immediately resume while the internal state is still + * in flux. If we immediately repeat the reset, the second + * reset appears to serialise with the first, and since + * it is a no-op, the registers should retain their reset + * value. However, there is still a concern that upon + * leaving the second reset, the internal engine state + * is still in flux and not ready for resuming. + */ + err = __intel_wait_for_register_fw(uncore, GEN6_GDRST, + hw_domain_mask, 0, + 2000, 0, + NULL); + } while (err == 0 && --loops); if (err) GT_TRACE(gt, "Wait for 0x%08x engines reset failed\n", hw_domain_mask); + /* + * As we have observed that the engine state is still volatile + * after GDRST is acked, impose a small delay to let everything settle. + */ + udelay(50); + return err; } From 8874730ecefe295931a681a0ae749cda53653078 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Fri, 23 Dec 2022 10:20:11 +0100 Subject: [PATCH 207/543] drm/i915: Reserve enough fence slot for i915_vma_unbind_async MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 476fdcdaaae7b06c780cdfc234c704107f16c529 upstream. A nested dma_resv_reserve_fences(1) will not reserve slot from the 2nd call onwards and folowing dma_resv_add_fence() might hit the "BUG_ON(fobj->num_fences >= fobj->max_fences)" check. I915 hit above nested dma_resv case in ttm_bo_handle_move_mem() with async unbind: dma_resv_reserve_fences() from --> ttm_bo_handle_move_mem() dma_resv_reserve_fences() from --> i915_vma_unbind_async() dma_resv_add_fence() from --> i915_vma_unbind_async() dma_resv_add_fence() from -->ttm_bo_move_accel_cleanup() Resolve this by adding an extra fence in i915_vma_unbind_async(). Suggested-by: Thomas Hellström Fixes: 2f6b90da9192 ("drm/i915: Use vma resources for async unbinding") Cc: # v5.18+ Signed-off-by: Nirmoy Das Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20221223092011.11657-1-nirmoy.das@intel.com (cherry picked from commit 4f0755c2faf7388616109717facc5bbde6850e60) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_vma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 4d06875de14a..c8ad8f37e5cf 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -2114,7 +2114,7 @@ int i915_vma_unbind_async(struct i915_vma *vma, bool trylock_vm) if (!obj->mm.rsgt) return -EBUSY; - err = dma_resv_reserve_fences(obj->base.resv, 1); + err = dma_resv_reserve_fences(obj->base.resv, 2); if (err) return -EBUSY; From b696c627b3f56e173f7f70b8487d66da8ff22506 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 3 Jan 2023 15:49:46 -0800 Subject: [PATCH 208/543] drm/i915: Fix potential context UAFs commit afce71ff6daa9c0f852df0727fe32c6fb107f0fa upstream. gem_context_register() makes the context visible to userspace, and which point a separate thread can trigger the I915_GEM_CONTEXT_DESTROY ioctl. So we need to ensure that nothing uses the ctx ptr after this. And we need to ensure that adding the ctx to the xarray is the *last* thing that gem_context_register() does with the ctx pointer. Signed-off-by: Rob Clark Fixes: eb4dedae920a ("drm/i915/gem: Delay tracking the GEM context until it is registered") Fixes: a4c1cdd34e2c ("drm/i915/gem: Delay context creation (v3)") Fixes: 49bd54b390c2 ("drm/i915: Track all user contexts per client") Cc: # v5.10+ Reviewed-by: Tvrtko Ursulin Reviewed-by: Andi Shyti [tursulin: Stable and fixes tags add/tidy.] Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20230103234948.1218393-1-robdclark@gmail.com (cherry picked from commit bed4b455cf5374e68879be56971c1da563bcd90c) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 24 +++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 1e29b1e6d186..2353723ca1bd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1688,6 +1688,10 @@ void i915_gem_init__contexts(struct drm_i915_private *i915) init_contexts(&i915->gem.contexts); } +/* + * Note that this implicitly consumes the ctx reference, by placing + * the ctx in the context_xa. + */ static void gem_context_register(struct i915_gem_context *ctx, struct drm_i915_file_private *fpriv, u32 id) @@ -1703,10 +1707,6 @@ static void gem_context_register(struct i915_gem_context *ctx, snprintf(ctx->name, sizeof(ctx->name), "%s[%d]", current->comm, pid_nr(ctx->pid)); - /* And finally expose ourselves to userspace via the idr */ - old = xa_store(&fpriv->context_xa, id, ctx, GFP_KERNEL); - WARN_ON(old); - spin_lock(&ctx->client->ctx_lock); list_add_tail_rcu(&ctx->client_link, &ctx->client->ctx_list); spin_unlock(&ctx->client->ctx_lock); @@ -1714,6 +1714,10 @@ static void gem_context_register(struct i915_gem_context *ctx, spin_lock(&i915->gem.contexts.lock); list_add_tail(&ctx->link, &i915->gem.contexts.list); spin_unlock(&i915->gem.contexts.lock); + + /* And finally expose ourselves to userspace via the idr */ + old = xa_store(&fpriv->context_xa, id, ctx, GFP_KERNEL); + WARN_ON(old); } int i915_gem_context_open(struct drm_i915_private *i915, @@ -2199,14 +2203,22 @@ finalize_create_context_locked(struct drm_i915_file_private *file_priv, if (IS_ERR(ctx)) return ctx; + /* + * One for the xarray and one for the caller. We need to grab + * the reference *prior* to making the ctx visble to userspace + * in gem_context_register(), as at any point after that + * userspace can try to race us with another thread destroying + * the context under our feet. + */ + i915_gem_context_get(ctx); + gem_context_register(ctx, file_priv, id); old = xa_erase(&file_priv->proto_context_xa, id); GEM_BUG_ON(old != pc); proto_context_close(file_priv->dev_priv, pc); - /* One for the xarray and one for the caller */ - return i915_gem_context_get(ctx); + return ctx; } struct i915_gem_context * From dcfeba477b3e3df526e0f543b58fa71c045dff8b Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 27 Dec 2022 15:49:17 -0600 Subject: [PATCH 209/543] drm/amd: Delay removal of the firmware framebuffer commit 1923bc5a56daeeabd7e9093bad2febcd6af2416a upstream. Removing the firmware framebuffer from the driver means that even if the driver doesn't support the IP blocks in a GPU it will no longer be functional after the driver fails to initialize. This change will ensure that unsupported IP blocks at least cause the driver to work with the EFI framebuffer. Cc: stable@vger.kernel.org Suggested-by: Alex Deucher Reviewed-by: Alex Deucher Reviewed-by: Lijo Lazar Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 6 ------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 0be85d19a6f3..8f83d5b6ceaa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -36,6 +36,7 @@ #include #include +#include #include #include #include @@ -89,6 +90,8 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); #define AMDGPU_MAX_RETRY_LIMIT 2 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL) +static const struct drm_driver amdgpu_kms_driver; + const char *amdgpu_asic_name[] = { "TAHITI", "PITCAIRN", @@ -3677,6 +3680,11 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) return r; + /* Get rid of things like offb */ + r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver); + if (r) + return r; + /* Enable TMZ based on IP_VERSION */ amdgpu_gmc_tmz_set(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b59466972ed7..2e5d78b6635c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -23,7 +23,6 @@ */ #include -#include #include #include #include @@ -2123,11 +2122,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, } #endif - /* Get rid of things like offb */ - ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &amdgpu_kms_driver); - if (ret) - return ret; - adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, typeof(*adev), ddev); if (IS_ERR(adev)) return PTR_ERR(adev); From 9196eb7c52e55749a332974f0081f77d53d60199 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Fri, 6 Jan 2023 14:04:15 +0800 Subject: [PATCH 210/543] drm/amdgpu: Fixed bug on error when unloading amdgpu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 99f1a36c90a7524972be5a028424c57fa17753ee upstream. Fixed bug on error when unloading amdgpu. The error message is as follows: [ 377.706202] kernel BUG at drivers/gpu/drm/drm_buddy.c:278! [ 377.706215] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [ 377.706222] CPU: 4 PID: 8610 Comm: modprobe Tainted: G IOE 6.0.0-thomas #1 [ 377.706231] Hardware name: ASUS System Product Name/PRIME Z390-A, BIOS 2004 11/02/2021 [ 377.706238] RIP: 0010:drm_buddy_free_block+0x26/0x30 [drm_buddy] [ 377.706264] Code: 00 00 00 90 0f 1f 44 00 00 48 8b 0e 89 c8 25 00 0c 00 00 3d 00 04 00 00 75 10 48 8b 47 18 48 d3 e0 48 01 47 28 e9 fa fe ff ff <0f> 0b 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 41 54 55 48 89 f5 53 [ 377.706282] RSP: 0018:ffffad2dc4683cb8 EFLAGS: 00010287 [ 377.706289] RAX: 0000000000000000 RBX: ffff8b1743bd5138 RCX: 0000000000000000 [ 377.706297] RDX: ffff8b1743bd5160 RSI: ffff8b1743bd5c78 RDI: ffff8b16d1b25f70 [ 377.706304] RBP: ffff8b1743bd59e0 R08: 0000000000000001 R09: 0000000000000001 [ 377.706311] R10: ffff8b16c8572400 R11: ffffad2dc4683cf0 R12: ffff8b16d1b25f70 [ 377.706318] R13: ffff8b16d1b25fd0 R14: ffff8b1743bd59c0 R15: ffff8b16d1b25f70 [ 377.706325] FS: 00007fec56c72c40(0000) GS:ffff8b1836500000(0000) knlGS:0000000000000000 [ 377.706334] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 377.706340] CR2: 00007f9b88c1ba50 CR3: 0000000110450004 CR4: 00000000003706e0 [ 377.706347] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 377.706354] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 377.706361] Call Trace: [ 377.706365] [ 377.706369] drm_buddy_free_list+0x2a/0x60 [drm_buddy] [ 377.706376] amdgpu_vram_mgr_fini+0xea/0x180 [amdgpu] [ 377.706572] amdgpu_ttm_fini+0x12e/0x1a0 [amdgpu] [ 377.706650] amdgpu_bo_fini+0x22/0x90 [amdgpu] [ 377.706727] gmc_v11_0_sw_fini+0x26/0x30 [amdgpu] [ 377.706821] amdgpu_device_fini_sw+0xa1/0x3c0 [amdgpu] [ 377.706897] amdgpu_driver_release_kms+0x12/0x30 [amdgpu] [ 377.706975] drm_dev_release+0x20/0x40 [drm] [ 377.707006] release_nodes+0x35/0xb0 [ 377.707014] devres_release_all+0x8b/0xc0 [ 377.707020] device_unbind_cleanup+0xe/0x70 [ 377.707027] device_release_driver_internal+0xee/0x160 [ 377.707033] driver_detach+0x44/0x90 [ 377.707039] bus_remove_driver+0x55/0xe0 [ 377.707045] pci_unregister_driver+0x3b/0x90 [ 377.707052] amdgpu_exit+0x11/0x6c [amdgpu] [ 377.707194] __x64_sys_delete_module+0x142/0x2b0 [ 377.707201] ? fpregs_assert_state_consistent+0x22/0x50 [ 377.707208] ? exit_to_user_mode_prepare+0x3e/0x190 [ 377.707215] do_syscall_64+0x38/0x90 [ 377.707221] entry_SYSCALL_64_after_hwframe+0x63/0xcd Signed-off-by: YiPeng Chai Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 80dd1343594c..75c80c557b6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -882,7 +882,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) kfree(rsv); list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) { - drm_buddy_free_list(&mgr->mm, &rsv->blocks); + drm_buddy_free_list(&mgr->mm, &rsv->allocated); kfree(rsv); } drm_buddy_fini(&mgr->mm); From b435f68e415ca5e17e3425893ea991720a43b595 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 16 Dec 2022 17:12:53 +0800 Subject: [PATCH 211/543] drm/amd/pm: correct the reference clock for fan speed(rpm) calculation commit 6fea87637bf36bd285227f490132e83582ab7513 upstream. Correct the reference clock as 25Mhz for SMU13 fan speed calculation. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x, 6.1.x Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 9b96902adad3..cfb7f4475c82 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -1258,7 +1258,8 @@ int smu_v13_0_set_fan_speed_rpm(struct smu_context *smu, uint32_t speed) { struct amdgpu_device *adev = smu->adev; - uint32_t tach_period, crystal_clock_freq; + uint32_t crystal_clock_freq = 2500; + uint32_t tach_period; int ret; if (!speed) @@ -1268,7 +1269,6 @@ int smu_v13_0_set_fan_speed_rpm(struct smu_context *smu, if (ret) return ret; - crystal_clock_freq = amdgpu_asic_get_xclk(adev); tach_period = 60 * crystal_clock_freq * 10000 / (8 * speed); WREG32_SOC15(THM, 0, regCG_TACH_CTRL, REG_SET_FIELD(RREG32_SOC15(THM, 0, regCG_TACH_CTRL), From 363781f72d46c3762d7c0fd8fc13f27dce47f5cf Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Wed, 4 Jan 2023 10:45:01 +0800 Subject: [PATCH 212/543] drm/amd/pm: add the missing mapping for PPT feature on SMU13.0.0 and 13.0.7 commit 318ca20893c19ead02845a08204c3f9249bb74cd upstream. Then we are able to set a new ppt limit via the hwmon interface(power1_cap). Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x, 6.1.x Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 1 + drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index b8430601304f..be43de9dd496 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -189,6 +189,7 @@ static struct cmn2asic_mapping smu_v13_0_0_feature_mask_map[SMU_FEATURE_COUNT] = FEA_MAP(SOC_PCC), [SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, [SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, + [SMU_FEATURE_PPT_BIT] = {1, FEATURE_THROTTLERS_BIT}, }; static struct cmn2asic_mapping smu_v13_0_0_table_map[SMU_TABLE_COUNT] = { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 222924363a68..31deec2ce4b3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -191,6 +191,7 @@ static struct cmn2asic_mapping smu_v13_0_7_feature_mask_map[SMU_FEATURE_COUNT] = FEA_MAP(SOC_PCC), [SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, [SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, + [SMU_FEATURE_PPT_BIT] = {1, FEATURE_THROTTLERS_BIT}, }; static struct cmn2asic_mapping smu_v13_0_7_table_map[SMU_TABLE_COUNT] = { From 69a75087cd1f10d5be07527f7ed7ed38db429d7e Mon Sep 17 00:00:00 2001 From: Ao Zhong Date: Tue, 25 Oct 2022 23:17:49 +0200 Subject: [PATCH 213/543] drm/amd/display: move remaining FPU code to dml folder commit 58ddbecb14c792b7fe0d92ae5e25c9179d62ff25 upstream. pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; these two operations in dcn32/dcn32_resource.c still need to use FPU, This will cause compilation to fail on ARM64 platforms because -mgeneral-regs-only is enabled by default to disable the hardware FPU. Therefore, imitate the dcn31_zero_pipe_dcc_fraction function in dml/dcn31/dcn31_fpu.c, declare the dcn32_zero_pipe_dcc_fraction function in dcn32_fpu.c, and move above two operations into this function. Reviewed-by: Rodrigo Siqueira Signed-off-by: Ao Zhong Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 5 +++-- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 8 ++++++++ drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 3 +++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 33ab6fdc3617..9919c39f7ea0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -1919,8 +1919,9 @@ int dcn32_populate_dml_pipes_from_context( timing = &pipe->stream->timing; pipes[pipe_cnt].pipe.src.gpuvm = true; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; + DC_FP_START(); + dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt); + DC_FP_END(); pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index d1bf49d207de..d90216d2fe3a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -2546,3 +2546,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa } } +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes, + int pipe_cnt) +{ + dc_assert_fp_enabled(); + + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h index 3a3dc2ce4c73..ab010e7e840b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -73,4 +73,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, void dcn32_patch_dpm_table(struct clk_bw_params *bw_params); +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes, + int pipe_cnt); + #endif From f905e03c8ff65d80c24f42d8b93df3cec5a7ab12 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 13 Jan 2023 14:03:02 -0500 Subject: [PATCH 214/543] Revert "drm/amdgpu: Revert "drm/amdgpu: getting fan speed pwm for vega10 properly"" This reverts commit 9ccd11718d76b95c69aa773f2abedef560776037 The original commit 16fb4dca95daa ("drm/amdgpu: getting fan speed pwm for vega10 properly") was reverted in commit 4545ae2ed3f2 ("drm/amdgpu: Revert "drm/amdgpu: getting fan speed pwm for vega10 properly""). but the test that resulted in the revert was wrong and was fixed so the revert was reverted in commit 30b8e7b8ee3b ("Revert "drm/amdgpu: Revert "drm/amdgpu: getting fan speed pwm for vega10 properly"""). That should have been the end of it, but then Sasha picked up the original revert again and it was committed as 9ccd11718d76. So drop that commit so we get back to where we need to be. Signed-off-by: Alex Deucher Cc: Sasha Levin Cc: stable@vger.kernel.org # 6.1.x Cc: Yury Zhuravlev Cc: Guchun Chen Cc: Asher Song Signed-off-by: Greg Kroah-Hartman --- .../amd/pm/powerplay/hwmgr/vega10_thermal.c | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c index dad3e3741a4e..190af79f3236 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c @@ -67,22 +67,21 @@ int vega10_fan_ctrl_get_fan_speed_info(struct pp_hwmgr *hwmgr, int vega10_fan_ctrl_get_fan_speed_pwm(struct pp_hwmgr *hwmgr, uint32_t *speed) { - uint32_t current_rpm; - uint32_t percent = 0; + struct amdgpu_device *adev = hwmgr->adev; + uint32_t duty100, duty; + uint64_t tmp64; - if (hwmgr->thermal_controller.fanInfo.bNoFan) - return 0; + duty100 = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL1), + CG_FDO_CTRL1, FMAX_DUTY100); + duty = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_THERMAL_STATUS), + CG_THERMAL_STATUS, FDO_PWM_DUTY); - if (vega10_get_current_rpm(hwmgr, ¤t_rpm)) - return -1; + if (!duty100) + return -EINVAL; - if (hwmgr->thermal_controller. - advanceFanControlParameters.usMaxFanRPM != 0) - percent = current_rpm * 255 / - hwmgr->thermal_controller. - advanceFanControlParameters.usMaxFanRPM; - - *speed = MIN(percent, 255); + tmp64 = (uint64_t)duty * 255; + do_div(tmp64, duty100); + *speed = MIN((uint32_t)tmp64, 255); return 0; } From d4b7ff9edebc6f7498873493e5b50f50a2ba6d49 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Wed, 11 Jan 2023 12:37:58 +0100 Subject: [PATCH 215/543] cifs: Fix uninitialized memory read for smb311 posix symlink create commit a152d05ae4a71d802d50cf9177dba34e8bb09f68 upstream. If smb311 posix is enabled, we send the intended mode for file creation in the posix create context. Instead of using what's there on the stack, create the mfsymlink file with 0644. Fixes: ce558b0e17f8a ("smb3: Add posix create context for smb3.11 posix mounts") Cc: stable@vger.kernel.org Signed-off-by: Volker Lendecke Reviewed-by: Tom Talpey Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/link.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/link.c b/fs/cifs/link.c index bd374feeccaa..a5a097a69983 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -428,6 +428,7 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, oparms.disposition = FILE_CREATE; oparms.fid = &fid; oparms.reconnect = false; + oparms.mode = 0644; rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL, NULL, NULL); From 5a574327d1c5f7036af84ebe99cddba88f7d2038 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 6 Jan 2023 20:28:30 -0300 Subject: [PATCH 216/543] cifs: fix file info setting in cifs_query_path_info() commit 29cf28235e3e57e0af01ae29db57a75f87a2ada8 upstream. We missed to set file info when CIFSSMBQPathInfo() returned 0, thus leaving cifs_open_info_data::fi unset. Fix this by setting cifs_open_info_data::fi when either CIFSSMBQPathInfo() or SMBQueryInformation() succeed. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216881 Fixes: 76894f3e2f71 ("cifs: improve symlink handling for smb2+") Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb1ops.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 50480751e521..5fe2c2f8ef41 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -562,17 +562,20 @@ static int cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, if ((rc == -EOPNOTSUPP) || (rc == -EINVAL)) { rc = SMBQueryInformation(xid, tcon, full_path, &fi, cifs_sb->local_nls, cifs_remap(cifs_sb)); - if (!rc) - move_cifs_info_to_smb2(&data->fi, &fi); *adjustTZ = true; } - if (!rc && (le32_to_cpu(fi.Attributes) & ATTR_REPARSE)) { + if (!rc) { int tmprc; int oplock = 0; struct cifs_fid fid; struct cifs_open_parms oparms; + move_cifs_info_to_smb2(&data->fi, &fi); + + if (!(le32_to_cpu(fi.Attributes) & ATTR_REPARSE)) + return 0; + oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = FILE_READ_ATTRIBUTES; From d54a3ef8b8bdb9e12dc5ff97d44b9827416eed54 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 6 Jan 2023 13:34:36 -0300 Subject: [PATCH 217/543] cifs: fix file info setting in cifs_open_file() commit ba5d4c1596cada37793d405dd18d695cd3508902 upstream. In cifs_open_file(), @buf must hold a pointer to a cifs_open_info_data structure which is passed by cifs_nt_open(), so assigning @buf directly to @fi was obviously wrong. Fix this by passing a valid FILE_ALL_INFO structure to SMBLegacyOpen() and CIFS_open(), and then copy the set structure to the corresponding cifs_open_info_data::fi field with move_cifs_info_to_smb2() helper. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216889 Fixes: 76894f3e2f71 ("cifs: improve symlink handling for smb2+") Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb1ops.c | 54 ++++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 5fe2c2f8ef41..4cb364454e13 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -719,17 +719,25 @@ cifs_mkdir_setinfo(struct inode *inode, const char *full_path, static int cifs_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock, void *buf) { - FILE_ALL_INFO *fi = buf; + struct cifs_open_info_data *data = buf; + FILE_ALL_INFO fi = {}; + int rc; if (!(oparms->tcon->ses->capabilities & CAP_NT_SMBS)) - return SMBLegacyOpen(xid, oparms->tcon, oparms->path, - oparms->disposition, - oparms->desired_access, - oparms->create_options, - &oparms->fid->netfid, oplock, fi, - oparms->cifs_sb->local_nls, - cifs_remap(oparms->cifs_sb)); - return CIFS_open(xid, oparms, oplock, fi); + rc = SMBLegacyOpen(xid, oparms->tcon, oparms->path, + oparms->disposition, + oparms->desired_access, + oparms->create_options, + &oparms->fid->netfid, oplock, &fi, + oparms->cifs_sb->local_nls, + cifs_remap(oparms->cifs_sb)); + else + rc = CIFS_open(xid, oparms, oplock, &fi); + + if (!rc && data) + move_cifs_info_to_smb2(&data->fi, &fi); + + return rc; } static void @@ -1053,7 +1061,7 @@ cifs_make_node(unsigned int xid, struct inode *inode, struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct inode *newinode = NULL; int rc = -EPERM; - FILE_ALL_INFO *buf = NULL; + struct cifs_open_info_data buf = {}; struct cifs_io_parms io_parms; __u32 oplock = 0; struct cifs_fid fid; @@ -1085,14 +1093,14 @@ cifs_make_node(unsigned int xid, struct inode *inode, cifs_sb->local_nls, cifs_remap(cifs_sb)); if (rc) - goto out; + return rc; rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb, xid); if (rc == 0) d_instantiate(dentry, newinode); - goto out; + return rc; } /* @@ -1100,19 +1108,13 @@ cifs_make_node(unsigned int xid, struct inode *inode, * support block and char device (no socket & fifo) */ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) - goto out; + return rc; if (!S_ISCHR(mode) && !S_ISBLK(mode)) - goto out; + return rc; cifs_dbg(FYI, "sfu compat create special file\n"); - buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); - if (buf == NULL) { - rc = -ENOMEM; - goto out; - } - oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_WRITE; @@ -1127,21 +1129,21 @@ cifs_make_node(unsigned int xid, struct inode *inode, oplock = REQ_OPLOCK; else oplock = 0; - rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, buf); + rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, &buf); if (rc) - goto out; + return rc; /* * BB Do not bother to decode buf since no local inode yet to put * timestamps in, but we can reuse it safely. */ - pdev = (struct win_dev *)buf; + pdev = (struct win_dev *)&buf.fi; io_parms.pid = current->tgid; io_parms.tcon = tcon; io_parms.offset = 0; io_parms.length = sizeof(struct win_dev); - iov[1].iov_base = buf; + iov[1].iov_base = &buf.fi; iov[1].iov_len = sizeof(struct win_dev); if (S_ISCHR(mode)) { memcpy(pdev->type, "IntxCHR", 8); @@ -1160,8 +1162,8 @@ cifs_make_node(unsigned int xid, struct inode *inode, d_drop(dentry); /* FIXME: add code here to set EAs */ -out: - kfree(buf); + + cifs_free_open_info(&buf); return rc; } From 888c060bc327e6a231304aefc0f30e97b1640338 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 10 Jan 2023 19:23:21 -0300 Subject: [PATCH 218/543] cifs: do not query ifaces on smb1 mounts commit 22aeb01db7080e18c6aeb4361cc2556c9887099a upstream. Users have reported the following error on every 600 seconds (SMB_INTERFACE_POLL_INTERVAL) when mounting SMB1 shares: CIFS: VFS: \\srv\share error -5 on ioctl to get interface list It's supported only by SMB2+, so do not query network interfaces on SMB1 mounts. Fixes: 6e1c1c08cdf3 ("cifs: periodically query network interfaces from server") Reviewed-by: Shyam Prasad N Reviewed-by: Tom Talpey Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/connect.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 7e7f712f97fd..fde1c371605a 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2609,11 +2609,14 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) INIT_LIST_HEAD(&tcon->pending_opens); tcon->status = TID_GOOD; - /* schedule query interfaces poll */ INIT_DELAYED_WORK(&tcon->query_interfaces, smb2_query_server_interfaces); - queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, - (SMB_INTERFACE_POLL_INTERVAL * HZ)); + if (ses->server->dialect >= SMB30_PROT_ID && + (ses->server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) { + /* schedule query interfaces poll */ + queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, + (SMB_INTERFACE_POLL_INTERVAL * HZ)); + } spin_lock(&cifs_tcp_ses_lock); list_add(&tcon->tcon_list, &ses->tcon_list); From f0f326dc9794cc5820e0804d8702d5a10fb84aca Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 10 Jan 2023 17:55:20 -0300 Subject: [PATCH 219/543] cifs: fix double free on failed kerberos auth commit 39e8db3c860e2678ce5a7d74193925876507c9eb upstream. If session setup failed with kerberos auth, we ended up freeing cifs_ses::auth_key.response twice in SMB2_auth_kerberos() and sesInfoFree(). Fix this by zeroing out cifs_ses::auth_key.response after freeing it in SMB2_auth_kerberos(). Fixes: a4e430c8c8ba ("cifs: replace kfree() with kfree_sensitive() for sensitive data") Signed-off-by: Paulo Alcantara (SUSE) Acked-by: Ronnie Sahlberg Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index a5695748a89b..4ac5b1bfaf78 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1479,8 +1479,11 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data) out_put_spnego_key: key_invalidate(spnego_key); key_put(spnego_key); - if (rc) + if (rc) { kfree_sensitive(ses->auth_key.response); + ses->auth_key.response = NULL; + ses->auth_key.len = 0; + } out: sess_data->result = rc; sess_data->func = NULL; From 617401df46051dfb5dddfc74e06a2ffd93c8e376 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 10 Jan 2023 10:24:52 -0700 Subject: [PATCH 220/543] io_uring/fdinfo: include locked hash table in fdinfo output commit ea97cbebaf861d99c3e892275147e6fca6d2c1ca upstream. A previous commit split the hash table for polled requests into two parts, but didn't get the fdinfo output updated. This means that it's less useful for debugging, as we may think a given request is not pending poll. Fix this up by dumping the locked hash table contents too. Fixes: 9ca9fb24d5fe ("io_uring: mutex locked poll hashing") Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/fdinfo.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index 2e04850a657b..882bd56b01ed 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -170,12 +170,11 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, xa_for_each(&ctx->personalities, index, cred) io_uring_show_cred(m, index, cred); } - if (has_lock) - mutex_unlock(&ctx->uring_lock); seq_puts(m, "PollList:\n"); for (i = 0; i < (1U << ctx->cancel_table.hash_bits); i++) { struct io_hash_bucket *hb = &ctx->cancel_table.hbs[i]; + struct io_hash_bucket *hbl = &ctx->cancel_table_locked.hbs[i]; struct io_kiocb *req; spin_lock(&hb->lock); @@ -183,8 +182,17 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, seq_printf(m, " op=%d, task_works=%d\n", req->opcode, task_work_pending(req->task)); spin_unlock(&hb->lock); + + if (!has_lock) + continue; + hlist_for_each_entry(req, &hbl->list, hash_node) + seq_printf(m, " op=%d, task_works=%d\n", req->opcode, + task_work_pending(req->task)); } + if (has_lock) + mutex_unlock(&ctx->uring_lock); + seq_puts(m, "CqOverflowList:\n"); spin_lock(&ctx->completion_lock); list_for_each_entry(ocqe, &ctx->cq_overflow_list, list) { From 462ed8a2bebae3d7a039187d8d7a06094a43ff2e Mon Sep 17 00:00:00 2001 From: ChiYuan Huang Date: Thu, 29 Dec 2022 16:03:53 +0800 Subject: [PATCH 221/543] ASoC: rt9120: Make dev PM runtime bind AsoC component PM commit 7161bd540eebebae2bbe8c79de25d8caf12dbf78 upstream. RT9120 uses PM runtime autosuspend to decrease the frequently on/off spent time. This exists one case, when pcm is closed and dev PM is waiting for autosuspend time expired to enter runtime suspend state. At the mean time, system is going to enter suspend, dev PM runtime suspend won't be called. It makes the rt9120 suspend consumption current not as expected. This patch can fix the rt9120 dev PM issue during runtime autosuspend and system suspend by binding dev PM runtime and ASoC component PM. Fixes: 80b949f332e3 ("ASoC: rt9120: Use pm_runtime and regcache to optimize 'pwdnn' logic") Signed-off-by: ChiYuan Huang Link: https://lore.kernel.org/r/1672301033-3675-1-git-send-email-u0084500@gmail.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/rt9120.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/soc/codecs/rt9120.c b/sound/soc/codecs/rt9120.c index 644300e88b4c..fcf4fbaed3c7 100644 --- a/sound/soc/codecs/rt9120.c +++ b/sound/soc/codecs/rt9120.c @@ -177,8 +177,20 @@ static int rt9120_codec_probe(struct snd_soc_component *comp) return 0; } +static int rt9120_codec_suspend(struct snd_soc_component *comp) +{ + return pm_runtime_force_suspend(comp->dev); +} + +static int rt9120_codec_resume(struct snd_soc_component *comp) +{ + return pm_runtime_force_resume(comp->dev); +} + static const struct snd_soc_component_driver rt9120_component_driver = { .probe = rt9120_codec_probe, + .suspend = rt9120_codec_suspend, + .resume = rt9120_codec_resume, .controls = rt9120_snd_controls, .num_controls = ARRAY_SIZE(rt9120_snd_controls), .dapm_widgets = rt9120_dapm_widgets, From 1f4a9408024e268fe0519bfa3d2ed8c86ab1d4a4 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 9 Jan 2023 20:18:11 +0100 Subject: [PATCH 222/543] ACPI: video: Allow selecting NVidia-WMI-EC or Apple GMUX backlight from the cmdline commit 420a1116aef0e8e12c305508f45ce73e5ae30a09 upstream. The patches adding NVidia-WMI-EC and Apple GMUX backlight detection support to acpi_video_get_backlight_type(), forgot to update acpi_video_parse_cmdline() to allow manually selecting these from the commandline. Add support for these to acpi_video_parse_cmdline(). Fixes: fe7aebb40d42 ("ACPI: video: Add Nvidia WMI EC brightness control detection (v3)") Fixes: 21245df307cb ("ACPI: video: Add Apple GMUX brightness control detection") Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/video_detect.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 76b7e7f8894e..1db8e68cd8bc 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -50,6 +50,10 @@ static void acpi_video_parse_cmdline(void) acpi_backlight_cmdline = acpi_backlight_video; if (!strcmp("native", acpi_video_backlight_string)) acpi_backlight_cmdline = acpi_backlight_native; + if (!strcmp("nvidia_wmi_ec", acpi_video_backlight_string)) + acpi_backlight_cmdline = acpi_backlight_nvidia_wmi_ec; + if (!strcmp("apple_gmux", acpi_video_backlight_string)) + acpi_backlight_cmdline = acpi_backlight_apple_gmux; if (!strcmp("none", acpi_video_backlight_string)) acpi_backlight_cmdline = acpi_backlight_none; } From 3a79bad876164ac34f566c66074503124adf9336 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 21 Dec 2022 23:07:24 +0100 Subject: [PATCH 223/543] platform/x86: dell-privacy: Only register SW_CAMERA_LENS_COVER if present commit 6dc485f9940df8105ea729cbeb7a7d18d409dde5 upstream. Unlike keys where userspace only reacts to keypresses, userspace may act on switches in both (0 and 1) of their positions. For example if a SW_TABLET_MODE switch is registered then GNOME will not automatically show the onscreen keyboard when a text field gets focus on touchscreen devices when SW_TABLET_MODE reports 0 and when SW_TABLET_MODE reports 1 libinput will block (filter out) builtin keyboard and touchpad events. So to avoid unwanted side-effects EV_SW type inputs should only be registered if they are actually present, only register SW_CAMERA_LENS_COVER if it is actually there. Fixes: 8af9fa37b8a3 ("platform/x86: dell-privacy: Add support for Dell hardware privacy") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20221221220724.119594-2-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/dell/dell-wmi-privacy.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/dell/dell-wmi-privacy.c b/drivers/platform/x86/dell/dell-wmi-privacy.c index c82b3d6867c5..9fbd974b723e 100644 --- a/drivers/platform/x86/dell/dell-wmi-privacy.c +++ b/drivers/platform/x86/dell/dell-wmi-privacy.c @@ -292,7 +292,7 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context) { struct privacy_wmi_data *priv; struct key_entry *keymap; - int ret, i; + int ret, i, j; ret = wmi_has_guid(DELL_PRIVACY_GUID); if (!ret) @@ -318,9 +318,20 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context) /* remap the keymap code with Dell privacy key type 0x12 as prefix * KEY_MICMUTE scancode will be reported as 0x120001 */ - for (i = 0; i < ARRAY_SIZE(dell_wmi_keymap_type_0012); i++) { - keymap[i] = dell_wmi_keymap_type_0012[i]; - keymap[i].code |= (0x0012 << 16); + for (i = 0, j = 0; i < ARRAY_SIZE(dell_wmi_keymap_type_0012); i++) { + /* + * Unlike keys where only presses matter, userspace may act + * on switches in both of their positions. Only register + * SW_CAMERA_LENS_COVER if it is actually there. + */ + if (dell_wmi_keymap_type_0012[i].type == KE_VSW && + dell_wmi_keymap_type_0012[i].sw.code == SW_CAMERA_LENS_COVER && + !(priv->features_present & BIT(DELL_PRIVACY_TYPE_CAMERA))) + continue; + + keymap[j] = dell_wmi_keymap_type_0012[i]; + keymap[j].code |= (0x0012 << 16); + j++; } ret = sparse_keymap_setup(priv->input_dev, keymap, NULL); kfree(keymap); From 7221c2a7be6480cbaf2eb3012e564fe07beb55ad Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Fri, 2 Dec 2022 23:33:19 +0100 Subject: [PATCH 224/543] platform/surface: aggregator: Ignore command messages not intended for us commit ae0fa0a3126a86c801c3220fcd8eefe03aa39f3e upstream. It is possible that we (the host/kernel driver) receive command messages that are not intended for us. Ignore those for now. The whole story is a bit more complicated: It is possible to enable debug output on SAM, which is sent via SSH command messages. By default this output is sent to a debug connector, with its own target ID (TID=0x03). It is possible to override the target of the debug output and set it to the host/kernel driver. This, however, does not change the original target ID of the message. Meaning, we receive messages with TID=0x03 (debug) but expect to only receive messages with TID=0x00 (host). The problem is that the different target ID also comes with a different scope of request IDs. In particular, these do not follow the standard event rules (i.e. do not fall into a set of small reserved values). Therefore, current message handling interprets them as responses to pending requests and tries to match them up via the request ID. However, these debug output messages are not in fact responses, and therefore this will at best fail to find the request and at worst pass on the wrong data as response for a request. Therefore ignore any command messages not intended for us (host) for now. We can implement support for the debug messages once we have a better understanding of them. Note that this may also provide a bit more stability and avoid some driver confusion in case any other targets want to talk to us in the future, since we don't yet know what to do with those as well. A warning for the dropped messages should suffice for now and also give us a chance of discovering new targets if they come along without any potential for bugs/instabilities. Fixes: c167b9c7e3d6 ("platform/surface: Add Surface Aggregator subsystem") Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20221202223327.690880-2-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- .../surface/aggregator/ssh_request_layer.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/platform/surface/aggregator/ssh_request_layer.c b/drivers/platform/surface/aggregator/ssh_request_layer.c index f5565570f16c..69132976d297 100644 --- a/drivers/platform/surface/aggregator/ssh_request_layer.c +++ b/drivers/platform/surface/aggregator/ssh_request_layer.c @@ -916,6 +916,20 @@ static void ssh_rtl_rx_command(struct ssh_ptl *p, const struct ssam_span *data) if (sshp_parse_command(dev, data, &command, &command_data)) return; + /* + * Check if the message was intended for us. If not, drop it. + * + * Note: We will need to change this to handle debug messages. On newer + * generation devices, these seem to be sent to tid_out=0x03. We as + * host can still receive them as they can be forwarded via an override + * option on SAM, but doing so does not change tid_out=0x00. + */ + if (command->tid_out != 0x00) { + rtl_warn(rtl, "rtl: dropping message not intended for us (tid = %#04x)\n", + command->tid_out); + return; + } + if (ssh_rqid_is_event(get_unaligned_le16(&command->rqid))) ssh_rtl_rx_event(rtl, command, &command_data); else From c7acfd933d965cbeeb5f2493feea8d6dacde1917 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 11 Jan 2023 21:14:26 +0100 Subject: [PATCH 225/543] platform/x86: int3472/discrete: Ensure the clk/power enable pins are in output mode commit cf5ac2d45f6e4d11ad78e7b10ae9a4121ba5e995 upstream. acpi_get_and_request_gpiod() does not take a gpio_lookup_flags argument specifying that the pins direction should be initialized to a specific value. This means that in some cases the pins might be left in input mode, causing the gpiod_set() calls made to enable the clk / regulator to not work. One example of this problem is the clk-enable GPIO for the ov01a1s sensor on a Dell Latitude 9420 being left in input mode causing the clk to never get enabled. Explicitly set the direction of the pins to output to fix this. Fixes: 5de691bffe57 ("platform/x86: Add intel_skl_int3472 driver") Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Reviewed-by: Daniel Scally Reviewed-by: Sakari Ailus Link: https://lore.kernel.org/r/20230111201426.947853-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/intel/int3472/clk_and_regulator.c | 3 +++ drivers/platform/x86/intel/int3472/discrete.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/drivers/platform/x86/intel/int3472/clk_and_regulator.c b/drivers/platform/x86/intel/int3472/clk_and_regulator.c index b2342b3d78c7..74dc2cff799e 100644 --- a/drivers/platform/x86/intel/int3472/clk_and_regulator.c +++ b/drivers/platform/x86/intel/int3472/clk_and_regulator.c @@ -181,6 +181,9 @@ int skl_int3472_register_regulator(struct int3472_discrete_device *int3472, return PTR_ERR(int3472->regulator.gpio); } + /* Ensure the pin is in output mode and non-active state */ + gpiod_direction_output(int3472->regulator.gpio, 0); + cfg.dev = &int3472->adev->dev; cfg.init_data = &init_data; cfg.ena_gpiod = int3472->regulator.gpio; diff --git a/drivers/platform/x86/intel/int3472/discrete.c b/drivers/platform/x86/intel/int3472/discrete.c index 974a132db651..c42c3faa2c32 100644 --- a/drivers/platform/x86/intel/int3472/discrete.c +++ b/drivers/platform/x86/intel/int3472/discrete.c @@ -168,6 +168,8 @@ static int skl_int3472_map_gpio_to_clk(struct int3472_discrete_device *int3472, return (PTR_ERR(gpio)); int3472->clock.ena_gpio = gpio; + /* Ensure the pin is in output mode and non-active state */ + gpiod_direction_output(int3472->clock.ena_gpio, 0); break; case INT3472_GPIO_TYPE_PRIVACY_LED: gpio = acpi_get_and_request_gpiod(path, pin, "int3472,privacy-led"); @@ -175,6 +177,8 @@ static int skl_int3472_map_gpio_to_clk(struct int3472_discrete_device *int3472, return (PTR_ERR(gpio)); int3472->clock.led_gpio = gpio; + /* Ensure the pin is in output mode and non-active state */ + gpiod_direction_output(int3472->clock.led_gpio, 0); break; default: dev_err(int3472->dev, "Invalid GPIO type 0x%02x for clock\n", type); From 127bf1d6bfe3add22136c9dd45f1b7ccf4442bc4 Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Thu, 12 Jan 2023 17:12:28 -0500 Subject: [PATCH 226/543] platform/x86: thinkpad_acpi: Fix profile mode display in AMT mode commit fde5f74ccfc771941b018b5415fa9664426e10ad upstream. Recently AMT mode was enabled (somewhat unexpectedly) on the Lenovo Z13 platform. The FW is advertising it is available and the driver tries to use it - unfortunately it reports the profile mode incorrectly. Note, there is also some extra work needed to enable the dynamic aspect of AMT support that I will be following up with; but more testing is needed first. This patch just fixes things so the profiles are reported correctly. Link: https://gitlab.freedesktop.org/hadess/power-profiles-daemon/-/issues/115 Fixes: 46dcbc61b739 ("platform/x86: thinkpad-acpi: Add support for automatic mode transitions") Reviewed-by: Mario Limonciello Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20230112221228.490946-1-mpearson-lenovo@squebb.ca Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/thinkpad_acpi.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index a1d91736a03b..4e95d2243161 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -10315,9 +10315,11 @@ static DEFINE_MUTEX(dytc_mutex); static int dytc_capabilities; static bool dytc_mmc_get_available; -static int convert_dytc_to_profile(int dytcmode, enum platform_profile_option *profile) +static int convert_dytc_to_profile(int funcmode, int dytcmode, + enum platform_profile_option *profile) { - if (dytc_capabilities & BIT(DYTC_FC_MMC)) { + switch (funcmode) { + case DYTC_FUNCTION_MMC: switch (dytcmode) { case DYTC_MODE_MMC_LOWPOWER: *profile = PLATFORM_PROFILE_LOW_POWER; @@ -10333,8 +10335,7 @@ static int convert_dytc_to_profile(int dytcmode, enum platform_profile_option *p return -EINVAL; } return 0; - } - if (dytc_capabilities & BIT(DYTC_FC_PSC)) { + case DYTC_FUNCTION_PSC: switch (dytcmode) { case DYTC_MODE_PSC_LOWPOWER: *profile = PLATFORM_PROFILE_LOW_POWER; @@ -10348,6 +10349,14 @@ static int convert_dytc_to_profile(int dytcmode, enum platform_profile_option *p default: /* Unknown mode */ return -EINVAL; } + return 0; + case DYTC_FUNCTION_AMT: + /* For now return balanced. It's the closest we have to 'auto' */ + *profile = PLATFORM_PROFILE_BALANCED; + return 0; + default: + /* Unknown function */ + return -EOPNOTSUPP; } return 0; } @@ -10496,6 +10505,7 @@ static int dytc_profile_set(struct platform_profile_handler *pprof, err = dytc_command(DYTC_SET_COMMAND(DYTC_FUNCTION_PSC, perfmode, 1), &output); if (err) goto unlock; + /* system supports AMT, activate it when on balanced */ if (dytc_capabilities & BIT(DYTC_FC_AMT)) dytc_control_amt(profile == PLATFORM_PROFILE_BALANCED); @@ -10511,7 +10521,7 @@ static void dytc_profile_refresh(void) { enum platform_profile_option profile; int output, err = 0; - int perfmode; + int perfmode, funcmode; mutex_lock(&dytc_mutex); if (dytc_capabilities & BIT(DYTC_FC_MMC)) { @@ -10526,8 +10536,9 @@ static void dytc_profile_refresh(void) if (err) return; + funcmode = (output >> DYTC_GET_FUNCTION_BIT) & 0xF; perfmode = (output >> DYTC_GET_MODE_BIT) & 0xF; - convert_dytc_to_profile(perfmode, &profile); + convert_dytc_to_profile(funcmode, perfmode, &profile); if (profile != dytc_current_profile) { dytc_current_profile = profile; platform_profile_notify(); From 0e082105895c448ea8b20daf881c3e2759ed7e15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 21 Dec 2022 17:59:51 +0000 Subject: [PATCH 227/543] platform/x86: asus-wmi: Don't load fan curves without fan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 01fd7e7851ba2275662f771ee17d1f80e7bbfa52 upstream. If we do not have a fan it does not make sense to load curves for it. This removes the following warnings from the kernel log: asus_wmi: fan_curve_get_factory_default (0x00110024) failed: -19 asus_wmi: fan_curve_get_factory_default (0x00110025) failed: -19 Fixes: a2bdf10ce96e ("platform/x86: asus-wmi: Increase FAN_CURVE_BUF_LEN to 32") Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20221221-asus-fan-v1-3-e07f3949725b@weissschuh.net Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/asus-wmi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 872efc1d5b36..f051b21653d6 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -2436,6 +2436,9 @@ static int fan_curve_check_present(struct asus_wmi *asus, bool *available, *available = false; + if (asus->fan_type == FAN_TYPE_NONE) + return 0; + err = fan_curve_get_factory_default(asus, fan_dev); if (err) { return 0; From dfa4ff480fe6d7b401a8d2f707b0228c4664de13 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 21 Dec 2022 23:07:23 +0100 Subject: [PATCH 228/543] platform/x86: dell-privacy: Fix SW_CAMERA_LENS_COVER reporting commit 1af7fef0d9d3fa075bf4e850f705df1fe97d33ce upstream. Use KE_VSW instead of KE_SW for the SW_CAMERA_LENS_COVER key_entry and get the value of the switch from the status field when handling SW_CAMERA_LENS_COVER events, instead of always reporting 0. Also correctly set the initial SW_CAMERA_LENS_COVER value. Fixes: 8af9fa37b8a3 ("platform/x86: dell-privacy: Add support for Dell hardware privacy") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20221221220724.119594-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/dell/dell-wmi-privacy.c | 22 ++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/dell/dell-wmi-privacy.c b/drivers/platform/x86/dell/dell-wmi-privacy.c index 9fbd974b723e..c517bd45dd32 100644 --- a/drivers/platform/x86/dell/dell-wmi-privacy.c +++ b/drivers/platform/x86/dell/dell-wmi-privacy.c @@ -61,7 +61,7 @@ static const struct key_entry dell_wmi_keymap_type_0012[] = { /* privacy mic mute */ { KE_KEY, 0x0001, { KEY_MICMUTE } }, /* privacy camera mute */ - { KE_SW, 0x0002, { SW_CAMERA_LENS_COVER } }, + { KE_VSW, 0x0002, { SW_CAMERA_LENS_COVER } }, { KE_END, 0}, }; @@ -115,11 +115,15 @@ bool dell_privacy_process_event(int type, int code, int status) switch (code) { case DELL_PRIVACY_AUDIO_EVENT: /* Mic mute */ - case DELL_PRIVACY_CAMERA_EVENT: /* Camera mute */ priv->last_status = status; sparse_keymap_report_entry(priv->input_dev, key, 1, true); ret = true; break; + case DELL_PRIVACY_CAMERA_EVENT: /* Camera mute */ + priv->last_status = status; + sparse_keymap_report_entry(priv->input_dev, key, !(status & CAMERA_STATUS), false); + ret = true; + break; default: dev_dbg(&priv->wdev->dev, "unknown event type 0x%04x 0x%04x\n", type, code); } @@ -304,6 +308,11 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context) dev_set_drvdata(&wdev->dev, priv); priv->wdev = wdev; + + ret = get_current_status(priv->wdev); + if (ret) + return ret; + /* create evdev passing interface */ priv->input_dev = devm_input_allocate_device(&wdev->dev); if (!priv->input_dev) @@ -342,11 +351,12 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context) priv->input_dev->name = "Dell Privacy Driver"; priv->input_dev->id.bustype = BUS_HOST; - ret = input_register_device(priv->input_dev); - if (ret) - return ret; + /* Report initial camera-cover status */ + if (priv->features_present & BIT(DELL_PRIVACY_TYPE_CAMERA)) + input_report_switch(priv->input_dev, SW_CAMERA_LENS_COVER, + !(priv->last_status & CAMERA_STATUS)); - ret = get_current_status(priv->wdev); + ret = input_register_device(priv->input_dev); if (ret) return ret; From 96ec2f82aea0b2e51b10a58b2bedd2ac3ffac5ac Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Fri, 23 Dec 2022 02:10:08 +0000 Subject: [PATCH 229/543] dt-bindings: msm: dsi-controller-main: Fix operating-points-v2 constraint commit cdf64343f91a1225e9e3d4ce4261962cd41b4ddd upstream. The existing msm8916.dtsi does not depend on nor require operating points. Fixes: 4dbe55c97741 ("dt-bindings: msm: dsi: add yaml schemas for DSI bindings") Reviewed-by: Dmitry Baryshkov Acked-by: Krzysztof Kozlowski Signed-off-by: Bryan O'Donoghue Patchwork: https://patchwork.freedesktop.org/patch/515940/ Link: https://lore.kernel.org/r/20221223021025.1646636-2-bryan.odonoghue@linaro.org Signed-off-by: Abhinav Kumar Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/display/msm/dsi-controller-main.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml index 3b609c19e0bc..62849a4f64c6 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml @@ -135,7 +135,6 @@ required: - assigned-clocks - assigned-clock-parents - power-domains - - operating-points-v2 - ports additionalProperties: false From b107b08c41b3076a508113fbaaffe15ce1fe7f65 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 5 Jan 2023 03:47:43 +0200 Subject: [PATCH 230/543] drm/msm: another fix for the headless Adreno GPU commit 00dd060ab3cf95ca6ede7853bc14397014971b5e upstream. Fix another oops reproducible when rebooting the board with the Adreno GPU working in the headless mode (e.g. iMX platforms). Unable to handle kernel NULL pointer dereference at virtual address 00000000 when read [00000000] *pgd=74936831, *pte=00000000, *ppte=00000000 Internal error: Oops: 17 [#1] ARM CPU: 0 PID: 51 Comm: reboot Not tainted 6.2.0-rc1-dirty #11 Hardware name: Freescale i.MX53 (Device Tree Support) PC is at msm_atomic_commit_tail+0x50/0x970 LR is at commit_tail+0x9c/0x188 pc : [] lr : [] psr: 600e0013 sp : e0851d30 ip : ee4eb7eb fp : 00090acc r10: 00000058 r9 : c2193014 r8 : c4310000 r7 : c4759380 r6 : 07bef61d r5 : 00000000 r4 : 00000000 r3 : c44cc440 r2 : 00000000 r1 : 00000000 r0 : 00000000 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5387d Table: 74910019 DAC: 00000051 Register r0 information: NULL pointer Register r1 information: NULL pointer Register r2 information: NULL pointer Register r3 information: slab kmalloc-1k start c44cc400 pointer offset 64 size 1024 Register r4 information: NULL pointer Register r5 information: NULL pointer Register r6 information: non-paged memory Register r7 information: slab kmalloc-128 start c4759380 pointer offset 0 size 128 Register r8 information: slab kmalloc-2k start c4310000 pointer offset 0 size 2048 Register r9 information: non-slab/vmalloc memory Register r10 information: non-paged memory Register r11 information: non-paged memory Register r12 information: non-paged memory Process reboot (pid: 51, stack limit = 0xc80046d9) Stack: (0xe0851d30 to 0xe0852000) 1d20: c4759380 fbd77200 000005ff 002b9c70 1d40: c4759380 c4759380 00000000 07bef61d 00000600 c0d6fe7c c2193014 00000058 1d60: 00090acc c067a214 00000000 c4759380 c4310000 00000000 c44cc854 c067a89c 1d80: 00000000 00000000 00000000 c4310468 00000000 c4759380 c4310000 c4310468 1da0: c4310470 c0643258 c4759380 00000000 00000000 c0c4ee24 00000000 c44cc810 1dc0: 00000000 c0c4ee24 00000000 c44cc810 00000000 0347d2a8 e0851e00 e0851e00 1de0: c4759380 c067ad20 c4310000 00000000 c44cc810 c27f8718 c44cc854 c067adb8 1e00: c4933000 00000002 00000001 00000000 00000000 c2130850 00000000 c2130854 1e20: c25fc488 00000000 c0ff162c 00000000 00000001 00000002 00000000 00000000 1e40: c43102c0 c43102c0 00000000 0347d2a8 c44cc810 c44cc814 c2133da8 c06d1a60 1e60: 00000000 00000000 00079028 c2012f24 fee1dead c4933000 00000058 c01431e4 1e80: 01234567 c0143a20 00000000 00000000 00000000 00000000 00000000 00000000 1ea0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1ec0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1ee0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1f00: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1f20: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1f40: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1f60: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1f80: 00000000 00000000 00000000 0347d2a8 00000002 00000004 00000078 00000058 1fa0: c010028c c0100060 00000002 00000004 fee1dead 28121969 01234567 00079028 1fc0: 00000002 00000004 00000078 00000058 0002fdc5 00000000 00000000 00090acc 1fe0: 00000058 becc9c64 b6e97e05 b6e0e5f6 600e0030 fee1dead 00000000 00000000 msm_atomic_commit_tail from commit_tail+0x9c/0x188 commit_tail from drm_atomic_helper_commit+0x160/0x188 drm_atomic_helper_commit from drm_atomic_commit+0xac/0xe0 drm_atomic_commit from drm_atomic_helper_disable_all+0x1b0/0x1c0 drm_atomic_helper_disable_all from drm_atomic_helper_shutdown+0x88/0x140 drm_atomic_helper_shutdown from device_shutdown+0x16c/0x240 device_shutdown from kernel_restart+0x38/0x90 kernel_restart from __do_sys_reboot+0x174/0x224 __do_sys_reboot from ret_fast_syscall+0x0/0x1c Exception stack(0xe0851fa8 to 0xe0851ff0) 1fa0: 00000002 00000004 fee1dead 28121969 01234567 00079028 1fc0: 00000002 00000004 00000078 00000058 0002fdc5 00000000 00000000 00090acc 1fe0: 00000058 becc9c64 b6e97e05 b6e0e5f6 Code: 15922088 1184421c e1500003 1afffff8 (e5953000) ---[ end trace 0000000000000000 ]--- Fixes: 0a58d2ae572a ("drm/msm: Make .remove and .shutdown HW shutdown consistent") Reported-by: kernel test robot Signed-off-by: Dmitry Baryshkov Reviewed-by: Rob Clark Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/516909/ Link: https://lore.kernel.org/r/20230105014743.1478110-1-dmitry.baryshkov@linaro.org Signed-off-by: Abhinav Kumar Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/msm/msm_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 105b5b48e828..681c1b889b31 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -1271,7 +1271,7 @@ void msm_drv_shutdown(struct platform_device *pdev) * msm_drm_init, drm_dev->registered is used as an indicator that the * shutdown will be successful. */ - if (drm && drm->registered) + if (drm && drm->registered && priv->kms) drm_atomic_helper_shutdown(drm); } From a30aafcfba8f9ec2a9f58769c5cdc9a2f3816008 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 25 Nov 2022 10:18:26 +0000 Subject: [PATCH 231/543] firmware/psci: Fix MEM_PROTECT_RANGE function numbers commit f3dc61cde80d48751999c4cb46daf3b2185e6895 upstream. PSCI v1.1 offers 32-bit and 64-bit variants of the MEM_PROTECT_RANGE call using function identifier 20. Fix the incorrect definitions of the MEM_PROTECT_CHECK_RANGE calls in the PSCI UAPI header. Cc: Dmitry Baryshkov Cc: Lorenzo Pieralisi Cc: Arnd Bergmann Fixes: 3137f2e60098 ("firmware/psci: Add debugfs support to ease debugging") Acked-by: Marc Zyngier Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20221125101826.22404-1-will@kernel.org Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/psci.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h index 3511095c2702..42a40ad3fb62 100644 --- a/include/uapi/linux/psci.h +++ b/include/uapi/linux/psci.h @@ -58,7 +58,7 @@ #define PSCI_1_1_FN_SYSTEM_RESET2 PSCI_0_2_FN(18) #define PSCI_1_1_FN_MEM_PROTECT PSCI_0_2_FN(19) -#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN(19) +#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN(20) #define PSCI_1_0_FN64_CPU_DEFAULT_SUSPEND PSCI_0_2_FN64(12) #define PSCI_1_0_FN64_NODE_HW_STATE PSCI_0_2_FN64(13) @@ -67,7 +67,7 @@ #define PSCI_1_0_FN64_STAT_COUNT PSCI_0_2_FN64(17) #define PSCI_1_1_FN64_SYSTEM_RESET2 PSCI_0_2_FN64(18) -#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN64(19) +#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN64(20) /* PSCI v0.2 power state encoding for CPU_SUSPEND function */ #define PSCI_0_2_POWER_STATE_ID_MASK 0xffff From d0f52562b89155bde80d640ff4ef63bc713fac53 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 5 Jan 2023 09:08:34 +0000 Subject: [PATCH 232/543] firmware/psci: Don't register with debugfs if PSCI isn't available commit cef139299fd86098c6e3dbd389d1d0b2462d7710 upstream. Contrary to popular belief, PSCI is not a universal property of an ARM/arm64 system. There is a garden variety of systems out there that don't (or even cannot) implement it. I'm the first one deplore such a situation, but hey... On such systems, a "cat /sys/kernel/debug/psci" results in fireworks, as no invocation callback is registered. Check for the invoke_psci_fn and psci_ops.get_version pointers before registering with the debugfs subsystem, avoiding the issue altogether. Fixes: 3137f2e60098 ("firmware/psci: Add debugfs support to ease debugging") Reported-by: Hector Martin Signed-off-by: Marc Zyngier Cc: Dmitry Baryshkov Cc: Mark Brown Cc: Ulf Hansson Cc: Arnd Bergmann Cc: Mark Rutland Cc: Lorenzo Pieralisi Reviewed-by: Hector Martin Acked-by: Dmitry Baryshkov Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20230105090834.630238-1-maz@kernel.org Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/psci/psci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index e7bcfca4159f..447ee4ea5c90 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -440,6 +440,9 @@ static const struct file_operations psci_debugfs_ops = { static int __init psci_debugfs_init(void) { + if (!invoke_psci_fn || !psci_ops.get_version) + return 0; + return PTR_ERR_OR_ZERO(debugfs_create_file("psci", 0444, NULL, NULL, &psci_debugfs_ops)); } From b72bd13bc50099e5e5f8a7a626a638b95da60548 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 2 Jan 2023 11:02:00 +0100 Subject: [PATCH 233/543] drm/msm/adreno: Make adreno quirks not overwrite each other commit 13ef096e342b00e30b95a90c6c13eee1f0bec4c5 upstream. So far the adreno quirks have all been assigned with an OR operator, which is problematic, because they were assigned consecutive integer values, which makes checking them with an AND operator kind of no bueno.. Switch to using BIT(n) so that only the quirks that the programmer chose are taken into account when evaluating info->quirks & ADRENO_QUIRK_... Fixes: 370063ee427a ("drm/msm/adreno: Add A540 support") Reviewed-by: Dmitry Baryshkov Reviewed-by: Marijn Suijten Reviewed-by: Rob Clark Signed-off-by: Konrad Dybcio Reviewed-by: Akhil P Oommen Patchwork: https://patchwork.freedesktop.org/patch/516456/ Link: https://lore.kernel.org/r/20230102100201.77286-1-konrad.dybcio@linaro.org Signed-off-by: Rob Clark Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/msm/adreno/adreno_gpu.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index e7adc5c632d0..3d78efb066b1 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -29,11 +29,9 @@ enum { ADRENO_FW_MAX, }; -enum adreno_quirks { - ADRENO_QUIRK_TWO_PASS_USE_WFI = 1, - ADRENO_QUIRK_FAULT_DETECT_MASK = 2, - ADRENO_QUIRK_LMLOADKILL_DISABLE = 3, -}; +#define ADRENO_QUIRK_TWO_PASS_USE_WFI BIT(0) +#define ADRENO_QUIRK_FAULT_DETECT_MASK BIT(1) +#define ADRENO_QUIRK_LMLOADKILL_DISABLE BIT(2) struct adreno_rev { uint8_t core; @@ -65,7 +63,7 @@ struct adreno_info { const char *name; const char *fw[ADRENO_FW_MAX]; uint32_t gmem; - enum adreno_quirks quirks; + u64 quirks; struct msm_gpu *(*init)(struct drm_device *dev); const char *zapfw; u32 inactive_period; From 4fa6b43ba44bd26004b558549f7ad448a6255a64 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 27 Dec 2022 17:12:06 +0000 Subject: [PATCH 234/543] arm64/signal: Always allocate SVE signal frames on SME only systems commit f26cd7372160da2eba31061d7943348ab9f2c01d upstream. Currently we only allocate space for SVE signal frames on systems that support SVE, meaning that SME only systems do not allocate a signal frame for streaming mode SVE state. Change the check so space is allocated if either feature is supported. Fixes: 85ed24dad290 ("arm64/sme: Implement streaming SVE signal handling") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20221223-arm64-fix-sme-only-v1-3-938d663f69e5@kernel.org Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 9ad911f1647c..5100893be6e4 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -729,7 +729,7 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, return err; } - if (system_supports_sve()) { + if (system_supports_sve() || system_supports_sme()) { unsigned int vq = 0; if (add_all || test_thread_flag(TIF_SVE) || From b5affe7e0b3a4e18cf98f86b9921f442c70c54a3 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Fri, 23 Dec 2022 02:10:09 +0000 Subject: [PATCH 235/543] dt-bindings: msm: dsi-controller-main: Fix power-domain constraint commit a6f033938beb31f893302a93f83ec0b6460c6cac upstream. power-domain is required for the sc7180 dispcc GDSC but not every qcom SoC has a similar dependency for example the apq8064. Most Qcom SoC's using mdss-dsi-ctrl seem to have the ability to power-collapse the MDP without collapsing DSI. For example the qcom vendor kernel commit for apq8084, msm8226, msm8916, msm8974. https://review.carbonrom.org/plugins/gitiles/CarbonROM/android_kernel_oneplus_msm8994/+/7b5c011a770daa2811778937ed646237a28a8694 "ARM: dts: msm: add mdss gdsc supply to dsi controller device It is possible for the DSI controller to be active when MDP is power collapsed. DSI controller needs to have it's own vote for mdss gdsc to ensure that gdsc remains on in such cases." This however doesn't appear to be the case for the apq8064 so we shouldn't be marking power-domain as required in yaml checks. Fixes: 4dbe55c97741 ("dt-bindings: msm: dsi: add yaml schemas for DSI bindings") Reviewed-by: Dmitry Baryshkov Acked-by: Krzysztof Kozlowski Signed-off-by: Bryan O'Donoghue Patchwork: https://patchwork.freedesktop.org/patch/515958/ Link: https://lore.kernel.org/r/20221223021025.1646636-3-bryan.odonoghue@linaro.org Signed-off-by: Abhinav Kumar Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/display/msm/dsi-controller-main.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml index 62849a4f64c6..ca6f71a923ac 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml @@ -134,7 +134,6 @@ required: - phy-names - assigned-clocks - assigned-clock-parents - - power-domains - ports additionalProperties: false From 4b855673cf3d699c1603ec07a87191caa9fd5aa9 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Fri, 23 Dec 2022 02:10:10 +0000 Subject: [PATCH 236/543] dt-bindings: msm: dsi-controller-main: Fix description of core clock commit 654ffe4b793b42ed6b5909daff0b91809916d94e upstream. There's a typo in describing the core clock as an 'escape' clock. The accurate description is 'core'. Fixes: 4dbe55c97741 ("dt-bindings: msm: dsi: add yaml schemas for DSI bindings") Reviewed-by: Dmitry Baryshkov Acked-by: Krzysztof Kozlowski Signed-off-by: Bryan O'Donoghue Patchwork: https://patchwork.freedesktop.org/patch/515938/ Link: https://lore.kernel.org/r/20221223021025.1646636-4-bryan.odonoghue@linaro.org Signed-off-by: Abhinav Kumar Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/display/msm/dsi-controller-main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml index ca6f71a923ac..6c5b4783812a 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml @@ -32,7 +32,7 @@ properties: - description: Display byte clock - description: Display byte interface clock - description: Display pixel clock - - description: Display escape clock + - description: Display core clock - description: Display AHB clock - description: Display AXI clock From 05458ee3ff82b31a4564836a84ef20d6c233dfc7 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 27 Dec 2022 17:12:05 +0000 Subject: [PATCH 237/543] arm64/signal: Always accept SVE signal frames on SME only systems commit 7dde62f0687c8856b6c0660066c7ee83a6a6f033 upstream. Currently we reject an attempt to restore a SVE signal frame on a system with SME but not SVE supported. This means that it is not possible to disable streaming mode via signal return as this is configured via the flags in the SVE signal context. Instead accept the signal frame, we will require it to have a vector length of 0 specified and no payload since the task will have no SVE vector length configured. Fixes: 85ed24dad290 ("arm64/sme: Implement streaming SVE signal handling") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20221223-arm64-fix-sme-only-v1-2-938d663f69e5@kernel.org Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/signal.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 5100893be6e4..43adbfa5ead7 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -280,7 +280,12 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) vl = task_get_sme_vl(current); } else { - if (!system_supports_sve()) + /* + * A SME only system use SVE for streaming mode so can + * have a SVE formatted context with a zero VL and no + * payload data. + */ + if (!system_supports_sve() && !system_supports_sme()) return -EINVAL; vl = task_get_sve_vl(current); From a006aaffd78ff70efcffa44e325633c0840b2bf2 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Tue, 22 Nov 2022 20:31:37 +0800 Subject: [PATCH 238/543] arm64/mm: add pud_user_exec() check in pud_user_accessible_page() commit 730a11f982e61aaef758ab552dfb7c30de79e99b upstream. Add check for the executable case in pud_user_accessible_page() too like what we did for pte and pmd. Fixes: 42b2547137f5 ("arm64/mm: enable ARCH_SUPPORTS_PAGE_TABLE_CHECK") Suggested-by: Will Deacon Signed-off-by: Liu Shixin Link: https://lore.kernel.org/r/20221122123137.429686-1-liushixin2@huawei.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index edf6625ce965..5582360209d5 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -682,7 +682,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) #define pud_leaf(pud) (pud_present(pud) && !pud_table(pud)) #define pud_valid(pud) pte_valid(pud_pte(pud)) #define pud_user(pud) pte_user(pud_pte(pud)) - +#define pud_user_exec(pud) pte_user_exec(pud_pte(pud)) static inline void set_pud(pud_t *pudp, pud_t pud) { @@ -868,7 +868,7 @@ static inline bool pmd_user_accessible_page(pmd_t pmd) static inline bool pud_user_accessible_page(pud_t pud) { - return pud_leaf(pud) && pud_user(pud); + return pud_leaf(pud) && (pud_user(pud) || pud_user_exec(pud)); } #endif From a9a6715272a7c0c51806f0cc686ad9f621ed2c12 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Thu, 29 Dec 2022 12:44:38 +0000 Subject: [PATCH 239/543] dt-bindings: msm: dsi-phy-28nm: Add missing qcom, dsi-phy-regulator-ldo-mode commit be79f805a1e1b95605c825f1c513bdd2c8b167ed upstream. Add in missing qcom,dsi-phy-regulator-ldo-mode to the 28nm DSI PHY. When converting from .txt to .yaml we missed this one. Fixes: 4dbe55c97741 ("dt-bindings: msm: dsi: add yaml schemas for DSI bindings") Reviewed-by: Dmitry Baryshkov Signed-off-by: Bryan O'Donoghue Patchwork: https://patchwork.freedesktop.org/patch/516205/ Link: https://lore.kernel.org/r/20221229124438.504770-2-bryan.odonoghue@linaro.org Signed-off-by: Abhinav Kumar Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/display/msm/dsi-phy-28nm.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml index 3d8540a06fe2..2f1fd140c87d 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml @@ -34,6 +34,10 @@ properties: vddio-supply: description: Phandle to vdd-io regulator device node. + qcom,dsi-phy-regulator-ldo-mode: + type: boolean + description: Indicates if the LDO mode PHY regulator is wanted. + required: - compatible - reg From 06f8be16be51638c8373f148680d8ec0313c73e2 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Wed, 14 Dec 2022 21:59:43 +0800 Subject: [PATCH 240/543] arm64: ptrace: Use ARM64_SME to guard the SME register enumerations commit eb9a85261e297292c4cc44b628c1373c996cedc2 upstream. We currently guard REGSET_{SSVE, ZA} using ARM64_SVE for no good reason. Both enumerations would be pointless without ARM64_SME and create two empty entries in aarch64_regsets[] which would then become part of a process's native regset view (they should be ignored though). Switch to use ARM64_SME instead. Fixes: e12310a0d30f ("arm64/sme: Implement ptrace support for streaming mode SVE registers") Signed-off-by: Zenghui Yu Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20221214135943.379-1-yuzenghui@huawei.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index c2fb5755bbec..92bc9a2d702c 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1364,7 +1364,7 @@ enum aarch64_regset { #ifdef CONFIG_ARM64_SVE REGSET_SVE, #endif -#ifdef CONFIG_ARM64_SVE +#ifdef CONFIG_ARM64_SME REGSET_SSVE, REGSET_ZA, #endif From 21e5eca0ac9046da9918a919bc92b7b5a78d27e7 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Mon, 21 Nov 2022 15:36:08 +0800 Subject: [PATCH 241/543] arm64/mm: fix incorrect file_map_count for invalid pmd commit 74c2f81054510d45b813548cb0a1c4ebf87cdd5f upstream. The page table check trigger BUG_ON() unexpectedly when split hugepage: ------------[ cut here ]------------ kernel BUG at mm/page_table_check.c:119! Internal error: Oops - BUG: 00000000f2000800 [#1] SMP Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 7 PID: 210 Comm: transhuge-stres Not tainted 6.1.0-rc3+ #748 Hardware name: linux,dummy-virt (DT) pstate: 20000005 (nzCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : page_table_check_set.isra.0+0x398/0x468 lr : page_table_check_set.isra.0+0x1c0/0x468 [...] Call trace: page_table_check_set.isra.0+0x398/0x468 __page_table_check_pte_set+0x160/0x1c0 __split_huge_pmd_locked+0x900/0x1648 __split_huge_pmd+0x28c/0x3b8 unmap_page_range+0x428/0x858 unmap_single_vma+0xf4/0x1c8 zap_page_range+0x2b0/0x410 madvise_vma_behavior+0xc44/0xe78 do_madvise+0x280/0x698 __arm64_sys_madvise+0x90/0xe8 invoke_syscall.constprop.0+0xdc/0x1d8 do_el0_svc+0xf4/0x3f8 el0_svc+0x58/0x120 el0t_64_sync_handler+0xb8/0xc0 el0t_64_sync+0x19c/0x1a0 [...] On arm64, pmd_leaf() will return true even if the pmd is invalid due to pmd_present_invalid() check. So in pmdp_invalidate() the file_map_count will not only decrease once but also increase once. Then in set_pte_at(), the file_map_count increase again, and so trigger BUG_ON() unexpectedly. Add !pmd_present_invalid() check in pmd_user_accessible_page() to fix the problem. Fixes: 42b2547137f5 ("arm64/mm: enable ARCH_SUPPORTS_PAGE_TABLE_CHECK") Reported-by: Denys Vlasenko Signed-off-by: Liu Shixin Acked-by: Pasha Tatashin Acked-by: David Hildenbrand Reviewed-by: Kefeng Wang Acked-by: Will Deacon Link: https://lore.kernel.org/r/20221121073608.4183459-1-liushixin2@huawei.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 5582360209d5..f1cfc44ef52f 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -863,7 +863,7 @@ static inline bool pte_user_accessible_page(pte_t pte) static inline bool pmd_user_accessible_page(pmd_t pmd) { - return pmd_leaf(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); + return pmd_leaf(pmd) && !pmd_present_invalid(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); } static inline bool pud_user_accessible_page(pud_t pud) From 4b5d9c95b437e43c788d4fed9c5cc23764ea029d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 15 Dec 2022 16:43:57 +0100 Subject: [PATCH 242/543] platform/x86: ideapad-laptop: Add Legion 5 15ARH05 DMI id to set_fn_lock_led_list[] commit f4b7f8febd4d9b615fbec2a06bf352b9c3729b11 upstream. The Lenovo Legion 5 15ARH05 needs ideapad-laptop to call SALS_FNLOCK_ON / SALS_FNLOCK_OFF on Fn-lock state change to get the LED in the Fn key to correctly reflect the Fn-lock state. Add a DMI match for the Legion 5 15ARH05 to the set_fn_lock_led_list[] table for this. Fixes: 81a5603a0f50 ("platform/x86: ideapad-laptop: Fix interrupt storm on fn-lock toggle on some Yoga laptops") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20221215154357.123876-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/ideapad-laptop.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index fc3d47a75944..4e28c55f0ea5 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1615,6 +1615,12 @@ static const struct dmi_system_id set_fn_lock_led_list[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Legion R7000P2020H"), } }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Legion 5 15ARH05"), + } + }, {} }; From b7dcbca46db3c77fdb02c2a9d6239e5aa3b06a59 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Tue, 27 Dec 2022 18:16:24 -0800 Subject: [PATCH 243/543] drm/msm/dp: do not complete dp_aux_cmd_fifo_tx() if irq is not for aux transfer commit 1cba0d150fa102439114a91b3e215909efc9f169 upstream. There are 3 possible interrupt sources are handled by DP controller, HPDstatus, Controller state changes and Aux read/write transaction. At every irq, DP controller have to check isr status of every interrupt sources and service the interrupt if its isr status bits shows interrupts are pending. There is potential race condition may happen at current aux isr handler implementation since it is always complete dp_aux_cmd_fifo_tx() even irq is not for aux read or write transaction. This may cause aux read transaction return premature if host aux data read is in the middle of waiting for sink to complete transferring data to host while irq happen. This will cause host's receiving buffer contains unexpected data. This patch fixes this problem by checking aux isr and return immediately at aux isr handler if there are no any isr status bits set. Current there is a bug report regrading eDP edid corruption happen during system booting up. After lengthy debugging to found that VIDEO_READY interrupt was continuously firing during system booting up which cause dp_aux_isr() to complete dp_aux_cmd_fifo_tx() prematurely to retrieve data from aux hardware buffer which is not yet contains complete data transfer from sink. This cause edid corruption. Follows are the signature at kernel logs when problem happen, EDID has corrupt header panel-simple-dp-aux aux-aea0000.edp: Couldn't identify panel via EDID Changes in v2: -- do complete if (ret == IRQ_HANDLED) ay dp-aux_isr() -- add more commit text Changes in v3: -- add Stephen suggested -- dp_aux_isr() return IRQ_XXX back to caller -- dp_ctrl_isr() return IRQ_XXX back to caller Changes in v4: -- split into two patches Changes in v5: -- delete empty line between tags Changes in v6: -- remove extra "that" and fixed line more than 75 char at commit text Fixes: c943b4948b58 ("drm/msm/dp: add displayPort driver support") Signed-off-by: Kuogee Hsieh Tested-by: Douglas Anderson Reviewed-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/516121/ Link: https://lore.kernel.org/r/1672193785-11003-2-git-send-email-quic_khsieh@quicinc.com Signed-off-by: Abhinav Kumar Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/msm/dp/dp_aux.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/msm/dp/dp_aux.c b/drivers/gpu/drm/msm/dp/dp_aux.c index d030a93a08c3..cc3efed593aa 100644 --- a/drivers/gpu/drm/msm/dp/dp_aux.c +++ b/drivers/gpu/drm/msm/dp/dp_aux.c @@ -423,6 +423,10 @@ void dp_aux_isr(struct drm_dp_aux *dp_aux) isr = dp_catalog_aux_get_irq(aux->catalog); + /* no interrupts pending, return immediately */ + if (!isr) + return; + if (!aux->cmd_busy) return; From 3c0d5bb904a24e56eb27aa0e5fcd3fb34203c4f3 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 16 Nov 2022 17:32:18 +0100 Subject: [PATCH 244/543] dt-bindings: msm/dsi: Don't require vdds-supply on 10nm PHY commit ef11cb7a29c0e13031c968190ea8f86104e7fb6a upstream. On some SoCs (hello SM6350) vdds-supply is not wired to any smd-rpm or rpmh regulator, but instead powered by the VDD_MX/mx.lvl line, which is voted for in the DSI ctrl node. Signed-off-by: Konrad Dybcio Acked-by: Rob Herring Fixes: 8fc939e72ff8 ("dt-bindings: msm: dsi: add yaml schemas for DSI PHY bindings") Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/511889/ Link: https://lore.kernel.org/r/20221116163218.42449-1-konrad.dybcio@linaro.org Signed-off-by: Abhinav Kumar Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml index d9ad8b659f58..3ec466c3ab38 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml @@ -69,7 +69,6 @@ required: - compatible - reg - reg-names - - vdds-supply unevaluatedProperties: false From 0f21e225ff48a6110d3c4b1159e2f6816759705d Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 30 Nov 2022 14:58:07 +0100 Subject: [PATCH 245/543] dt-bindings: msm/dsi: Don't require vcca-supply on 14nm PHY commit a2117773c839a8439a3771e0c040b5c505b083a7 upstream. On some SoCs (hello SM6115) vcca-supply is not wired to any smd-rpm or rpmh regulator, but instead powered by the VDD_MX line, which is voted for in the DSI ctrl node. Signed-off-by: Konrad Dybcio Acked-by: Krzysztof Kozlowski Reviewed-by: Dmitry Baryshkov Fixes: 8fc939e72ff8 ("dt-bindings: msm: dsi: add yaml schemas for DSI PHY bindings") Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/513555/ Link: https://lore.kernel.org/r/20221130135807.45028-1-konrad.dybcio@linaro.org Signed-off-by: Abhinav Kumar Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml index 1342d74ecfe0..0a7b5f110d88 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml @@ -38,7 +38,6 @@ required: - compatible - reg - reg-names - - vcca-supply unevaluatedProperties: false From fb8534b7960fe4df3c1014fc2da4ac35a6605b54 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 13 Dec 2022 13:29:43 +0100 Subject: [PATCH 246/543] platform/x86: sony-laptop: Don't turn off 0x153 keyboard backlight during probe commit ad75bd85b1db69c97eefea07b375567821f6ef58 upstream. The 0x153 version of the kbd backlight control SNC handle has no separate address to probe if the backlight is there. This turns the probe call into a set keyboard backlight call with a value of 0 turning off the keyboard backlight. Skip probing when there is no separate probe address to avoid this. Link: https://bugzilla.redhat.com/show_bug.cgi?id=1583752 Fixes: 800f20170dcf ("Keyboard backlight control for some Vaio Fit models") Signed-off-by: Hans de Goede Reviewed-by: Mattia Dongili Link: https://lore.kernel.org/r/20221213122943.11123-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/sony-laptop.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 765fcaba4d12..5ff5aaf92b56 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -1888,14 +1888,21 @@ static int sony_nc_kbd_backlight_setup(struct platform_device *pd, break; } - ret = sony_call_snc_handle(handle, probe_base, &result); - if (ret) - return ret; + /* + * Only probe if there is a separate probe_base, otherwise the probe call + * is equivalent to __sony_nc_kbd_backlight_mode_set(0), resulting in + * the keyboard backlight being turned off. + */ + if (probe_base) { + ret = sony_call_snc_handle(handle, probe_base, &result); + if (ret) + return ret; - if ((handle == 0x0137 && !(result & 0x02)) || - !(result & 0x01)) { - dprintk("no backlight keyboard found\n"); - return 0; + if ((handle == 0x0137 && !(result & 0x02)) || + !(result & 0x01)) { + dprintk("no backlight keyboard found\n"); + return 0; + } } kbdbl_ctl = kzalloc(sizeof(*kbdbl_ctl), GFP_KERNEL); From c49996c6aa03590e4ef5add8772cb6068d99fd59 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 29 Nov 2022 09:57:48 +0800 Subject: [PATCH 247/543] ixgbe: fix pci device refcount leak commit b93fb4405fcb5112c5739c5349afb52ec7f15c07 upstream. As the comment of pci_get_domain_bus_and_slot() says, it returns a PCI device with refcount incremented, when finish using it, the caller must decrement the reference count by calling pci_dev_put(). In ixgbe_get_first_secondary_devfn() and ixgbe_x550em_a_has_mii(), pci_dev_put() is called to avoid leak. Fixes: 8fa10ef01260 ("ixgbe: register a mdiobus") Signed-off-by: Yang Yingliang Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index 24aa97f993ca..123dca9ce468 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -855,9 +855,11 @@ static struct pci_dev *ixgbe_get_first_secondary_devfn(unsigned int devfn) rp_pdev = pci_get_domain_bus_and_slot(0, 0, devfn); if (rp_pdev && rp_pdev->subordinate) { bus = rp_pdev->subordinate->number; + pci_dev_put(rp_pdev); return pci_get_domain_bus_and_slot(0, bus, 0); } + pci_dev_put(rp_pdev); return NULL; } @@ -874,6 +876,7 @@ static bool ixgbe_x550em_a_has_mii(struct ixgbe_hw *hw) struct ixgbe_adapter *adapter = hw->back; struct pci_dev *pdev = adapter->pdev; struct pci_dev *func0_pdev; + bool has_mii = false; /* For the C3000 family of SoCs (x550em_a) the internal ixgbe devices * are always downstream of root ports @ 0000:00:16.0 & 0000:00:17.0 @@ -884,15 +887,16 @@ static bool ixgbe_x550em_a_has_mii(struct ixgbe_hw *hw) func0_pdev = ixgbe_get_first_secondary_devfn(PCI_DEVFN(0x16, 0)); if (func0_pdev) { if (func0_pdev == pdev) - return true; - else - return false; + has_mii = true; + goto out; } func0_pdev = ixgbe_get_first_secondary_devfn(PCI_DEVFN(0x17, 0)); if (func0_pdev == pdev) - return true; + has_mii = true; - return false; +out: + pci_dev_put(func0_pdev); + return has_mii; } /** From 0afa5f0736584411771299074bbeca8c1f9706d4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 10 Jan 2023 08:59:06 +0800 Subject: [PATCH 248/543] ipv6: raw: Deduct extension header length in rawv6_push_pending_frames commit cb3e9864cdbe35ff6378966660edbcbac955fe17 upstream. The total cork length created by ip6_append_data includes extension headers, so we must exclude them when comparing them against the IPV6_CHECKSUM offset which does not include extension headers. Reported-by: Kyle Zeng Fixes: 357b40a18b04 ("[IPV6]: IPV6_CHECKSUM socket option can corrupt kernel memory") Signed-off-by: Herbert Xu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/raw.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 722de9dd0ff7..8ffeac745656 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -505,6 +505,7 @@ csum_copy_err: static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct raw6_sock *rp) { + struct ipv6_txoptions *opt; struct sk_buff *skb; int err = 0; int offset; @@ -522,6 +523,9 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, offset = rp->offset; total_len = inet_sk(sk)->cork.base.length; + opt = inet6_sk(sk)->cork.opt; + total_len -= opt ? opt->opt_flen : 0; + if (offset >= total_len - 1) { err = -EINVAL; ip6_flush_pending_frames(sk); From 5e4194d89ffef609f980af4f5bc0e7eca03497e7 Mon Sep 17 00:00:00 2001 From: Daniil Tatianin Date: Tue, 20 Dec 2022 09:32:46 +0300 Subject: [PATCH 249/543] iavf/iavf_main: actually log ->src mask when talking about it commit 6650c8e906ce58404bfdfceceeba7bd10d397d40 upstream. This fixes a copy-paste issue where dev_err would log the dst mask even though it is clearly talking about src. Found by Linux Verification Center (linuxtesting.org) with the SVACE static analysis tool. Fixes: 0075fa0fadd0 ("i40evf: Add support to apply cloud filters") Signed-off-by: Daniil Tatianin Reviewed-by: Michal Swiatkowski Signed-off-by: Tony Nguyen Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/iavf/iavf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index f71e132ede09..260c55951c28 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -3850,7 +3850,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, field_flags |= IAVF_CLOUD_FIELD_IIP; } else { dev_err(&adapter->pdev->dev, "Bad ip src mask 0x%08x\n", - be32_to_cpu(match.mask->dst)); + be32_to_cpu(match.mask->src)); return -EINVAL; } } From 5c855bcc730656c4b7d30aaddcd0eafc7003e112 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 15 Sep 2022 16:26:51 -0700 Subject: [PATCH 250/543] drm/i915/gt: Cleanup partial engine discovery failures [ Upstream commit 78a033433a5ae4fee85511ee075bc9a48312c79e ] If we abort driver initialisation in the middle of gt/engine discovery, some engines will be fully setup and some not. Those incompletely setup engines only have 'engine->release == NULL' and so will leak any of the common objects allocated. v2: - Drop the destroy_pinned_context() helper for now. It's not really worth it with just a single callsite at the moment. (Janusz) Signed-off-by: Chris Wilson Cc: Janusz Krzysztofik Signed-off-by: Matt Roper Reviewed-by: Janusz Krzysztofik Link: https://patchwork.freedesktop.org/patch/msgid/20220915232654.3283095-2-matthew.d.roper@intel.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 83bfeb872bda..fcbccd8d244e 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1343,8 +1343,13 @@ int intel_engines_init(struct intel_gt *gt) return err; err = setup(engine); - if (err) + if (err) { + intel_engine_cleanup_common(engine); return err; + } + + /* The backend should now be responsible for cleanup */ + GEM_BUG_ON(engine->release == NULL); err = engine_init_common(engine); if (err) From 9bd180b6f9d0a03f66b2813358483ba89234904b Mon Sep 17 00:00:00 2001 From: Ferry Toth Date: Mon, 5 Dec 2022 21:15:26 +0100 Subject: [PATCH 251/543] usb: ulpi: defer ulpi_register on ulpi_read_id timeout [ Upstream commit 8a7b31d545d3a15f0e6f5984ae16f0ca4fd76aac ] Since commit 0f0101719138 ("usb: dwc3: Don't switch OTG -> peripheral if extcon is present") Dual Role support on Intel Merrifield platform broke due to rearranging the call to dwc3_get_extcon(). It appears to be caused by ulpi_read_id() on the first test write failing with -ETIMEDOUT. Currently ulpi_read_id() expects to discover the phy via DT when the test write fails and returns 0 in that case, even if DT does not provide the phy. As a result usb probe completes without phy. Make ulpi_read_id() return -ETIMEDOUT to its user if the first test write fails. The user should then handle it appropriately. A follow up patch will make dwc3_core_init() set -EPROBE_DEFER in this case and bail out. Fixes: ef6a7bcfb01c ("usb: ulpi: Support device discovery via DT") Cc: stable@vger.kernel.org Acked-by: Heikki Krogerus Signed-off-by: Ferry Toth Link: https://lore.kernel.org/r/20221205201527.13525-2-ftoth@exalondelft.nl Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/common/ulpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c index d7c8461976ce..60e8174686a1 100644 --- a/drivers/usb/common/ulpi.c +++ b/drivers/usb/common/ulpi.c @@ -207,7 +207,7 @@ static int ulpi_read_id(struct ulpi *ulpi) /* Test the interface */ ret = ulpi_write(ulpi, ULPI_SCRATCH, 0xaa); if (ret < 0) - goto err; + return ret; ret = ulpi_read(ulpi, ULPI_SCRATCH); if (ret < 0) From e59d46eed1d930318f36a90138898f7fa7730389 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Tue, 8 Nov 2022 08:30:36 +0800 Subject: [PATCH 252/543] drm/amd/pm: enable mode1 reset on smu_v13_0_10 [ Upstream commit 60cfad329ab877cb62975ea78ed442c2496990ba ] enable mode1 reset and prioritize debug port on smu_v13_0_10 as a more reliable message processing v2 - move mode1 reset callback to smu_v13_0_0_ppt.c Signed-off-by: Kenneth Feng Reviewed-by: Yang Wang Signed-off-by: Alex Deucher Stable-dep-of: 1794f6a9535b ("drm/amd/pm: enable GPO dynamic control support for SMU13.0.0") Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/soc21.c | 1 + drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 4 ++ .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 53 ++++++++++++++++++- drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 18 +++++++ drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h | 3 ++ 5 files changed, 77 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 8b297ade69a2..f1913d879811 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -322,6 +322,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev) switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 0): case IP_VERSION(13, 0, 7): + case IP_VERSION(13, 0, 10): return AMD_RESET_METHOD_MODE1; case IP_VERSION(13, 0, 4): return AMD_RESET_METHOD_MODE2; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index f816b1dd110e..44bbf17e4bef 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -568,6 +568,10 @@ struct smu_context u32 param_reg; u32 msg_reg; u32 resp_reg; + + u32 debug_param_reg; + u32 debug_msg_reg; + u32 debug_resp_reg; }; struct i2c_adapter; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index be43de9dd496..73bae7eaefa2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -70,6 +70,26 @@ #define MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE 0x4000 +#define mmMP1_SMN_C2PMSG_66 0x0282 +#define mmMP1_SMN_C2PMSG_66_BASE_IDX 0 + +#define mmMP1_SMN_C2PMSG_82 0x0292 +#define mmMP1_SMN_C2PMSG_82_BASE_IDX 0 + +#define mmMP1_SMN_C2PMSG_90 0x029a +#define mmMP1_SMN_C2PMSG_90_BASE_IDX 0 + +#define mmMP1_SMN_C2PMSG_75 0x028b +#define mmMP1_SMN_C2PMSG_75_BASE_IDX 0 + +#define mmMP1_SMN_C2PMSG_53 0x0275 +#define mmMP1_SMN_C2PMSG_53_BASE_IDX 0 + +#define mmMP1_SMN_C2PMSG_54 0x0276 +#define mmMP1_SMN_C2PMSG_54_BASE_IDX 0 + +#define DEBUGSMC_MSG_Mode1Reset 2 + static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1), @@ -1879,6 +1899,35 @@ static int smu_v13_0_0_set_df_cstate(struct smu_context *smu, NULL); } +static int smu_v13_0_0_mode1_reset(struct smu_context *smu) +{ + int ret; + struct amdgpu_device *adev = smu->adev; + + if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 10)) + ret = smu_cmn_send_debug_smc_msg(smu, DEBUGSMC_MSG_Mode1Reset); + else + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_Mode1Reset, NULL); + + if (!ret) + msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS); + + return ret; +} + +static void smu_v13_0_0_set_smu_mailbox_registers(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + + smu->param_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_82); + smu->msg_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_66); + smu->resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90); + + smu->debug_param_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_53); + smu->debug_msg_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_75); + smu->debug_resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_54); +} + static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_0_set_default_dpm_table, @@ -1946,7 +1995,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .baco_enter = smu_v13_0_0_baco_enter, .baco_exit = smu_v13_0_0_baco_exit, .mode1_reset_is_support = smu_v13_0_0_is_mode1_reset_supported, - .mode1_reset = smu_v13_0_mode1_reset, + .mode1_reset = smu_v13_0_0_mode1_reset, .set_mp1_state = smu_v13_0_0_set_mp1_state, .set_df_cstate = smu_v13_0_0_set_df_cstate, }; @@ -1960,5 +2009,5 @@ void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu) smu->table_map = smu_v13_0_0_table_map; smu->pwr_src_map = smu_v13_0_0_pwr_src_map; smu->workload_map = smu_v13_0_0_workload_map; - smu_v13_0_set_smu_mailbox_registers(smu); + smu_v13_0_0_set_smu_mailbox_registers(smu); } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index e4f8f90ac5aa..768b6e7dbd77 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -233,6 +233,18 @@ static void __smu_cmn_send_msg(struct smu_context *smu, WREG32(smu->msg_reg, msg); } +static int __smu_cmn_send_debug_msg(struct smu_context *smu, + u32 msg, + u32 param) +{ + struct amdgpu_device *adev = smu->adev; + + WREG32(smu->debug_param_reg, param); + WREG32(smu->debug_msg_reg, msg); + WREG32(smu->debug_resp_reg, 0); + + return 0; +} /** * smu_cmn_send_msg_without_waiting -- send the message; don't wait for status * @smu: pointer to an SMU context @@ -386,6 +398,12 @@ int smu_cmn_send_smc_msg(struct smu_context *smu, read_arg); } +int smu_cmn_send_debug_smc_msg(struct smu_context *smu, + uint32_t msg) +{ + return __smu_cmn_send_debug_msg(smu, msg, 0); +} + int smu_cmn_to_asic_specific_index(struct smu_context *smu, enum smu_cmn2asic_mapping_type type, uint32_t index) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h index 1526ce09c399..f82cf76dd3a4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h @@ -42,6 +42,9 @@ int smu_cmn_send_smc_msg(struct smu_context *smu, enum smu_message_type msg, uint32_t *read_arg); +int smu_cmn_send_debug_smc_msg(struct smu_context *smu, + uint32_t msg); + int smu_cmn_wait_for_response(struct smu_context *smu); int smu_cmn_to_asic_specific_index(struct smu_context *smu, From 5e594dbf6e76cf0b75ab4cff2e5d42a3f63963a5 Mon Sep 17 00:00:00 2001 From: Candice Li Date: Thu, 17 Nov 2022 20:34:15 +0800 Subject: [PATCH 253/543] drm/amd/pm: Enable bad memory page/channel recording support for smu v13_0_0 [ Upstream commit 48aa62f07467c8fcd4b4ec7851e13c83e89a1558 ] Send message to SMU to update bad memory page and bad channel info. Signed-off-by: Candice Li Reviewed-by: Evan Quan Signed-off-by: Alex Deucher Stable-dep-of: 1794f6a9535b ("drm/amd/pm: enable GPO dynamic control support for SMU13.0.0") Signed-off-by: Sasha Levin --- .../pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h | 8 +++- drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 4 +- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 39 +++++++++++++++++++ 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h index 9ebb8f39732a..8b8266890a10 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h @@ -131,7 +131,13 @@ #define PPSMC_MSG_EnableAudioStutterWA 0x44 #define PPSMC_MSG_PowerUpUmsch 0x45 #define PPSMC_MSG_PowerDownUmsch 0x46 -#define PPSMC_Message_Count 0x47 +#define PPSMC_MSG_SetDcsArch 0x47 +#define PPSMC_MSG_TriggerVFFLR 0x48 +#define PPSMC_MSG_SetNumBadMemoryPagesRetired 0x49 +#define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4A +#define PPSMC_MSG_SetPriorityDeltaGain 0x4B +#define PPSMC_MSG_AllowIHHostInterrupt 0x4C +#define PPSMC_Message_Count 0x4D //Debug Dump Message #define DEBUGSMC_MSG_TestMessage 0x1 diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index 58098b82df66..a4e3425b1027 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -239,7 +239,9 @@ __SMU_DUMMY_MAP(DriverMode2Reset), \ __SMU_DUMMY_MAP(GetGfxOffStatus), \ __SMU_DUMMY_MAP(GetGfxOffEntryCount), \ - __SMU_DUMMY_MAP(LogGfxOffResidency), + __SMU_DUMMY_MAP(LogGfxOffResidency), \ + __SMU_DUMMY_MAP(SetNumBadMemoryPagesRetired), \ + __SMU_DUMMY_MAP(SetBadMemoryPagesRetiredFlagsPerChannel), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 73bae7eaefa2..884d4176b412 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -141,6 +141,9 @@ static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 0), MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0), MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0), + MSG_MAP(SetNumBadMemoryPagesRetired, PPSMC_MSG_SetNumBadMemoryPagesRetired, 0), + MSG_MAP(SetBadMemoryPagesRetiredFlagsPerChannel, + PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel, 0), }; static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = { @@ -1928,6 +1931,40 @@ static void smu_v13_0_0_set_smu_mailbox_registers(struct smu_context *smu) smu->debug_resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_54); } +static int smu_v13_0_0_smu_send_bad_mem_page_num(struct smu_context *smu, + uint32_t size) +{ + int ret = 0; + + /* message SMU to update the bad page number on SMUBUS */ + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetNumBadMemoryPagesRetired, + size, NULL); + if (ret) + dev_err(smu->adev->dev, + "[%s] failed to message SMU to update bad memory pages number\n", + __func__); + + return ret; +} + +static int smu_v13_0_0_send_bad_mem_channel_flag(struct smu_context *smu, + uint32_t size) +{ + int ret = 0; + + /* message SMU to update the bad channel info on SMUBUS */ + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetBadMemoryPagesRetiredFlagsPerChannel, + size, NULL); + if (ret) + dev_err(smu->adev->dev, + "[%s] failed to message SMU to update bad memory pages channel info\n", + __func__); + + return ret; +} + static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_0_set_default_dpm_table, @@ -1998,6 +2035,8 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .mode1_reset = smu_v13_0_0_mode1_reset, .set_mp1_state = smu_v13_0_0_set_mp1_state, .set_df_cstate = smu_v13_0_0_set_df_cstate, + .send_hbm_bad_pages_num = smu_v13_0_0_smu_send_bad_mem_page_num, + .send_hbm_bad_channel_flag = smu_v13_0_0_send_bad_mem_channel_flag, }; void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu) From d167ce6c47db12de69d2c2e7bdde9e608183d2a1 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 2 Dec 2022 13:56:35 +0800 Subject: [PATCH 254/543] drm/amd/pm: enable GPO dynamic control support for SMU13.0.0 [ Upstream commit 1794f6a9535bb5234c2b747d1bc6dad03249245a ] To better support UMD pstate profilings, the GPO feature needs to be switched on/off accordingly. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 3 ++- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 3 +++ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 15 +++++++++++++++ .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 2 ++ 4 files changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index a4e3425b1027..4180c71d930f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -241,7 +241,8 @@ __SMU_DUMMY_MAP(GetGfxOffEntryCount), \ __SMU_DUMMY_MAP(LogGfxOffResidency), \ __SMU_DUMMY_MAP(SetNumBadMemoryPagesRetired), \ - __SMU_DUMMY_MAP(SetBadMemoryPagesRetiredFlagsPerChannel), + __SMU_DUMMY_MAP(SetBadMemoryPagesRetiredFlagsPerChannel), \ + __SMU_DUMMY_MAP(AllowGpo), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index a9122b3b1532..e8c6febb8b64 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -273,6 +273,9 @@ int smu_v13_0_init_pptable_microcode(struct smu_context *smu); int smu_v13_0_run_btc(struct smu_context *smu); +int smu_v13_0_gpo_control(struct smu_context *smu, + bool enablement); + int smu_v13_0_deep_sleep_control(struct smu_context *smu, bool enablement); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index cfb7f4475c82..9f9f64c5cdd8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -2148,6 +2148,21 @@ int smu_v13_0_run_btc(struct smu_context *smu) return res; } +int smu_v13_0_gpo_control(struct smu_context *smu, + bool enablement) +{ + int res; + + res = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_AllowGpo, + enablement ? 1 : 0, + NULL); + if (res) + dev_err(smu->adev->dev, "SetGpoAllow %d failed!\n", enablement); + + return res; +} + int smu_v13_0_deep_sleep_control(struct smu_context *smu, bool enablement) { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 884d4176b412..4c20d17e7416 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -144,6 +144,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(SetNumBadMemoryPagesRetired, PPSMC_MSG_SetNumBadMemoryPagesRetired, 0), MSG_MAP(SetBadMemoryPagesRetiredFlagsPerChannel, PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel, 0), + MSG_MAP(AllowGpo, PPSMC_MSG_SetGpoAllow, 0), }; static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = { @@ -2037,6 +2038,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .set_df_cstate = smu_v13_0_0_set_df_cstate, .send_hbm_bad_pages_num = smu_v13_0_0_smu_send_bad_mem_page_num, .send_hbm_bad_channel_flag = smu_v13_0_0_send_bad_mem_channel_flag, + .gpo_control = smu_v13_0_gpo_control, }; void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu) From d90de26bdc97a51a195116428fbb4776394f79a7 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 2 Dec 2022 14:03:45 +0800 Subject: [PATCH 255/543] drm/amd/pm: enable GPO dynamic control support for SMU13.0.7 [ Upstream commit 62b9f835a6c60171845642afec4ce4b44865f10f ] To better support UMD pstate profilings, the GPO feature needs to be switched on/off accordingly. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 31deec2ce4b3..eea06939e7da 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -123,6 +123,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(SetMGpuFanBoostLimitRpm, PPSMC_MSG_SetMGpuFanBoostLimitRpm, 0), MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0), MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0), + MSG_MAP(AllowGpo, PPSMC_MSG_SetGpoAllow, 0), }; static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT] = { @@ -1712,6 +1713,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .mode1_reset = smu_v13_0_mode1_reset, .set_mp1_state = smu_v13_0_7_set_mp1_state, .set_df_cstate = smu_v13_0_7_set_df_cstate, + .gpo_control = smu_v13_0_gpo_control, }; void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu) From 6ef4543f1f3b21b108021f3dec9bd02bf9f200ee Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Wed, 12 Oct 2022 12:54:52 +0800 Subject: [PATCH 256/543] drm/amdgpu: add soc21 common ip block support for GC 11.0.4 [ Upstream commit 311d52367d0a7985ee1132662bad46f09169eed2 ] Add common soc21 ip block support for GC 11.0.4. Signed-off-by: Yifan Zhang Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher Stable-dep-of: e1d900df63ad ("drm/amdgpu: enable VCN DPG for GC IP v11.0.4") Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/soc21.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index f1913d879811..26f1e4edb4d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -653,6 +653,12 @@ static int soc21_common_early_init(void *handle) } adev->external_rev_id = adev->rev_id + 0x20; break; + case IP_VERSION(11, 0, 4): + adev->cg_flags = 0; + adev->pg_flags = 0; + adev->external_rev_id = adev->rev_id + 0x1; + break; + default: /* FIXME: not supported yet */ return -EINVAL; From e30be0ada6e2a55e8581aa55860afc6cf5183300 Mon Sep 17 00:00:00 2001 From: Saleemkhan Jamadar Date: Tue, 8 Nov 2022 11:24:48 +0530 Subject: [PATCH 257/543] drm/amdgpu: Enable pg/cg flags on GC11_0_4 for VCN [ Upstream commit 2a0fe2ca6e9c9bf9c47a9f9f0d67c13281a13f8c ] This enable VCN PG, CG and JPEG PG, CG Signed-off-by: Saleemkhan Jamadar Reviewed-by: Leo Liu Signed-off-by: Yifan Zhang Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher Stable-dep-of: e1d900df63ad ("drm/amdgpu: enable VCN DPG for GC IP v11.0.4") Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/soc21.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 26f1e4edb4d5..599ddc28d8e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -654,8 +654,11 @@ static int soc21_common_early_init(void *handle) adev->external_rev_id = adev->rev_id + 0x20; break; case IP_VERSION(11, 0, 4): - adev->cg_flags = 0; - adev->pg_flags = 0; + adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG; + adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_JPEG; adev->external_rev_id = adev->rev_id + 0x1; break; From 309278736f35f0937d6338d5986a6a4a48e90d2f Mon Sep 17 00:00:00 2001 From: Saleemkhan Jamadar Date: Tue, 20 Dec 2022 13:21:44 +0530 Subject: [PATCH 258/543] drm/amdgpu: enable VCN DPG for GC IP v11.0.4 [ Upstream commit e1d900df63adcb748905131dd6258e570e11aed1 ] Enable VCN Dynamic Power Gating control for GC IP v11.0.4. Signed-off-by: Saleemkhan Jamadar Reviewed-by: Veerabadhran Gopalakrishnan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0, 6.1 Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/soc21.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 599ddc28d8e1..909cf9f220c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -657,6 +657,7 @@ static int soc21_common_early_init(void *handle) adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG; adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_JPEG; adev->external_rev_id = adev->rev_id + 0x1; From 68a6f7dbf8a4fd4be9a99892ee3a5809c99778a7 Mon Sep 17 00:00:00 2001 From: Aaron Thompson Date: Fri, 6 Jan 2023 22:22:44 +0000 Subject: [PATCH 259/543] mm: Always release pages to the buddy allocator in memblock_free_late(). commit 115d9d77bb0f9152c60b6e8646369fa7f6167593 upstream. If CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled, memblock_free_pages() only releases pages to the buddy allocator if they are not in the deferred range. This is correct for free pages (as defined by for_each_free_mem_pfn_range_in_zone()) because free pages in the deferred range will be initialized and released as part of the deferred init process. memblock_free_pages() is called by memblock_free_late(), which is used to free reserved ranges after memblock_free_all() has run. All pages in reserved ranges have been initialized at that point, and accordingly, those pages are not touched by the deferred init process. This means that currently, if the pages that memblock_free_late() intends to release are in the deferred range, they will never be released to the buddy allocator. They will forever be reserved. In addition, memblock_free_pages() calls kmsan_memblock_free_pages(), which is also correct for free pages but is not correct for reserved pages. KMSAN metadata for reserved pages is initialized by kmsan_init_shadow(), which runs shortly before memblock_free_all(). For both of these reasons, memblock_free_pages() should only be called for free pages, and memblock_free_late() should call __free_pages_core() directly instead. One case where this issue can occur in the wild is EFI boot on x86_64. The x86 EFI code reserves all EFI boot services memory ranges via memblock_reserve() and frees them later via memblock_free_late() (efi_reserve_boot_services() and efi_free_boot_services(), respectively). If any of those ranges happens to fall within the deferred init range, the pages will not be released and that memory will be unavailable. For example, on an Amazon EC2 t3.micro VM (1 GB) booting via EFI: v6.2-rc2: # grep -E 'Node|spanned|present|managed' /proc/zoneinfo Node 0, zone DMA spanned 4095 present 3999 managed 3840 Node 0, zone DMA32 spanned 246652 present 245868 managed 178867 v6.2-rc2 + patch: # grep -E 'Node|spanned|present|managed' /proc/zoneinfo Node 0, zone DMA spanned 4095 present 3999 managed 3840 Node 0, zone DMA32 spanned 246652 present 245868 managed 222816 # +43,949 pages Fixes: 3a80a7fa7989 ("mm: meminit: initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set") Signed-off-by: Aaron Thompson Link: https://lore.kernel.org/r/01010185892de53e-e379acfb-7044-4b24-b30a-e2657c1ba989-000000@us-west-2.amazonses.com Signed-off-by: Mike Rapoport (IBM) Signed-off-by: Greg Kroah-Hartman --- mm/memblock.c | 8 +++++++- tools/testing/memblock/internal.h | 4 ++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/mm/memblock.c b/mm/memblock.c index 511d4783dcf1..fc3d8fbd2060 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1640,7 +1640,13 @@ void __init memblock_free_late(phys_addr_t base, phys_addr_t size) end = PFN_DOWN(base + size); for (; cursor < end; cursor++) { - memblock_free_pages(pfn_to_page(cursor), cursor, 0); + /* + * Reserved pages are always initialized by the end of + * memblock_free_all() (by memmap_init() and, if deferred + * initialization is enabled, memmap_init_reserved_pages()), so + * these pages can be released directly to the buddy allocator. + */ + __free_pages_core(pfn_to_page(cursor), 0); totalram_pages_inc(); } } diff --git a/tools/testing/memblock/internal.h b/tools/testing/memblock/internal.h index fdb7f5db7308..85973e55489e 100644 --- a/tools/testing/memblock/internal.h +++ b/tools/testing/memblock/internal.h @@ -15,6 +15,10 @@ bool mirrored_kernelcore = false; struct page {}; +void __free_pages_core(struct page *page, unsigned int order) +{ +} + void memblock_free_pages(struct page *page, unsigned long pfn, unsigned int order) { From 61cbf790e7329ed78877560be7136f0b911bba7f Mon Sep 17 00:00:00 2001 From: Yunfei Wang Date: Wed, 11 Jan 2023 14:38:00 +0800 Subject: [PATCH 260/543] iommu/iova: Fix alloc iova overflows issue commit dcdb3ba7e2a8caae7bfefd603bc22fd0ce9a389c upstream. In __alloc_and_insert_iova_range, there is an issue that retry_pfn overflows. The value of iovad->anchor.pfn_hi is ~0UL, then when iovad->cached_node is iovad->anchor, curr_iova->pfn_hi + 1 will overflow. As a result, if the retry logic is executed, low_pfn is updated to 0, and then new_pfn < low_pfn returns false to make the allocation successful. This issue occurs in the following two situations: 1. The first iova size exceeds the domain size. When initializing iova domain, iovad->cached_node is assigned as iovad->anchor. For example, the iova domain size is 10M, start_pfn is 0x1_F000_0000, and the iova size allocated for the first time is 11M. The following is the log information, new->pfn_lo is smaller than iovad->cached_node. Example log as follows: [ 223.798112][T1705487] sh: [name:iova&]__alloc_and_insert_iova_range start_pfn:0x1f0000,retry_pfn:0x0,size:0xb00,limit_pfn:0x1f0a00 [ 223.799590][T1705487] sh: [name:iova&]__alloc_and_insert_iova_range success start_pfn:0x1f0000,new->pfn_lo:0x1efe00,new->pfn_hi:0x1f08ff 2. The node with the largest iova->pfn_lo value in the iova domain is deleted, iovad->cached_node will be updated to iovad->anchor, and then the alloc iova size exceeds the maximum iova size that can be allocated in the domain. After judging that retry_pfn is less than limit_pfn, call retry_pfn+1 to fix the overflow issue. Signed-off-by: jianjiao zeng Signed-off-by: Yunfei Wang Cc: # 5.15.* Fixes: 4e89dce72521 ("iommu/iova: Retry from last rb tree node if iova search fails") Acked-by: Robin Murphy Link: https://lore.kernel.org/r/20230111063801.25107-1-yf.wang@mediatek.com Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/iova.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index a44ad92fc5eb..fe452ce46642 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -197,7 +197,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, curr = __get_cached_rbnode(iovad, limit_pfn); curr_iova = to_iova(curr); - retry_pfn = curr_iova->pfn_hi + 1; + retry_pfn = curr_iova->pfn_hi; retry: do { @@ -211,7 +211,7 @@ retry: if (high_pfn < size || new_pfn < low_pfn) { if (low_pfn == iovad->start_pfn && retry_pfn < limit_pfn) { high_pfn = limit_pfn; - low_pfn = retry_pfn; + low_pfn = retry_pfn + 1; curr = iova_find_limit(iovad, limit_pfn); curr_iova = to_iova(curr); goto retry; From ead3e6c79479890444c777fd329afc125fecde48 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Dec 2022 16:12:51 +0200 Subject: [PATCH 261/543] iommu/arm-smmu-v3: Don't unregister on shutdown commit 32ea2c57dc216b6ad8125fa680d31daa5d421c95 upstream. Similar to SMMUv2, this driver calls iommu_device_unregister() from the shutdown path, which removes the IOMMU groups with no coordination whatsoever with their users - shutdown methods are optional in device drivers. This can lead to NULL pointer dereferences in those drivers' DMA API calls, or worse. Instead of calling the full arm_smmu_device_remove() from arm_smmu_device_shutdown(), let's pick only the relevant function call - arm_smmu_device_disable() - more or less the reverse of arm_smmu_device_reset() - and call just that from the shutdown path. Fixes: 57365a04c921 ("iommu: Move bus setup to IOMMU device registration") Suggested-by: Robin Murphy Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20221215141251.3688780-2-vladimir.oltean@nxp.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 6d5df91c5c46..d4d8bfee9feb 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3854,7 +3854,9 @@ static int arm_smmu_device_remove(struct platform_device *pdev) static void arm_smmu_device_shutdown(struct platform_device *pdev) { - arm_smmu_device_remove(pdev); + struct arm_smmu_device *smmu = platform_get_drvdata(pdev); + + arm_smmu_device_disable(smmu); } static const struct of_device_id arm_smmu_of_match[] = { From abebd865a8964182fc9b170fd40391565126b305 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 19 Dec 2022 19:06:22 +0100 Subject: [PATCH 262/543] iommu/mediatek-v1: Fix an error handling path in mtk_iommu_v1_probe() commit 142e821f68cf5da79ce722cb9c1323afae30e185 upstream. A clk, prepared and enabled in mtk_iommu_v1_hw_init(), is not released in the error handling path of mtk_iommu_v1_probe(). Add the corresponding clk_disable_unprepare(), as already done in the remove function. Fixes: b17336c55d89 ("iommu/mediatek: add support for mtk iommu generation one HW") Signed-off-by: Christophe JAILLET Reviewed-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/593e7b7d97c6e064b29716b091a9d4fd122241fb.1671473163.git.christophe.jaillet@wanadoo.fr Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/mtk_iommu_v1.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 6e0e65831eb7..a978220eb620 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -685,7 +685,7 @@ static int mtk_iommu_v1_probe(struct platform_device *pdev) ret = iommu_device_sysfs_add(&data->iommu, &pdev->dev, NULL, dev_name(&pdev->dev)); if (ret) - return ret; + goto out_clk_unprepare; ret = iommu_device_register(&data->iommu, &mtk_iommu_v1_ops, dev); if (ret) @@ -700,6 +700,8 @@ out_dev_unreg: iommu_device_unregister(&data->iommu); out_sysfs_remove: iommu_device_sysfs_remove(&data->iommu); +out_clk_unprepare: + clk_disable_unprepare(data->bclk); return ret; } From a1b9c7b1978aacf4b2f33e34bde1e2bb80b8497a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Dec 2022 16:12:50 +0200 Subject: [PATCH 263/543] iommu/arm-smmu: Don't unregister on shutdown commit ce31e6ca68bd7639bd3e5ef97be215031842bbab upstream. Michael Walle says he noticed the following stack trace while performing a shutdown with "reboot -f". He suggests he got "lucky" and just hit the correct spot for the reboot while there was a packet transmission in flight. Unable to handle kernel NULL pointer dereference at virtual address 0000000000000098 CPU: 0 PID: 23 Comm: kworker/0:1 Not tainted 6.1.0-rc5-00088-gf3600ff8e322 #1930 Hardware name: Kontron KBox A-230-LS (DT) pc : iommu_get_dma_domain+0x14/0x20 lr : iommu_dma_map_page+0x9c/0x254 Call trace: iommu_get_dma_domain+0x14/0x20 dma_map_page_attrs+0x1ec/0x250 enetc_start_xmit+0x14c/0x10b0 enetc_xmit+0x60/0xdc dev_hard_start_xmit+0xb8/0x210 sch_direct_xmit+0x11c/0x420 __dev_queue_xmit+0x354/0xb20 ip6_finish_output2+0x280/0x5b0 __ip6_finish_output+0x15c/0x270 ip6_output+0x78/0x15c NF_HOOK.constprop.0+0x50/0xd0 mld_sendpack+0x1bc/0x320 mld_ifc_work+0x1d8/0x4dc process_one_work+0x1e8/0x460 worker_thread+0x178/0x534 kthread+0xe0/0xe4 ret_from_fork+0x10/0x20 Code: d503201f f9416800 d503233f d50323bf (f9404c00) ---[ end trace 0000000000000000 ]--- Kernel panic - not syncing: Oops: Fatal exception in interrupt This appears to be reproducible when the board has a fixed IP address, is ping flooded from another host, and "reboot -f" is used. The following is one more manifestation of the issue: $ reboot -f kvm: exiting hardware virtualization cfg80211: failed to load regulatory.db arm-smmu 5000000.iommu: disabling translation sdhci-esdhc 2140000.mmc: Removing from iommu group 11 sdhci-esdhc 2150000.mmc: Removing from iommu group 12 fsl-edma 22c0000.dma-controller: Removing from iommu group 17 dwc3 3100000.usb: Removing from iommu group 9 dwc3 3110000.usb: Removing from iommu group 10 ahci-qoriq 3200000.sata: Removing from iommu group 2 fsl-qdma 8380000.dma-controller: Removing from iommu group 20 platform f080000.display: Removing from iommu group 0 etnaviv-gpu f0c0000.gpu: Removing from iommu group 1 etnaviv etnaviv: Removing from iommu group 1 caam_jr 8010000.jr: Removing from iommu group 13 caam_jr 8020000.jr: Removing from iommu group 14 caam_jr 8030000.jr: Removing from iommu group 15 caam_jr 8040000.jr: Removing from iommu group 16 fsl_enetc 0000:00:00.0: Removing from iommu group 4 arm-smmu 5000000.iommu: Blocked unknown Stream ID 0x429; boot with "arm-smmu.disable_bypass=0" to allow, but this may have security implications arm-smmu 5000000.iommu: GFSR 0x80000002, GFSYNR0 0x00000002, GFSYNR1 0x00000429, GFSYNR2 0x00000000 fsl_enetc 0000:00:00.1: Removing from iommu group 5 arm-smmu 5000000.iommu: Blocked unknown Stream ID 0x429; boot with "arm-smmu.disable_bypass=0" to allow, but this may have security implications arm-smmu 5000000.iommu: GFSR 0x80000002, GFSYNR0 0x00000002, GFSYNR1 0x00000429, GFSYNR2 0x00000000 arm-smmu 5000000.iommu: Blocked unknown Stream ID 0x429; boot with "arm-smmu.disable_bypass=0" to allow, but this may have security implications arm-smmu 5000000.iommu: GFSR 0x80000002, GFSYNR0 0x00000000, GFSYNR1 0x00000429, GFSYNR2 0x00000000 fsl_enetc 0000:00:00.2: Removing from iommu group 6 fsl_enetc_mdio 0000:00:00.3: Removing from iommu group 8 mscc_felix 0000:00:00.5: Removing from iommu group 3 fsl_enetc 0000:00:00.6: Removing from iommu group 7 pcieport 0001:00:00.0: Removing from iommu group 18 arm-smmu 5000000.iommu: Blocked unknown Stream ID 0x429; boot with "arm-smmu.disable_bypass=0" to allow, but this may have security implications arm-smmu 5000000.iommu: GFSR 0x00000002, GFSYNR0 0x00000000, GFSYNR1 0x00000429, GFSYNR2 0x00000000 pcieport 0002:00:00.0: Removing from iommu group 19 Unable to handle kernel NULL pointer dereference at virtual address 00000000000000a8 pc : iommu_get_dma_domain+0x14/0x20 lr : iommu_dma_unmap_page+0x38/0xe0 Call trace: iommu_get_dma_domain+0x14/0x20 dma_unmap_page_attrs+0x38/0x1d0 enetc_unmap_tx_buff.isra.0+0x6c/0x80 enetc_poll+0x170/0x910 __napi_poll+0x40/0x1e0 net_rx_action+0x164/0x37c __do_softirq+0x128/0x368 run_ksoftirqd+0x68/0x90 smpboot_thread_fn+0x14c/0x190 Code: d503201f f9416800 d503233f d50323bf (f9405400) ---[ end trace 0000000000000000 ]--- Kernel panic - not syncing: Oops: Fatal exception in interrupt ---[ end Kernel panic - not syncing: Oops: Fatal exception in interrupt ]--- The problem seems to be that iommu_group_remove_device() is allowed to run with no coordination whatsoever with the shutdown procedure of the enetc PCI device. In fact, it almost seems as if it implies that the pci_driver :: shutdown() method is mandatory if DMA is used with an IOMMU, otherwise this is inevitable. That was never the case; shutdown methods are optional in device drivers. This is the call stack that leads to iommu_group_remove_device() during reboot: kernel_restart -> device_shutdown -> platform_shutdown -> arm_smmu_device_shutdown -> arm_smmu_device_remove -> iommu_device_unregister -> bus_for_each_dev -> remove_iommu_group -> iommu_release_device -> iommu_group_remove_device I don't know much about the arm_smmu driver, but arm_smmu_device_shutdown() invoking arm_smmu_device_remove() looks suspicious, since it causes the IOMMU device to unregister and that's where everything starts to unravel. It forces all other devices which depend on IOMMU groups to also point their ->shutdown() to ->remove(), which will make reboot slower overall. There are 2 moments relevant to this behavior. First was commit b06c076ea962 ("Revert "iommu/arm-smmu: Make arm-smmu explicitly non-modular"") when arm_smmu_device_shutdown() was made to run the exact same thing as arm_smmu_device_remove(). Prior to that, there was no iommu_device_unregister() call in arm_smmu_device_shutdown(). However, that was benign until commit 57365a04c921 ("iommu: Move bus setup to IOMMU device registration"), which made iommu_device_unregister() call remove_iommu_group(). Restore the old shutdown behavior by making remove() call shutdown(), but shutdown() does not call the remove() specific bits. Fixes: 57365a04c921 ("iommu: Move bus setup to IOMMU device registration") Reported-by: Michael Walle Tested-by: Michael Walle # on kontron-sl28 Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20221215141251.3688780-1-vladimir.oltean@nxp.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 30dab1418e3f..b2cf0871a5c0 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -2188,19 +2188,16 @@ static int arm_smmu_device_probe(struct platform_device *pdev) return 0; } -static int arm_smmu_device_remove(struct platform_device *pdev) +static void arm_smmu_device_shutdown(struct platform_device *pdev) { struct arm_smmu_device *smmu = platform_get_drvdata(pdev); if (!smmu) - return -ENODEV; + return; if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS)) dev_notice(&pdev->dev, "disabling translation\n"); - iommu_device_unregister(&smmu->iommu); - iommu_device_sysfs_remove(&smmu->iommu); - arm_smmu_rpm_get(smmu); /* Turn the thing off */ arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD); @@ -2212,12 +2209,21 @@ static int arm_smmu_device_remove(struct platform_device *pdev) clk_bulk_disable(smmu->num_clks, smmu->clks); clk_bulk_unprepare(smmu->num_clks, smmu->clks); - return 0; } -static void arm_smmu_device_shutdown(struct platform_device *pdev) +static int arm_smmu_device_remove(struct platform_device *pdev) { - arm_smmu_device_remove(pdev); + struct arm_smmu_device *smmu = platform_get_drvdata(pdev); + + if (!smmu) + return -ENODEV; + + iommu_device_unregister(&smmu->iommu); + iommu_device_sysfs_remove(&smmu->iommu); + + arm_smmu_device_shutdown(pdev); + + return 0; } static int __maybe_unused arm_smmu_runtime_resume(struct device *dev) From bad1a2194485fa7c8d727c06d26c70676901db8a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 15 Dec 2022 16:51:55 +0000 Subject: [PATCH 264/543] iommu/arm-smmu: Report IOMMU_CAP_CACHE_COHERENCY even betterer commit ac9c5e92dd15b9927e7355ccf79df76a58b44344 upstream. Although it's vanishingly unlikely that anyone would integrate an SMMU within a coherent interconnect without also making the pagetable walk interface coherent, the same effect happens if a coherent SMMU fails to advertise CTTW correctly. This turns out to be the case on some popular NXP SoCs, where VFIO started failing the IOMMU_CAP_CACHE_COHERENCY test, even though IOMMU_CACHE *was* previously achieving the desired effect anyway thanks to the underlying integration. While those SoCs stand to gain some more general benefits from a firmware update to override CTTW correctly in DT/ACPI, it's also easy to work around this in Linux as well, to avoid imposing too much on affected users - since the upstream client devices *are* correctly marked as coherent, we can trivially infer their coherent paths through the SMMU as well. Reported-by: Vladimir Oltean Fixes: df198b37e72c ("iommu/arm-smmu: Report IOMMU_CAP_CACHE_COHERENCY better") Signed-off-by: Robin Murphy Tested-by: Vladimir Oltean Link: https://lore.kernel.org/r/d6dc41952961e5c7b21acac08a8bf1eb0f69e124.1671123115.git.robin.murphy@arm.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index b2cf0871a5c0..f38b54a88767 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1319,8 +1319,14 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: - /* Assume that a coherent TCU implies coherent TBUs */ - return cfg->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK; + /* + * It's overwhelmingly the case in practice that when the pagetable + * walk interface is connected to a coherent interconnect, all the + * translation interfaces are too. Furthermore if the device is + * natively coherent, then its translation interface must also be. + */ + return cfg->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK || + device_get_dma_attr(dev) == DEV_DMA_COHERENT; case IOMMU_CAP_NOEXEC: return true; default: From 7b5cc7fd1789ea5dbb942c9f8207b076d365badc Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 30 Dec 2022 23:11:19 -0500 Subject: [PATCH 265/543] sched/core: Fix use-after-free bug in dup_user_cpus_ptr() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 87ca4f9efbd7cc649ff43b87970888f2812945b8 upstream. Since commit 07ec77a1d4e8 ("sched: Allow task CPU affinity to be restricted on asymmetric systems"), the setting and clearing of user_cpus_ptr are done under pi_lock for arm64 architecture. However, dup_user_cpus_ptr() accesses user_cpus_ptr without any lock protection. Since sched_setaffinity() can be invoked from another process, the process being modified may be undergoing fork() at the same time. When racing with the clearing of user_cpus_ptr in __set_cpus_allowed_ptr_locked(), it can lead to user-after-free and possibly double-free in arm64 kernel. Commit 8f9ea86fdf99 ("sched: Always preserve the user requested cpumask") fixes this problem as user_cpus_ptr, once set, will never be cleared in a task's lifetime. However, this bug was re-introduced in commit 851a723e45d1 ("sched: Always clear user_cpus_ptr in do_set_cpus_allowed()") which allows the clearing of user_cpus_ptr in do_set_cpus_allowed(). This time, it will affect all arches. Fix this bug by always clearing the user_cpus_ptr of the newly cloned/forked task before the copying process starts and check the user_cpus_ptr state of the source task under pi_lock. Note to stable, this patch won't be applicable to stable releases. Just copy the new dup_user_cpus_ptr() function over. Fixes: 07ec77a1d4e8 ("sched: Allow task CPU affinity to be restricted on asymmetric systems") Fixes: 851a723e45d1 ("sched: Always clear user_cpus_ptr in do_set_cpus_allowed()") Reported-by: David Wang 王标 Signed-off-by: Waiman Long Signed-off-by: Ingo Molnar Reviewed-by: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221231041120.440785-2-longman@redhat.com Signed-off-by: Greg Kroah-Hartman --- kernel/sched/core.c | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 535af9fbea7b..981e41cc4121 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2587,14 +2587,43 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node) { - if (!src->user_cpus_ptr) + cpumask_t *user_mask; + unsigned long flags; + + /* + * Always clear dst->user_cpus_ptr first as their user_cpus_ptr's + * may differ by now due to racing. + */ + dst->user_cpus_ptr = NULL; + + /* + * This check is racy and losing the race is a valid situation. + * It is not worth the extra overhead of taking the pi_lock on + * every fork/clone. + */ + if (data_race(!src->user_cpus_ptr)) return 0; - dst->user_cpus_ptr = kmalloc_node(cpumask_size(), GFP_KERNEL, node); - if (!dst->user_cpus_ptr) + user_mask = kmalloc_node(cpumask_size(), GFP_KERNEL, node); + if (!user_mask) return -ENOMEM; - cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr); + /* + * Use pi_lock to protect content of user_cpus_ptr + * + * Though unlikely, user_cpus_ptr can be reset to NULL by a concurrent + * do_set_cpus_allowed(). + */ + raw_spin_lock_irqsave(&src->pi_lock, flags); + if (src->user_cpus_ptr) { + swap(dst->user_cpus_ptr, user_mask); + cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr); + } + raw_spin_unlock_irqrestore(&src->pi_lock, flags); + + if (unlikely(user_mask)) + kfree(user_mask); + return 0; } From e88865876d47c790be0d5e23973499d75d034364 Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Wed, 11 Jan 2023 11:57:39 +0000 Subject: [PATCH 266/543] netfilter: ipset: Fix overflow before widen in the bitmap_ip_create() function. commit 9ea4b476cea1b7d461d16dda25ca3c7e616e2d15 upstream. When first_ip is 0, last_ip is 0xFFFFFFFF, and netmask is 31, the value of an arithmetic expression 2 << (netmask - mask_bits - 1) is subject to overflow due to a failure casting operands to a larger data type before performing the arithmetic. Note that it's harmless since the value will be checked at the next step. Found by InfoTeCS on behalf of Linux Verification Center (linuxtesting.org) with SVACE. Fixes: b9fed748185a ("netfilter: ipset: Check and reject crazy /0 input parameters") Signed-off-by: Ilia.Gavrilov Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipset/ip_set_bitmap_ip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index a8ce04a4bb72..e4fa00abde6a 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -308,8 +308,8 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[], return -IPSET_ERR_BITMAP_RANGE; pr_debug("mask_bits %u, netmask %u\n", mask_bits, netmask); - hosts = 2 << (32 - netmask - 1); - elements = 2 << (netmask - mask_bits - 1); + hosts = 2U << (32 - netmask - 1); + elements = 2UL << (netmask - mask_bits - 1); } if (elements > IPSET_BITMAP_MAX_RANGE + 1) return -IPSET_ERR_BITMAP_RANGE_SIZE; From 213b22abdf8f4892e86c07e65d8deb6c3e77c939 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 4 Jan 2023 12:54:42 +0100 Subject: [PATCH 267/543] selftests: netfilter: fix transaction test script timeout handling commit c273289fac370b6488757236cd62cc2cf04830b7 upstream. The kselftest framework uses a default timeout of 45 seconds for all test scripts. Increase the timeout to two minutes for the netfilter tests, this should hopefully be enough, Make sure that, should the script be canceled, the net namespace and the spawned ping instances are removed. Fixes: 25d8bcedbf43 ("selftests: add script to stress-test nft packet path vs. control plane") Reported-by: Mirsad Goran Todorovac Signed-off-by: Florian Westphal Tested-by: Mirsad Goran Todorovac Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- .../selftests/netfilter/nft_trans_stress.sh | 16 +++++++++------- tools/testing/selftests/netfilter/settings | 1 + 2 files changed, 10 insertions(+), 7 deletions(-) create mode 100644 tools/testing/selftests/netfilter/settings diff --git a/tools/testing/selftests/netfilter/nft_trans_stress.sh b/tools/testing/selftests/netfilter/nft_trans_stress.sh index a7f62ad4f661..2ffba45a78bf 100755 --- a/tools/testing/selftests/netfilter/nft_trans_stress.sh +++ b/tools/testing/selftests/netfilter/nft_trans_stress.sh @@ -10,12 +10,20 @@ ksft_skip=4 testns=testns-$(mktemp -u "XXXXXXXX") +tmp="" tables="foo bar baz quux" global_ret=0 eret=0 lret=0 +cleanup() { + ip netns pids "$testns" | xargs kill 2>/dev/null + ip netns del "$testns" + + rm -f "$tmp" +} + check_result() { local r=$1 @@ -43,6 +51,7 @@ if [ $? -ne 0 ];then exit $ksft_skip fi +trap cleanup EXIT tmp=$(mktemp) for table in $tables; do @@ -139,11 +148,4 @@ done check_result $lret "add/delete with nftrace enabled" -pkill -9 ping - -wait - -rm -f "$tmp" -ip netns del "$testns" - exit $global_ret diff --git a/tools/testing/selftests/netfilter/settings b/tools/testing/selftests/netfilter/settings new file mode 100644 index 000000000000..6091b45d226b --- /dev/null +++ b/tools/testing/selftests/netfilter/settings @@ -0,0 +1 @@ +timeout=120 From 424bcb570cb320d1d15238cd4c933522b90f78fa Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 6 Jan 2023 12:21:57 +0530 Subject: [PATCH 268/543] powerpc/imc-pmu: Fix use of mutex in IRQs disabled section commit 76d588dddc459fefa1da96e0a081a397c5c8e216 upstream. Current imc-pmu code triggers a WARNING with CONFIG_DEBUG_ATOMIC_SLEEP and CONFIG_PROVE_LOCKING enabled, while running a thread_imc event. Command to trigger the warning: # perf stat -e thread_imc/CPM_CS_FROM_L4_MEM_X_DPTEG/ sleep 5 Performance counter stats for 'sleep 5': 0 thread_imc/CPM_CS_FROM_L4_MEM_X_DPTEG/ 5.002117947 seconds time elapsed 0.000131000 seconds user 0.001063000 seconds sys Below is snippet of the warning in dmesg: BUG: sleeping function called from invalid context at kernel/locking/mutex.c:580 in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 2869, name: perf-exec preempt_count: 2, expected: 0 4 locks held by perf-exec/2869: #0: c00000004325c540 (&sig->cred_guard_mutex){+.+.}-{3:3}, at: bprm_execve+0x64/0xa90 #1: c00000004325c5d8 (&sig->exec_update_lock){++++}-{3:3}, at: begin_new_exec+0x460/0xef0 #2: c0000003fa99d4e0 (&cpuctx_lock){-...}-{2:2}, at: perf_event_exec+0x290/0x510 #3: c000000017ab8418 (&ctx->lock){....}-{2:2}, at: perf_event_exec+0x29c/0x510 irq event stamp: 4806 hardirqs last enabled at (4805): [] _raw_spin_unlock_irqrestore+0x94/0xd0 hardirqs last disabled at (4806): [] perf_event_exec+0x394/0x510 softirqs last enabled at (0): [] copy_process+0xc34/0x1ff0 softirqs last disabled at (0): [<0000000000000000>] 0x0 CPU: 36 PID: 2869 Comm: perf-exec Not tainted 6.2.0-rc2-00011-g1247637727f2 #61 Hardware name: 8375-42A POWER9 0x4e1202 opal:v7.0-16-g9b85f7d961 PowerNV Call Trace: dump_stack_lvl+0x98/0xe0 (unreliable) __might_resched+0x2f8/0x310 __mutex_lock+0x6c/0x13f0 thread_imc_event_add+0xf4/0x1b0 event_sched_in+0xe0/0x210 merge_sched_in+0x1f0/0x600 visit_groups_merge.isra.92.constprop.166+0x2bc/0x6c0 ctx_flexible_sched_in+0xcc/0x140 ctx_sched_in+0x20c/0x2a0 ctx_resched+0x104/0x1c0 perf_event_exec+0x340/0x510 begin_new_exec+0x730/0xef0 load_elf_binary+0x3f8/0x1e10 ... do not call blocking ops when !TASK_RUNNING; state=2001 set at [<00000000fd63e7cf>] do_nanosleep+0x60/0x1a0 WARNING: CPU: 36 PID: 2869 at kernel/sched/core.c:9912 __might_sleep+0x9c/0xb0 CPU: 36 PID: 2869 Comm: sleep Tainted: G W 6.2.0-rc2-00011-g1247637727f2 #61 Hardware name: 8375-42A POWER9 0x4e1202 opal:v7.0-16-g9b85f7d961 PowerNV NIP: c000000000194a1c LR: c000000000194a18 CTR: c000000000a78670 REGS: c00000004d2134e0 TRAP: 0700 Tainted: G W (6.2.0-rc2-00011-g1247637727f2) MSR: 9000000000021033 CR: 48002824 XER: 00000000 CFAR: c00000000013fb64 IRQMASK: 1 The above warning triggered because the current imc-pmu code uses mutex lock in interrupt disabled sections. The function mutex_lock() internally calls __might_resched(), which will check if IRQs are disabled and in case IRQs are disabled, it will trigger the warning. Fix the issue by changing the mutex lock to spinlock. Fixes: 8f95faaac56c ("powerpc/powernv: Detect and create IMC device") Reported-by: Michael Petlan Reported-by: Peter Zijlstra Signed-off-by: Kajol Jain [mpe: Fix comments, trim oops in change log, add reported-by tags] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230106065157.182648-1-kjain@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/imc-pmu.h | 2 +- arch/powerpc/perf/imc-pmu.c | 136 ++++++++++++++--------------- 2 files changed, 67 insertions(+), 71 deletions(-) diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h index 4f897993b710..699a88584ae1 100644 --- a/arch/powerpc/include/asm/imc-pmu.h +++ b/arch/powerpc/include/asm/imc-pmu.h @@ -137,7 +137,7 @@ struct imc_pmu { * are inited. */ struct imc_pmu_ref { - struct mutex lock; + spinlock_t lock; unsigned int id; int refc; }; diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index d517aba94d1b..100e97daf76b 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -14,6 +14,7 @@ #include #include #include +#include /* Nest IMC data structures and variables */ @@ -21,7 +22,7 @@ * Used to avoid races in counting the nest-pmu units during hotplug * register and unregister */ -static DEFINE_MUTEX(nest_init_lock); +static DEFINE_SPINLOCK(nest_init_lock); static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc); static struct imc_pmu **per_nest_pmu_arr; static cpumask_t nest_imc_cpumask; @@ -50,7 +51,7 @@ static int trace_imc_mem_size; * core and trace-imc */ static struct imc_pmu_ref imc_global_refc = { - .lock = __MUTEX_INITIALIZER(imc_global_refc.lock), + .lock = __SPIN_LOCK_INITIALIZER(imc_global_refc.lock), .id = 0, .refc = 0, }; @@ -400,7 +401,7 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu) get_hard_smp_processor_id(cpu)); /* * If this is the last cpu in this chip then, skip the reference - * count mutex lock and make the reference count on this chip zero. + * count lock and make the reference count on this chip zero. */ ref = get_nest_pmu_ref(cpu); if (!ref) @@ -462,15 +463,15 @@ static void nest_imc_counters_release(struct perf_event *event) /* * See if we need to disable the nest PMU. * If no events are currently in use, then we have to take a - * mutex to ensure that we don't race with another task doing + * lock to ensure that we don't race with another task doing * enable or disable the nest counters. */ ref = get_nest_pmu_ref(event->cpu); if (!ref) return; - /* Take the mutex lock for this node and then decrement the reference count */ - mutex_lock(&ref->lock); + /* Take the lock for this node and then decrement the reference count */ + spin_lock(&ref->lock); if (ref->refc == 0) { /* * The scenario where this is true is, when perf session is @@ -482,7 +483,7 @@ static void nest_imc_counters_release(struct perf_event *event) * an OPAL call to disable the engine in that node. * */ - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); return; } ref->refc--; @@ -490,7 +491,7 @@ static void nest_imc_counters_release(struct perf_event *event) rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST, get_hard_smp_processor_id(event->cpu)); if (rc) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("nest-imc: Unable to stop the counters for core %d\n", node_id); return; } @@ -498,7 +499,7 @@ static void nest_imc_counters_release(struct perf_event *event) WARN(1, "nest-imc: Invalid event reference count\n"); ref->refc = 0; } - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); } static int nest_imc_event_init(struct perf_event *event) @@ -557,26 +558,25 @@ static int nest_imc_event_init(struct perf_event *event) /* * Get the imc_pmu_ref struct for this node. - * Take the mutex lock and then increment the count of nest pmu events - * inited. + * Take the lock and then increment the count of nest pmu events inited. */ ref = get_nest_pmu_ref(event->cpu); if (!ref) return -EINVAL; - mutex_lock(&ref->lock); + spin_lock(&ref->lock); if (ref->refc == 0) { rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST, get_hard_smp_processor_id(event->cpu)); if (rc) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("nest-imc: Unable to start the counters for node %d\n", node_id); return rc; } } ++ref->refc; - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); event->destroy = nest_imc_counters_release; return 0; @@ -612,9 +612,8 @@ static int core_imc_mem_init(int cpu, int size) return -ENOMEM; mem_info->vbase = page_address(page); - /* Init the mutex */ core_imc_refc[core_id].id = core_id; - mutex_init(&core_imc_refc[core_id].lock); + spin_lock_init(&core_imc_refc[core_id].lock); rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_CORE, __pa((void *)mem_info->vbase), @@ -703,9 +702,8 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu) perf_pmu_migrate_context(&core_imc_pmu->pmu, cpu, ncpu); } else { /* - * If this is the last cpu in this core then, skip taking refernce - * count mutex lock for this core and directly zero "refc" for - * this core. + * If this is the last cpu in this core then skip taking reference + * count lock for this core and directly zero "refc" for this core. */ opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, get_hard_smp_processor_id(cpu)); @@ -720,11 +718,11 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu) * last cpu in this core and core-imc event running * in this cpu. */ - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); if (imc_global_refc.id == IMC_DOMAIN_CORE) imc_global_refc.refc--; - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); } return 0; } @@ -739,7 +737,7 @@ static int core_imc_pmu_cpumask_init(void) static void reset_global_refc(struct perf_event *event) { - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); imc_global_refc.refc--; /* @@ -751,7 +749,7 @@ static void reset_global_refc(struct perf_event *event) imc_global_refc.refc = 0; imc_global_refc.id = 0; } - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); } static void core_imc_counters_release(struct perf_event *event) @@ -764,17 +762,17 @@ static void core_imc_counters_release(struct perf_event *event) /* * See if we need to disable the IMC PMU. * If no events are currently in use, then we have to take a - * mutex to ensure that we don't race with another task doing + * lock to ensure that we don't race with another task doing * enable or disable the core counters. */ core_id = event->cpu / threads_per_core; - /* Take the mutex lock and decrement the refernce count for this core */ + /* Take the lock and decrement the refernce count for this core */ ref = &core_imc_refc[core_id]; if (!ref) return; - mutex_lock(&ref->lock); + spin_lock(&ref->lock); if (ref->refc == 0) { /* * The scenario where this is true is, when perf session is @@ -786,7 +784,7 @@ static void core_imc_counters_release(struct perf_event *event) * an OPAL call to disable the engine in that core. * */ - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); return; } ref->refc--; @@ -794,7 +792,7 @@ static void core_imc_counters_release(struct perf_event *event) rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, get_hard_smp_processor_id(event->cpu)); if (rc) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("IMC: Unable to stop the counters for core %d\n", core_id); return; } @@ -802,7 +800,7 @@ static void core_imc_counters_release(struct perf_event *event) WARN(1, "core-imc: Invalid event reference count\n"); ref->refc = 0; } - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); reset_global_refc(event); } @@ -840,7 +838,6 @@ static int core_imc_event_init(struct perf_event *event) if ((!pcmi->vbase)) return -ENODEV; - /* Get the core_imc mutex for this core */ ref = &core_imc_refc[core_id]; if (!ref) return -EINVAL; @@ -848,22 +845,22 @@ static int core_imc_event_init(struct perf_event *event) /* * Core pmu units are enabled only when it is used. * See if this is triggered for the first time. - * If yes, take the mutex lock and enable the core counters. + * If yes, take the lock and enable the core counters. * If not, just increment the count in core_imc_refc struct. */ - mutex_lock(&ref->lock); + spin_lock(&ref->lock); if (ref->refc == 0) { rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE, get_hard_smp_processor_id(event->cpu)); if (rc) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("core-imc: Unable to start the counters for core %d\n", core_id); return rc; } } ++ref->refc; - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); /* * Since the system can run either in accumulation or trace-mode @@ -874,7 +871,7 @@ static int core_imc_event_init(struct perf_event *event) * to know whether any other trace/thread imc * events are running. */ - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_CORE) { /* * No other trace/thread imc events are running in @@ -883,10 +880,10 @@ static int core_imc_event_init(struct perf_event *event) imc_global_refc.id = IMC_DOMAIN_CORE; imc_global_refc.refc++; } else { - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); return -EBUSY; } - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK); event->destroy = core_imc_counters_release; @@ -958,10 +955,10 @@ static int ppc_thread_imc_cpu_offline(unsigned int cpu) mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63)))); /* Reduce the refc if thread-imc event running on this cpu */ - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); if (imc_global_refc.id == IMC_DOMAIN_THREAD) imc_global_refc.refc--; - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); return 0; } @@ -1001,7 +998,7 @@ static int thread_imc_event_init(struct perf_event *event) if (!target) return -EINVAL; - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); /* * Check if any other trace/core imc events are running in the * system, if not set the global id to thread-imc. @@ -1010,10 +1007,10 @@ static int thread_imc_event_init(struct perf_event *event) imc_global_refc.id = IMC_DOMAIN_THREAD; imc_global_refc.refc++; } else { - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); return -EBUSY; } - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); event->pmu->task_ctx_nr = perf_sw_context; event->destroy = reset_global_refc; @@ -1135,25 +1132,25 @@ static int thread_imc_event_add(struct perf_event *event, int flags) /* * imc pmus are enabled only when it is used. * See if this is triggered for the first time. - * If yes, take the mutex lock and enable the counters. + * If yes, take the lock and enable the counters. * If not, just increment the count in ref count struct. */ ref = &core_imc_refc[core_id]; if (!ref) return -EINVAL; - mutex_lock(&ref->lock); + spin_lock(&ref->lock); if (ref->refc == 0) { if (opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE, get_hard_smp_processor_id(smp_processor_id()))) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("thread-imc: Unable to start the counter\ for core %d\n", core_id); return -EINVAL; } } ++ref->refc; - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); return 0; } @@ -1170,12 +1167,12 @@ static void thread_imc_event_del(struct perf_event *event, int flags) return; } - mutex_lock(&ref->lock); + spin_lock(&ref->lock); ref->refc--; if (ref->refc == 0) { if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, get_hard_smp_processor_id(smp_processor_id()))) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("thread-imc: Unable to stop the counters\ for core %d\n", core_id); return; @@ -1183,7 +1180,7 @@ static void thread_imc_event_del(struct perf_event *event, int flags) } else if (ref->refc < 0) { ref->refc = 0; } - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); /* Set bit 0 of LDBAR to zero, to stop posting updates to memory */ mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63)))); @@ -1224,9 +1221,8 @@ static int trace_imc_mem_alloc(int cpu_id, int size) } } - /* Init the mutex, if not already */ trace_imc_refc[core_id].id = core_id; - mutex_init(&trace_imc_refc[core_id].lock); + spin_lock_init(&trace_imc_refc[core_id].lock); mtspr(SPRN_LDBAR, 0); return 0; @@ -1246,10 +1242,10 @@ static int ppc_trace_imc_cpu_offline(unsigned int cpu) * Reduce the refc if any trace-imc event running * on this cpu. */ - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); if (imc_global_refc.id == IMC_DOMAIN_TRACE) imc_global_refc.refc--; - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); return 0; } @@ -1371,17 +1367,17 @@ static int trace_imc_event_add(struct perf_event *event, int flags) } mtspr(SPRN_LDBAR, ldbar_value); - mutex_lock(&ref->lock); + spin_lock(&ref->lock); if (ref->refc == 0) { if (opal_imc_counters_start(OPAL_IMC_COUNTERS_TRACE, get_hard_smp_processor_id(smp_processor_id()))) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("trace-imc: Unable to start the counters for core %d\n", core_id); return -EINVAL; } } ++ref->refc; - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); return 0; } @@ -1414,19 +1410,19 @@ static void trace_imc_event_del(struct perf_event *event, int flags) return; } - mutex_lock(&ref->lock); + spin_lock(&ref->lock); ref->refc--; if (ref->refc == 0) { if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_TRACE, get_hard_smp_processor_id(smp_processor_id()))) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("trace-imc: Unable to stop the counters for core %d\n", core_id); return; } } else if (ref->refc < 0) { ref->refc = 0; } - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); trace_imc_event_stop(event, flags); } @@ -1448,7 +1444,7 @@ static int trace_imc_event_init(struct perf_event *event) * no other thread is running any core/thread imc * events */ - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_TRACE) { /* * No core/thread imc events are running in the @@ -1457,10 +1453,10 @@ static int trace_imc_event_init(struct perf_event *event) imc_global_refc.id = IMC_DOMAIN_TRACE; imc_global_refc.refc++; } else { - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); return -EBUSY; } - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); event->hw.idx = -1; @@ -1533,10 +1529,10 @@ static int init_nest_pmu_ref(void) i = 0; for_each_node(nid) { /* - * Mutex lock to avoid races while tracking the number of + * Take the lock to avoid races while tracking the number of * sessions using the chip's nest pmu units. */ - mutex_init(&nest_imc_refc[i].lock); + spin_lock_init(&nest_imc_refc[i].lock); /* * Loop to init the "id" with the node_id. Variable "i" initialized to @@ -1633,7 +1629,7 @@ static void imc_common_mem_free(struct imc_pmu *pmu_ptr) static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) { if (pmu_ptr->domain == IMC_DOMAIN_NEST) { - mutex_lock(&nest_init_lock); + spin_lock(&nest_init_lock); if (nest_pmus == 1) { cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE); kfree(nest_imc_refc); @@ -1643,7 +1639,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) if (nest_pmus > 0) nest_pmus--; - mutex_unlock(&nest_init_lock); + spin_unlock(&nest_init_lock); } /* Free core_imc memory */ @@ -1800,11 +1796,11 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id * rest. To handle the cpuhotplug callback unregister, we track * the number of nest pmus in "nest_pmus". */ - mutex_lock(&nest_init_lock); + spin_lock(&nest_init_lock); if (nest_pmus == 0) { ret = init_nest_pmu_ref(); if (ret) { - mutex_unlock(&nest_init_lock); + spin_unlock(&nest_init_lock); kfree(per_nest_pmu_arr); per_nest_pmu_arr = NULL; goto err_free_mem; @@ -1812,7 +1808,7 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id /* Register for cpu hotplug notification. */ ret = nest_pmu_cpumask_init(); if (ret) { - mutex_unlock(&nest_init_lock); + spin_unlock(&nest_init_lock); kfree(nest_imc_refc); kfree(per_nest_pmu_arr); per_nest_pmu_arr = NULL; @@ -1820,7 +1816,7 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id } } nest_pmus++; - mutex_unlock(&nest_init_lock); + spin_unlock(&nest_init_lock); break; case IMC_DOMAIN_CORE: ret = core_imc_pmu_cpumask_init(); From b43d52eeca7d86e09ad683b570eb791d31625a44 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 10 Jan 2023 12:15:40 +0100 Subject: [PATCH 269/543] x86/boot: Avoid using Intel mnemonics in AT&T syntax asm commit 7c6dd961d0c8e7e8f9fdc65071fb09ece702e18d upstream. With 'GNU assembler (GNU Binutils for Debian) 2.39.90.20221231' the build now reports: arch/x86/realmode/rm/../../boot/bioscall.S: Assembler messages: arch/x86/realmode/rm/../../boot/bioscall.S:35: Warning: found `movsd'; assuming `movsl' was meant arch/x86/realmode/rm/../../boot/bioscall.S:70: Warning: found `movsd'; assuming `movsl' was meant arch/x86/boot/bioscall.S: Assembler messages: arch/x86/boot/bioscall.S:35: Warning: found `movsd'; assuming `movsl' was meant arch/x86/boot/bioscall.S:70: Warning: found `movsd'; assuming `movsl' was meant Which is due to: PR gas/29525 Note that with the dropped CMPSD and MOVSD Intel Syntax string insn templates taking operands, mixed IsString/non-IsString template groups (with memory operands) cannot occur anymore. With that maybe_adjust_templates() becomes unnecessary (and is hence being removed). More details: https://sourceware.org/bugzilla/show_bug.cgi?id=29525 Borislav Petkov further explains: " the particular problem here is is that the 'd' suffix is "conflicting" in the sense that you can have SSE mnemonics like movsD %xmm... and the same thing also for string ops (which is the case here) so apparently the agreement in binutils land is to use the always accepted suffixes 'l' or 'q' and phase out 'd' slowly... " Fixes: 7a734e7dd93b ("x86, setup: "glove box" BIOS calls -- infrastructure") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/Y71I3Ex2pvIxMpsP@hirez.programming.kicks-ass.net Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/bioscall.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S index 5521ea12f44e..aa9b96457584 100644 --- a/arch/x86/boot/bioscall.S +++ b/arch/x86/boot/bioscall.S @@ -32,7 +32,7 @@ intcall: movw %dx, %si movw %sp, %di movw $11, %cx - rep; movsd + rep; movsl /* Pop full state from the stack */ popal @@ -67,7 +67,7 @@ intcall: jz 4f movw %sp, %si movw $11, %cx - rep; movsd + rep; movsl 4: addw $44, %sp /* Restore state and return */ From 1dc8c5574cc2491016691b86a70fcfa4d05279ce Mon Sep 17 00:00:00 2001 From: Eliav Farber Date: Thu, 20 Oct 2022 12:44:58 +0000 Subject: [PATCH 270/543] EDAC/device: Fix period calculation in edac_device_reset_delay_period() commit e84077437902ec99eba0a6b516df772653f142c7 upstream. Fix period calculation in case user sets a value of 1000. The input of round_jiffies_relative() should be in jiffies and not in milli-seconds. [ bp: Use the same code pattern as in edac_device_workq_setup() for clarity. ] Fixes: c4cf3b454eca ("EDAC: Rework workqueue handling") Signed-off-by: Eliav Farber Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://lore.kernel.org/r/20221020124458.22153-1-farbere@amazon.com Signed-off-by: Greg Kroah-Hartman --- drivers/edac/edac_device.c | 17 ++++++++--------- drivers/edac/edac_module.h | 2 +- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index 19522c568aa5..878deb4880cd 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -394,17 +394,16 @@ static void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev) * Then restart the workq on the new delay */ void edac_device_reset_delay_period(struct edac_device_ctl_info *edac_dev, - unsigned long value) + unsigned long msec) { - unsigned long jiffs = msecs_to_jiffies(value); + edac_dev->poll_msec = msec; + edac_dev->delay = msecs_to_jiffies(msec); - if (value == 1000) - jiffs = round_jiffies_relative(value); - - edac_dev->poll_msec = value; - edac_dev->delay = jiffs; - - edac_mod_work(&edac_dev->work, jiffs); + /* See comment in edac_device_workq_setup() above */ + if (edac_dev->poll_msec == 1000) + edac_mod_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay)); + else + edac_mod_work(&edac_dev->work, edac_dev->delay); } int edac_device_alloc_index(void) diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h index 50ed9f2425bb..4ed24d664d83 100644 --- a/drivers/edac/edac_module.h +++ b/drivers/edac/edac_module.h @@ -52,7 +52,7 @@ bool edac_stop_work(struct delayed_work *work); bool edac_mod_work(struct delayed_work *work, unsigned long delay); extern void edac_device_reset_delay_period(struct edac_device_ctl_info - *edac_dev, unsigned long value); + *edac_dev, unsigned long msec); extern void edac_mc_reset_delay_period(unsigned long value); /* From c1c59538337ab6d45700cb4a1c9725e67f59bc6e Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 10 Jan 2023 07:54:27 +0100 Subject: [PATCH 271/543] x86/pat: Fix pat_x_mtrr_type() for MTRR disabled case commit 90b926e68f500844dff16b5bcea178dc55cf580a upstream. Since 72cbc8f04fe2 ("x86/PAT: Have pat_enabled() properly reflect state when running on Xen") PAT can be enabled without MTRR. This has resulted in problems e.g. for a SEV-SNP guest running under Hyper-V, when trying to establish a new mapping via memremap() with WB caching mode, as pat_x_mtrr_type() will call mtrr_type_lookup(), which in turn is returning MTRR_TYPE_INVALID due to MTRR being disabled in this configuration. The result is a mapping with UC- caching, leading to severe performance degradation. Fix that by handling MTRR_TYPE_INVALID the same way as MTRR_TYPE_WRBACK in pat_x_mtrr_type() because MTRR_TYPE_INVALID means MTRRs are disabled. [ bp: Massage commit message. ] Fixes: 72cbc8f04fe2 ("x86/PAT: Have pat_enabled() properly reflect state when running on Xen") Reported-by: Michael Kelley (LINUX) Signed-off-by: Juergen Gross Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Michael Kelley Tested-by: Michael Kelley Cc: Link: https://lore.kernel.org/r/20230110065427.20767-1-jgross@suse.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/pat/memtype.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 66a209f7eb86..2642bc4c8ec0 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -434,7 +434,8 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, u8 mtrr_type, uniform; mtrr_type = mtrr_type_lookup(start, end, &uniform); - if (mtrr_type != MTRR_TYPE_WRBACK) + if (mtrr_type != MTRR_TYPE_WRBACK && + mtrr_type != MTRR_TYPE_INVALID) return _PAGE_CACHE_MODE_UC_MINUS; return _PAGE_CACHE_MODE_WB; From d01c6557478bfe10eeef280abae700def7de3ffd Mon Sep 17 00:00:00 2001 From: Peter Newman Date: Tue, 20 Dec 2022 17:11:23 +0100 Subject: [PATCH 272/543] x86/resctrl: Fix task CLOSID/RMID update race commit fe1f0714385fbcf76b0cbceb02b7277d842014fc upstream. When the user moves a running task to a new rdtgroup using the task's file interface or by deleting its rdtgroup, the resulting change in CLOSID/RMID must be immediately propagated to the PQR_ASSOC MSR on the task(s) CPUs. x86 allows reordering loads with prior stores, so if the task starts running between a task_curr() check that the CPU hoisted before the stores in the CLOSID/RMID update then it can start running with the old CLOSID/RMID until it is switched again because __rdtgroup_move_task() failed to determine that it needs to be interrupted to obtain the new CLOSID/RMID. Refer to the diagram below: CPU 0 CPU 1 ----- ----- __rdtgroup_move_task(): curr <- t1->cpu->rq->curr __schedule(): rq->curr <- t1 resctrl_sched_in(): t1->{closid,rmid} -> {1,1} t1->{closid,rmid} <- {2,2} if (curr == t1) // false IPI(t1->cpu) A similar race impacts rdt_move_group_tasks(), which updates tasks in a deleted rdtgroup. In both cases, use smp_mb() to order the task_struct::{closid,rmid} stores before the loads in task_curr(). In particular, in the rdt_move_group_tasks() case, simply execute an smp_mb() on every iteration with a matching task. It is possible to use a single smp_mb() in rdt_move_group_tasks(), but this would require two passes and a means of remembering which task_structs were updated in the first loop. However, benchmarking results below showed too little performance impact in the simple approach to justify implementing the two-pass approach. Times below were collected using `perf stat` to measure the time to remove a group containing a 1600-task, parallel workload. CPU: Intel(R) Xeon(R) Platinum P-8136 CPU @ 2.00GHz (112 threads) # mkdir /sys/fs/resctrl/test # echo $$ > /sys/fs/resctrl/test/tasks # perf bench sched messaging -g 40 -l 100000 task-clock time ranges collected using: # perf stat rmdir /sys/fs/resctrl/test Baseline: 1.54 - 1.60 ms smp_mb() every matching task: 1.57 - 1.67 ms [ bp: Massage commit message. ] Fixes: ae28d1aae48a ("x86/resctrl: Use an IPI instead of task_work_add() to update PQR_ASSOC MSR") Fixes: 0efc89be9471 ("x86/intel_rdt: Update task closid immediately on CPU in rmdir and unmount") Signed-off-by: Peter Newman Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Reviewed-by: Babu Moger Cc: Link: https://lore.kernel.org/r/20221220161123.432120-1-peternewman@google.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index e5a48f05e787..5993da21d822 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -580,8 +580,10 @@ static int __rdtgroup_move_task(struct task_struct *tsk, /* * Ensure the task's closid and rmid are written before determining if * the task is current that will decide if it will be interrupted. + * This pairs with the full barrier between the rq->curr update and + * resctrl_sched_in() during context switch. */ - barrier(); + smp_mb(); /* * By now, the task's closid and rmid are set. If the task is current @@ -2401,6 +2403,14 @@ static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to, WRITE_ONCE(t->closid, to->closid); WRITE_ONCE(t->rmid, to->mon.rmid); + /* + * Order the closid/rmid stores above before the loads + * in task_curr(). This pairs with the full barrier + * between the rq->curr update and resctrl_sched_in() + * during context switch. + */ + smp_mb(); + /* * If the task is on a CPU, set the CPU in the mask. * The detection is inaccurate as tasks might move or From 07ac5db2d840e47428c871097809dc70e68f66c3 Mon Sep 17 00:00:00 2001 From: Peter Newman Date: Tue, 20 Dec 2022 17:41:31 +0100 Subject: [PATCH 273/543] x86/resctrl: Fix event counts regression in reused RMIDs commit 2a81160d29d65b5876ab3f824fda99ae0219f05e upstream. When creating a new monitoring group, the RMID allocated for it may have been used by a group which was previously removed. In this case, the hardware counters will have non-zero values which should be deducted from what is reported in the new group's counts. resctrl_arch_reset_rmid() initializes the prev_msr value for counters to 0, causing the initial count to be charged to the new group. Resurrect __rmid_read() and use it to initialize prev_msr correctly. Unlike before, __rmid_read() checks for error bits in the MSR read so that callers don't need to. Fixes: 1d81d15db39c ("x86/resctrl: Move mbm_overflow_count() into resctrl_arch_rmid_read()") Signed-off-by: Peter Newman Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Babu Moger Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221220164132.443083-1-peternewman@google.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/resctrl/monitor.c | 49 ++++++++++++++++++--------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index efe0c30d3a12..77538abeb72a 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -146,6 +146,30 @@ static inline struct rmid_entry *__rmid_entry(u32 rmid) return entry; } +static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val) +{ + u64 msr_val; + + /* + * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured + * with a valid event code for supported resource type and the bits + * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID, + * IA32_QM_CTR.data (bits 61:0) reports the monitored data. + * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62) + * are error bits. + */ + wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid); + rdmsrl(MSR_IA32_QM_CTR, msr_val); + + if (msr_val & RMID_VAL_ERROR) + return -EIO; + if (msr_val & RMID_VAL_UNAVAIL) + return -EINVAL; + + *val = msr_val; + return 0; +} + static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom, u32 rmid, enum resctrl_event_id eventid) @@ -172,8 +196,12 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, struct arch_mbm_state *am; am = get_arch_mbm_state(hw_dom, rmid, eventid); - if (am) + if (am) { memset(am, 0, sizeof(*am)); + + /* Record any initial, non-zero count value. */ + __rmid_read(rmid, eventid, &am->prev_msr); + } } static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) @@ -191,25 +219,14 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); struct arch_mbm_state *am; u64 msr_val, chunks; + int ret; if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask)) return -EINVAL; - /* - * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured - * with a valid event code for supported resource type and the bits - * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID, - * IA32_QM_CTR.data (bits 61:0) reports the monitored data. - * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62) - * are error bits. - */ - wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid); - rdmsrl(MSR_IA32_QM_CTR, msr_val); - - if (msr_val & RMID_VAL_ERROR) - return -EIO; - if (msr_val & RMID_VAL_UNAVAIL) - return -EINVAL; + ret = __rmid_read(rmid, eventid, &msr_val); + if (ret) + return ret; am = get_arch_mbm_state(hw_dom, rmid, eventid); if (am) { From ad1336274f733a7cb1f87b5c5908165a2c14df53 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Sun, 27 Nov 2022 22:06:02 +0100 Subject: [PATCH 274/543] regulator: da9211: Use irq handler when ready [ Upstream commit 02228f6aa6a64d588bc31e3267d05ff184d772eb ] If the system does not come from reset (like when it is kexec()), the regulator might have an IRQ waiting for us. If we enable the IRQ handler before its structures are ready, we crash. This patch fixes: [ 1.141839] Unable to handle kernel read from unreadable memory at virtual address 0000000000000078 [ 1.316096] Call trace: [ 1.316101] blocking_notifier_call_chain+0x20/0xa8 [ 1.322757] cpu cpu0: dummy supplies not allowed for exclusive requests [ 1.327823] regulator_notifier_call_chain+0x1c/0x2c [ 1.327825] da9211_irq_handler+0x68/0xf8 [ 1.327829] irq_thread+0x11c/0x234 [ 1.327833] kthread+0x13c/0x154 Signed-off-by: Ricardo Ribalda Reviewed-by: Adam Ward Link: https://lore.kernel.org/r/20221124-da9211-v2-0-1779e3c5d491@chromium.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/da9211-regulator.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/regulator/da9211-regulator.c b/drivers/regulator/da9211-regulator.c index e01b32d1fa17..00828f5baa97 100644 --- a/drivers/regulator/da9211-regulator.c +++ b/drivers/regulator/da9211-regulator.c @@ -498,6 +498,12 @@ static int da9211_i2c_probe(struct i2c_client *i2c) chip->chip_irq = i2c->irq; + ret = da9211_regulator_init(chip); + if (ret < 0) { + dev_err(chip->dev, "Failed to initialize regulator: %d\n", ret); + return ret; + } + if (chip->chip_irq != 0) { ret = devm_request_threaded_irq(chip->dev, chip->chip_irq, NULL, da9211_irq_handler, @@ -512,11 +518,6 @@ static int da9211_i2c_probe(struct i2c_client *i2c) dev_warn(chip->dev, "No IRQ configured\n"); } - ret = da9211_regulator_init(chip); - - if (ret < 0) - dev_err(chip->dev, "Failed to initialize regulator: %d\n", ret); - return ret; } From 87c71e88f6a6619ffb1ff88f84dff48ef6d57adb Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 4 Dec 2022 11:52:44 -0800 Subject: [PATCH 275/543] scsi: storvsc: Fix swiotlb bounce buffer leak in confidential VM [ Upstream commit 67ff3d0a49f3d445c3922e30a54e03c161da561e ] storvsc_queuecommand() maps the scatter/gather list using scsi_dma_map(), which in a confidential VM allocates swiotlb bounce buffers. If the I/O submission fails in storvsc_do_io(), the I/O is typically retried by higher level code, but the bounce buffer memory is never freed. The mostly like cause of I/O submission failure is a full VMBus channel ring buffer, which is not uncommon under high I/O loads. Eventually enough bounce buffer memory leaks that the confidential VM can't do any I/O. The same problem can arise in a non-confidential VM with kernel boot parameter swiotlb=force. Fix this by doing scsi_dma_unmap() in the case of an I/O submission error, which frees the bounce buffer memory. Fixes: 743b237c3a7b ("scsi: storvsc: Add Isolation VM support for storvsc driver") Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/1670183564-76254-1-git-send-email-mikelley@microsoft.com Tested-by: Dexuan Cui Reviewed-by: Dexuan Cui Reviewed-by: Tianyu Lan Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/storvsc_drv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 3c5b7e4227b2..55d6fb452680 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1823,6 +1823,9 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) ret = storvsc_do_io(dev, cmd_request, get_cpu()); put_cpu(); + if (ret) + scsi_dma_unmap(scmnd); + if (ret == -EAGAIN) { /* no more space */ ret = SCSI_MLQUEUE_DEVICE_BUSY; From 9bfa3a61503743c91081004141953da90d2eaca0 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Wed, 7 Dec 2022 11:36:59 +0900 Subject: [PATCH 276/543] scsi: mpi3mr: Refer CONFIG_SCSI_MPI3MR in Makefile [ Upstream commit f0a43ba6c66cc0688e2748d986a1459fdd3442ef ] When Kconfig item CONFIG_SCSI_MPI3MR was introduced for mpi3mr driver, the Makefile of the driver was not modified to refer the Kconfig item. As a result, mpi3mr.ko is built regardless of the Kconfig item value y or m. Also, if 'make localmodconfig' can not find the Kconfig item in the Makefile, then it does not generate CONFIG_SCSI_MPI3MR=m even when mpi3mr.ko is loaded on the system. Refer to the Kconfig item to avoid the issues. Fixes: c4f7ac64616e ("scsi: mpi3mr: Add mpi30 Rev-R headers and Kconfig") Signed-off-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20221207023659.2411785-1-shinichiro.kawasaki@wdc.com Reviewed-by: Damien Le Moal Acked-by: Sathya Prakash Veerichetty Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/mpi3mr/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpi3mr/Makefile b/drivers/scsi/mpi3mr/Makefile index ef86ca46646b..3bf8cf34e1c3 100644 --- a/drivers/scsi/mpi3mr/Makefile +++ b/drivers/scsi/mpi3mr/Makefile @@ -1,5 +1,5 @@ # mpi3mr makefile -obj-m += mpi3mr.o +obj-$(CONFIG_SCSI_MPI3MR) += mpi3mr.o mpi3mr-y += mpi3mr_os.o \ mpi3mr_fw.o \ mpi3mr_app.o \ From bf5838132d1eeff6bf09d604bedf98778ceac1cf Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Thu, 8 Dec 2022 15:25:20 +0800 Subject: [PATCH 277/543] scsi: ufs: core: WLUN suspend SSU/enter hibern8 fail recovery [ Upstream commit 1a5665fc8d7a000671ebd3fe69c6f9acf1e0dcd9 ] When SSU/enter hibern8 fail in WLUN suspend flow, trigger the error handler and return busy to break the suspend. Otherwise the consumer will get stuck in runtime suspend status. Fixes: b294ff3e3449 ("scsi: ufs: core: Enable power management for wlun") Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20221208072520.26210-1-peter.wang@mediatek.com Reviewed-by: Stanley Chu Reviewed-by: Bart Van Assche Reviewed-by: Adrian Hunter Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/ufs/core/ufshcd.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index d1db6be80156..b048357d21e3 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6094,6 +6094,14 @@ void ufshcd_schedule_eh_work(struct ufs_hba *hba) } } +static void ufshcd_force_error_recovery(struct ufs_hba *hba) +{ + spin_lock_irq(hba->host->host_lock); + hba->force_reset = true; + ufshcd_schedule_eh_work(hba); + spin_unlock_irq(hba->host->host_lock); +} + static void ufshcd_clk_scaling_allow(struct ufs_hba *hba, bool allow) { down_write(&hba->clk_scaling_lock); @@ -9066,6 +9074,15 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) if (!hba->dev_info.b_rpm_dev_flush_capable) { ret = ufshcd_set_dev_pwr_mode(hba, req_dev_pwr_mode); + if (ret && pm_op != UFS_SHUTDOWN_PM) { + /* + * If return err in suspend flow, IO will hang. + * Trigger error handler and break suspend for + * error recovery. + */ + ufshcd_force_error_recovery(hba); + ret = -EBUSY; + } if (ret) goto enable_scaling; } @@ -9077,6 +9094,15 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) */ check_for_bkops = !ufshcd_is_ufs_dev_deepsleep(hba); ret = ufshcd_link_state_transition(hba, req_link_state, check_for_bkops); + if (ret && pm_op != UFS_SHUTDOWN_PM) { + /* + * If return err in suspend flow, IO will hang. + * Trigger error handler and break suspend for + * error recovery. + */ + ufshcd_force_error_recovery(hba); + ret = -EBUSY; + } if (ret) goto set_dev_active; From c8eb7ac95a1eea0d467bcf8f3443be22bee6b9a0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Dec 2022 14:25:48 +0100 Subject: [PATCH 278/543] ASoC: Intel: fix sof-nau8825 link failure [ Upstream commit 63f3d99b7efe4c5404a9388c05780917099cecf4 ] The snd-soc-sof_nau8825.ko module fails to link unless the sof_realtek_common support is also enabled: ERROR: modpost: "sof_rt1015p_codec_conf" [sound/soc/intel/boards/snd-soc-sof_nau8825.ko] undefined! ERROR: modpost: "sof_rt1015p_dai_link" [sound/soc/intel/boards/snd-soc-sof_nau8825.ko] undefined! Fixes: 8d0872f6239f ("ASoC: Intel: add sof-nau8825 machine driver") Signed-off-by: Arnd Bergmann Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20221221132559.2402341-1-arnd@kernel.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/boards/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig index aa12d7e3dd2f..ca49cc49c378 100644 --- a/sound/soc/intel/boards/Kconfig +++ b/sound/soc/intel/boards/Kconfig @@ -558,6 +558,7 @@ config SND_SOC_INTEL_SOF_NAU8825_MACH select SND_SOC_HDAC_HDMI select SND_SOC_INTEL_HDA_DSP_COMMON select SND_SOC_INTEL_SOF_MAXIM_COMMON + select SND_SOC_INTEL_SOF_REALTEK_COMMON help This adds support for ASoC machine driver for SOF platforms with nau8825 codec. From bc9b113d3c88689d67cf5fe47b473f96786d8b47 Mon Sep 17 00:00:00 2001 From: Brent Lu Date: Thu, 17 Nov 2022 17:19:19 -0600 Subject: [PATCH 279/543] ASoC: Intel: sof_nau8825: support rt1015p speaker amplifier [ Upstream commit 13c459fa37c9f26e9bf884a832dd67598b5c4d3e ] Add rt1015p speaker amplifier support with a new board info 'adl_rt1015p_nau8825' which supports NAU8825 on SSP0 and ALC1015Q on SSP1. Reviewed-by: Bard Liao Signed-off-by: Brent Lu Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20221117231919.112483-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Stable-dep-of: 3e78986a840d ("ASoC: Intel: sof-nau8825: fix module alias overflow") Signed-off-by: Sasha Levin --- sound/soc/intel/boards/sof_nau8825.c | 16 ++++++++++++++++ .../soc/intel/common/soc-acpi-intel-adl-match.c | 12 ++++++++++++ 2 files changed, 28 insertions(+) diff --git a/sound/soc/intel/boards/sof_nau8825.c b/sound/soc/intel/boards/sof_nau8825.c index 5585c217f78d..27880224359d 100644 --- a/sound/soc/intel/boards/sof_nau8825.c +++ b/sound/soc/intel/boards/sof_nau8825.c @@ -47,6 +47,7 @@ #define SOF_RT1019P_SPEAKER_AMP_PRESENT BIT(14) #define SOF_MAX98373_SPEAKER_AMP_PRESENT BIT(15) #define SOF_MAX98360A_SPEAKER_AMP_PRESENT BIT(16) +#define SOF_RT1015P_SPEAKER_AMP_PRESENT BIT(17) static unsigned long sof_nau8825_quirk = SOF_NAU8825_SSP_CODEC(0); @@ -483,6 +484,8 @@ static struct snd_soc_dai_link *sof_card_dai_links_create(struct device *dev, } else if (sof_nau8825_quirk & SOF_MAX98360A_SPEAKER_AMP_PRESENT) { max_98360a_dai_link(&links[id]); + } else if (sof_nau8825_quirk & SOF_RT1015P_SPEAKER_AMP_PRESENT) { + sof_rt1015p_dai_link(&links[id]); } else { goto devm_err; } @@ -576,6 +579,8 @@ static int sof_audio_probe(struct platform_device *pdev) if (sof_nau8825_quirk & SOF_MAX98373_SPEAKER_AMP_PRESENT) max_98373_set_codec_conf(&sof_audio_card_nau8825); + else if (sof_nau8825_quirk & SOF_RT1015P_SPEAKER_AMP_PRESENT) + sof_rt1015p_codec_conf(&sof_audio_card_nau8825); if (sof_nau8825_quirk & SOF_SSP_BT_OFFLOAD_PRESENT) sof_audio_card_nau8825.num_links++; @@ -642,6 +647,16 @@ static const struct platform_device_id board_ids[] = { SOF_SSP_BT_OFFLOAD_PRESENT), }, + { + .name = "adl_rt1015p_nau8825", + .driver_data = (kernel_ulong_t)(SOF_NAU8825_SSP_CODEC(0) | + SOF_SPEAKER_AMP_PRESENT | + SOF_RT1015P_SPEAKER_AMP_PRESENT | + SOF_NAU8825_SSP_AMP(1) | + SOF_NAU8825_NUM_HDMIDEV(4) | + SOF_BT_OFFLOAD_SSP(2) | + SOF_SSP_BT_OFFLOAD_PRESENT), + }, { } }; MODULE_DEVICE_TABLE(platform, board_ids); @@ -663,3 +678,4 @@ MODULE_AUTHOR("Mac Chiang "); MODULE_LICENSE("GPL"); MODULE_IMPORT_NS(SND_SOC_INTEL_HDA_DSP_COMMON); MODULE_IMPORT_NS(SND_SOC_INTEL_SOF_MAXIM_COMMON); +MODULE_IMPORT_NS(SND_SOC_INTEL_SOF_REALTEK_COMMON); diff --git a/sound/soc/intel/common/soc-acpi-intel-adl-match.c b/sound/soc/intel/common/soc-acpi-intel-adl-match.c index 9990d5502d26..ce4d8ec86f2c 100644 --- a/sound/soc/intel/common/soc-acpi-intel-adl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-adl-match.c @@ -430,6 +430,11 @@ static const struct snd_soc_acpi_codecs adl_rt5682_rt5682s_hp = { .codecs = {"10EC5682", "RTL5682"}, }; +static const struct snd_soc_acpi_codecs adl_rt1015p_amp = { + .num_codecs = 1, + .codecs = {"RTL1015"} +}; + static const struct snd_soc_acpi_codecs adl_rt1019p_amp = { .num_codecs = 1, .codecs = {"RTL1019"} @@ -495,6 +500,13 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_adl_machines[] = { .quirk_data = &adl_rt1019p_amp, .sof_tplg_filename = "sof-adl-rt1019-rt5682.tplg", }, + { + .id = "10508825", + .drv_name = "adl_rt1015p_nau8825", + .machine_quirk = snd_soc_acpi_codec_list, + .quirk_data = &adl_rt1015p_amp, + .sof_tplg_filename = "sof-adl-rt1015-nau8825.tplg", + }, { .id = "10508825", .drv_name = "sof_nau8825", From fba1b23befd88366fe646787b3797e64d7338fd2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Dec 2022 14:24:56 +0100 Subject: [PATCH 280/543] ASoC: Intel: sof-nau8825: fix module alias overflow [ Upstream commit 3e78986a840d59dd27e636eae3f52dc11125c835 ] The maximum name length for a platform_device_id entry is 20 characters including the trailing NUL byte. The sof_nau8825.c file exceeds that, which causes an obscure error message: sound/soc/intel/boards/snd-soc-sof_nau8825.mod.c:35:45: error: illegal character encoding in string literal [-Werror,-Winvalid-source-encoding] MODULE_ALIAS("platform:adl_max98373_nau8825"); ^~~~ include/linux/module.h:168:49: note: expanded from macro 'MODULE_ALIAS' ^~~~~~ include/linux/module.h:165:56: note: expanded from macro 'MODULE_INFO' ^~~~ include/linux/moduleparam.h:26:47: note: expanded from macro '__MODULE_INFO' = __MODULE_INFO_PREFIX __stringify(tag) "=" info I could not figure out how to make the module handling robust enough to handle this better, but as a quick fix, using slightly shorter names that are still unique avoids the build issue. Fixes: 8d0872f6239f ("ASoC: Intel: add sof-nau8825 machine driver") Signed-off-by: Arnd Bergmann Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20221221132515.2363276-1-arnd@kernel.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/boards/sof_nau8825.c | 8 ++++---- sound/soc/intel/common/soc-acpi-intel-adl-match.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sound/soc/intel/boards/sof_nau8825.c b/sound/soc/intel/boards/sof_nau8825.c index 27880224359d..009a41fbefa1 100644 --- a/sound/soc/intel/boards/sof_nau8825.c +++ b/sound/soc/intel/boards/sof_nau8825.c @@ -618,7 +618,7 @@ static const struct platform_device_id board_ids[] = { }, { - .name = "adl_rt1019p_nau8825", + .name = "adl_rt1019p_8825", .driver_data = (kernel_ulong_t)(SOF_NAU8825_SSP_CODEC(0) | SOF_SPEAKER_AMP_PRESENT | SOF_RT1019P_SPEAKER_AMP_PRESENT | @@ -626,7 +626,7 @@ static const struct platform_device_id board_ids[] = { SOF_NAU8825_NUM_HDMIDEV(4)), }, { - .name = "adl_max98373_nau8825", + .name = "adl_max98373_8825", .driver_data = (kernel_ulong_t)(SOF_NAU8825_SSP_CODEC(0) | SOF_SPEAKER_AMP_PRESENT | SOF_MAX98373_SPEAKER_AMP_PRESENT | @@ -637,7 +637,7 @@ static const struct platform_device_id board_ids[] = { }, { /* The limitation of length of char array, shorten the name */ - .name = "adl_mx98360a_nau8825", + .name = "adl_mx98360a_8825", .driver_data = (kernel_ulong_t)(SOF_NAU8825_SSP_CODEC(0) | SOF_SPEAKER_AMP_PRESENT | SOF_MAX98360A_SPEAKER_AMP_PRESENT | @@ -648,7 +648,7 @@ static const struct platform_device_id board_ids[] = { }, { - .name = "adl_rt1015p_nau8825", + .name = "adl_rt1015p_8825", .driver_data = (kernel_ulong_t)(SOF_NAU8825_SSP_CODEC(0) | SOF_SPEAKER_AMP_PRESENT | SOF_RT1015P_SPEAKER_AMP_PRESENT | diff --git a/sound/soc/intel/common/soc-acpi-intel-adl-match.c b/sound/soc/intel/common/soc-acpi-intel-adl-match.c index ce4d8ec86f2c..68b4fa352354 100644 --- a/sound/soc/intel/common/soc-acpi-intel-adl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-adl-match.c @@ -474,21 +474,21 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_adl_machines[] = { }, { .id = "10508825", - .drv_name = "adl_rt1019p_nau8825", + .drv_name = "adl_rt1019p_8825", .machine_quirk = snd_soc_acpi_codec_list, .quirk_data = &adl_rt1019p_amp, .sof_tplg_filename = "sof-adl-rt1019-nau8825.tplg", }, { .id = "10508825", - .drv_name = "adl_max98373_nau8825", + .drv_name = "adl_max98373_8825", .machine_quirk = snd_soc_acpi_codec_list, .quirk_data = &adl_max98373_amp, .sof_tplg_filename = "sof-adl-max98373-nau8825.tplg", }, { .id = "10508825", - .drv_name = "adl_mx98360a_nau8825", + .drv_name = "adl_mx98360a_8825", .machine_quirk = snd_soc_acpi_codec_list, .quirk_data = &adl_max98360a_amp, .sof_tplg_filename = "sof-adl-max98360a-nau8825.tplg", @@ -502,7 +502,7 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_adl_machines[] = { }, { .id = "10508825", - .drv_name = "adl_rt1015p_nau8825", + .drv_name = "adl_rt1015p_8825", .machine_quirk = snd_soc_acpi_codec_list, .quirk_data = &adl_rt1015p_amp, .sof_tplg_filename = "sof-adl-rt1015-nau8825.tplg", From 7928737dd1498bc365068d766ccf4eaeaff8ddf4 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Tue, 15 Nov 2022 09:49:02 +0800 Subject: [PATCH 281/543] drm/msm/dpu: Fix some kernel-doc comments [ Upstream commit 1bdeb321d1f856346fe0078af09c9e7ffbd2ca7a ] Make the description of @init to @p in dpu_encoder_phys_wb_init() and remove @wb_roi in dpu_encoder_phys_wb_setup_fb() to clear the below warnings: drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c:139: warning: Excess function parameter 'wb_roi' description in 'dpu_encoder_phys_wb_setup_fb' drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c:699: warning: Function parameter or member 'p' not described in 'dpu_encoder_phys_wb_init' drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c:699: warning: Excess function parameter 'init' description in 'dpu_encoder_phys_wb_init' Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=3067 Reported-by: Abaci Robot Signed-off-by: Yang Li Fixes: d7d0e73f7de3 ("drm/msm/dpu: introduce the dpu_encoder_phys_* for writeback") Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/511605/ Link: https://lore.kernel.org/r/20221115014902.45240-1-yang.lee@linux.alibaba.com Signed-off-by: Abhinav Kumar Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c index 7cbcef6efe17..62f6ff6abf41 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c @@ -132,7 +132,6 @@ static void dpu_encoder_phys_wb_set_qos(struct dpu_encoder_phys *phys_enc) * dpu_encoder_phys_wb_setup_fb - setup output framebuffer * @phys_enc: Pointer to physical encoder * @fb: Pointer to output framebuffer - * @wb_roi: Pointer to output region of interest */ static void dpu_encoder_phys_wb_setup_fb(struct dpu_encoder_phys *phys_enc, struct drm_framebuffer *fb) @@ -692,7 +691,7 @@ static void dpu_encoder_phys_wb_init_ops(struct dpu_encoder_phys_ops *ops) /** * dpu_encoder_phys_wb_init - initialize writeback encoder - * @init: Pointer to init info structure with initialization params + * @p: Pointer to init info structure with initialization params */ struct dpu_encoder_phys *dpu_encoder_phys_wb_init( struct dpu_enc_phys_init_params *p) From c6fa1de83fd87267ab24359e6fa52f98f5cee3f9 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Wed, 7 Dec 2022 10:59:22 +0400 Subject: [PATCH 282/543] drm/msm/dpu: Fix memory leak in msm_mdss_parse_data_bus_icc_path [ Upstream commit 45dac1352b55b1d8cb17f218936b2bc2bc1fb4ee ] of_icc_get() alloc resources for path1, we should release it when not need anymore. Early return when IS_ERR_OR_NULL(path0) may leak path1. Defer getting path1 to fix this. Fixes: b9364eed9232 ("drm/msm/dpu: Move min BW request and full BW disable back to mdss") Signed-off-by: Miaoqian Lin Reviewed-by: Douglas Anderson Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/514264/ Link: https://lore.kernel.org/r/20221207065922.2086368-1-linmq006@gmail.com Signed-off-by: Abhinav Kumar Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/msm_mdss.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index e13c5c12b775..3b8d6991b04e 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -46,15 +46,17 @@ struct msm_mdss { static int msm_mdss_parse_data_bus_icc_path(struct device *dev, struct msm_mdss *msm_mdss) { - struct icc_path *path0 = of_icc_get(dev, "mdp0-mem"); - struct icc_path *path1 = of_icc_get(dev, "mdp1-mem"); + struct icc_path *path0; + struct icc_path *path1; + path0 = of_icc_get(dev, "mdp0-mem"); if (IS_ERR_OR_NULL(path0)) return PTR_ERR_OR_ZERO(path0); msm_mdss->path[0] = path0; msm_mdss->num_paths = 1; + path1 = of_icc_get(dev, "mdp1-mem"); if (!IS_ERR_OR_NULL(path1)) { msm_mdss->path[1] = path1; msm_mdss->num_paths++; From c5d032482c23cf7aaec1490a77f7d85dbea1f3aa Mon Sep 17 00:00:00 2001 From: Emanuele Ghidoli Date: Fri, 23 Dec 2022 09:02:47 +0100 Subject: [PATCH 283/543] ASoC: wm8904: fix wrong outputs volume after power reactivation [ Upstream commit 472a6309c6467af89dbf660a8310369cc9cb041f ] Restore volume after charge pump and PGA activation to ensure that volume settings are correctly applied when re-enabling codec from SND_SOC_BIAS_OFF state. CLASS_W, CHARGE_PUMP and POWER_MANAGEMENT_2 register configuration affect how the volume register are applied and must be configured first. Fixes: a91eb199e4dc ("ASoC: Initial WM8904 CODEC driver") Link: https://lore.kernel.org/all/c7864c35-738c-a867-a6a6-ddf9f98df7e7@gmail.com/ Signed-off-by: Emanuele Ghidoli Signed-off-by: Francesco Dolcini Acked-by: Charles Keepax Link: https://lore.kernel.org/r/20221223080247.7258-1-francesco@dolcini.it Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm8904.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/codecs/wm8904.c b/sound/soc/codecs/wm8904.c index ca6a01a230af..791d8738d1c0 100644 --- a/sound/soc/codecs/wm8904.c +++ b/sound/soc/codecs/wm8904.c @@ -697,6 +697,7 @@ static int out_pga_event(struct snd_soc_dapm_widget *w, int dcs_mask; int dcs_l, dcs_r; int dcs_l_reg, dcs_r_reg; + int an_out_reg; int timeout; int pwr_reg; @@ -712,6 +713,7 @@ static int out_pga_event(struct snd_soc_dapm_widget *w, dcs_mask = WM8904_DCS_ENA_CHAN_0 | WM8904_DCS_ENA_CHAN_1; dcs_r_reg = WM8904_DC_SERVO_8; dcs_l_reg = WM8904_DC_SERVO_9; + an_out_reg = WM8904_ANALOGUE_OUT1_LEFT; dcs_l = 0; dcs_r = 1; break; @@ -720,6 +722,7 @@ static int out_pga_event(struct snd_soc_dapm_widget *w, dcs_mask = WM8904_DCS_ENA_CHAN_2 | WM8904_DCS_ENA_CHAN_3; dcs_r_reg = WM8904_DC_SERVO_6; dcs_l_reg = WM8904_DC_SERVO_7; + an_out_reg = WM8904_ANALOGUE_OUT2_LEFT; dcs_l = 2; dcs_r = 3; break; @@ -792,6 +795,10 @@ static int out_pga_event(struct snd_soc_dapm_widget *w, snd_soc_component_update_bits(component, reg, WM8904_HPL_ENA_OUTP | WM8904_HPR_ENA_OUTP, WM8904_HPL_ENA_OUTP | WM8904_HPR_ENA_OUTP); + + /* Update volume, requires PGA to be powered */ + val = snd_soc_component_read(component, an_out_reg); + snd_soc_component_write(component, an_out_reg, val); break; case SND_SOC_DAPM_POST_PMU: From 6e4131245240910ba6810927dc555df947017d8a Mon Sep 17 00:00:00 2001 From: Mikhail Zhilkin Date: Thu, 8 Dec 2022 23:28:29 +0300 Subject: [PATCH 284/543] mtd: parsers: scpart: fix __udivdi3 undefined on mips [ Upstream commit 105c14b84d93168431abba5d55e6c26fa4b65abb ] This fixes the following compile error on mips architecture with clang version 16.0.0 reported by the 0-DAY CI Kernel Test Service: ld.lld: error: undefined symbol: __udivdi3 referenced by scpart.c mtd/parsers/scpart.o:(scpart_parse) in archive drivers/built-in.a As a workaround this makes 'offs' a 32-bit type. This is enough, because the mtd containing partition table practically does not exceed 1 MB. We can revert this when the [Link] has been resolved. Link: https://github.com/ClangBuiltLinux/linux/issues/1635 Fixes: 9b78ef0c7997 ("mtd: parsers: add support for Sercomm partitions") Reported-by: kernel test robot Suggested-by: Arnd Bergmann Signed-off-by: Mikhail Zhilkin Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/805fe58e-690f-6a3f-5ebf-2f6f6e6e4599@gmail.com Signed-off-by: Sasha Levin --- drivers/mtd/parsers/scpart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/parsers/scpart.c b/drivers/mtd/parsers/scpart.c index 02601bb33de4..6e5e11c37078 100644 --- a/drivers/mtd/parsers/scpart.c +++ b/drivers/mtd/parsers/scpart.c @@ -50,7 +50,7 @@ static int scpart_scan_partmap(struct mtd_info *master, loff_t partmap_offs, int cnt = 0; int res = 0; int res2; - loff_t offs; + uint32_t offs; size_t retlen; struct sc_part_desc *pdesc = NULL; struct sc_part_desc *tmpdesc; From fc78cb85abd0c5dbe5c057057d5dca94a1dfa808 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 20 Dec 2022 15:13:34 +0100 Subject: [PATCH 285/543] mtd: cfi: allow building spi-intel standalone [ Upstream commit d19ab1f785d0b6b9f709799f0938658903821ba1 ] When MTD or MTD_CFI_GEOMETRY is disabled, the spi-intel driver fails to build, as it includes the shared CFI header: include/linux/mtd/cfi.h:62:2: error: #warning No CONFIG_MTD_CFI_Ix selected. No NOR chip support can work. [-Werror=cpp] 62 | #warning No CONFIG_MTD_CFI_Ix selected. No NOR chip support can work. linux/mtd/spi-nor.h does not actually need to include cfi.h, so remove the inclusion here to fix the warning. This uncovers a missing #include in spi-nor/core.c so add that there to prevent a different build issue. Fixes: e23e5a05d1fd ("mtd: spi-nor: intel-spi: Convert to SPI MEM") Signed-off-by: Arnd Bergmann Reviewed-by: Mika Westerberg Reviewed-by: Tokunori Ikegami Acked-by: Pratyush Yadav Reviewed-by: Tudor Ambarus Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20221220141352.1486360-1-arnd@kernel.org Signed-off-by: Sasha Levin --- drivers/mtd/spi-nor/core.c | 1 + include/linux/mtd/spi-nor.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c index 2e0655c0b606..5dbf52aa0355 100644 --- a/drivers/mtd/spi-nor/core.c +++ b/drivers/mtd/spi-nor/core.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 42218a1164f6..f92bf7f7a754 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -7,7 +7,6 @@ #define __LINUX_MTD_SPI_NOR_H #include -#include #include #include From 254328ba4cd3da2b4fb0afdc96e5033315e64441 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 2 Jan 2023 18:07:57 +0100 Subject: [PATCH 286/543] ALSA: usb-audio: Make sure to stop endpoints before closing EPs [ Upstream commit 0599313e26666e79f6e7fe1450588431b8cb25d5 ] At the PCM hw params, we may re-configure the endpoints and it's done by a temporary EP close followed by re-open. A potential problem there is that the EP might be already running internally at the PCM prepare stage; it's seen typically in the playback stream with the implicit feedback sync. As this stream start isn't tracked by the core PCM layer, we'd need to stop it explicitly, and that's the missing piece. This patch adds the stop_endpoints() call at snd_usb_hw_params() to assure the stream stop before closing the EPs. Fixes: bf6313a0ff76 ("ALSA: usb-audio: Refactor endpoint management") Link: https://lore.kernel.org/r/4e509aea-e563-e592-e652-ba44af6733fe@veniogames.com Link: https://lore.kernel.org/r/20230102170759.29610-2-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/pcm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index d2c652aa1385..535eb95bc9ee 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -527,6 +527,8 @@ static int snd_usb_hw_params(struct snd_pcm_substream *substream, if (snd_usb_endpoint_compatible(chip, subs->data_endpoint, fmt, hw_params)) goto unlock; + if (stop_endpoints(subs, false)) + sync_pending_stops(subs); close_endpoints(chip, subs); } From 0d81b8e86e7562da9961537519920b4974de513c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 2 Jan 2023 18:07:58 +0100 Subject: [PATCH 287/543] ALSA: usb-audio: Relax hw constraints for implicit fb sync [ Upstream commit d463ac1acb454fafed58f695cb3067fbf489f3a0 ] The fix commit the commit e4ea77f8e53f ("ALSA: usb-audio: Always apply the hw constraints for implicit fb sync") tried to address the bug where an incorrect PCM parameter is chosen when two (implicit fb) streams are set up at the same time. This change had, however, some side effect: once when the sync endpoint is chosen and set up, this restriction is applied at the next hw params unless it's freed via hw free explicitly. This patch is a workaround for the problem by relaxing the hw constraints a bit for the implicit fb sync. We still keep applying the hw constraints for implicit fb sync, but only when the matching sync EP is being used by other streams. Fixes: e4ea77f8e53f ("ALSA: usb-audio: Always apply the hw constraints for implicit fb sync") Reported-by: Ruud van Asseldonk Link: https://lore.kernel.org/r/4e509aea-e563-e592-e652-ba44af6733fe@veniogames.com Link: https://lore.kernel.org/r/20230102170759.29610-3-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/pcm.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 535eb95bc9ee..29838000eee0 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -939,8 +939,13 @@ get_sync_ep_from_substream(struct snd_usb_substream *subs) continue; /* for the implicit fb, check the sync ep as well */ ep = snd_usb_get_endpoint(chip, fp->sync_ep); - if (ep && ep->cur_audiofmt) - return ep; + if (ep && ep->cur_audiofmt) { + /* ditto, if the sync (data) ep is used by others, + * this stream is restricted by the sync ep + */ + if (ep != subs->sync_endpoint || ep->opened > 1) + return ep; + } } return NULL; } From 34aed3646e82082ce1685e1630911d9559d464f2 Mon Sep 17 00:00:00 2001 From: Biao Huang Date: Thu, 5 Jan 2023 09:07:11 +0800 Subject: [PATCH 288/543] stmmac: dwmac-mediatek: remove the dwmac_fix_mac_speed [ Upstream commit c26de7507d1f5ffa5daf6a4980ef7896889691a9 ] In current driver, MAC will always enable 2ns delay in RGMII mode, but that's not the correct usage. Remove the dwmac_fix_mac_speed() in driver, and recommend "rgmii-id" for phy-mode in device tree. Fixes: f2d356a6ab71 ("stmmac: dwmac-mediatek: add support for mt8195") Reviewed-by: Andrew Lunn Signed-off-by: Biao Huang Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../ethernet/stmicro/stmmac/dwmac-mediatek.c | 26 ------------------- 1 file changed, 26 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index d42e1afb6521..2f7d8e4561d9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -90,7 +90,6 @@ struct mediatek_dwmac_plat_data { struct mediatek_dwmac_variant { int (*dwmac_set_phy_interface)(struct mediatek_dwmac_plat_data *plat); int (*dwmac_set_delay)(struct mediatek_dwmac_plat_data *plat); - void (*dwmac_fix_mac_speed)(void *priv, unsigned int speed); /* clock ids to be requested */ const char * const *clk_list; @@ -443,32 +442,9 @@ static int mt8195_set_delay(struct mediatek_dwmac_plat_data *plat) return 0; } -static void mt8195_fix_mac_speed(void *priv, unsigned int speed) -{ - struct mediatek_dwmac_plat_data *priv_plat = priv; - - if ((phy_interface_mode_is_rgmii(priv_plat->phy_mode))) { - /* prefer 2ns fixed delay which is controlled by TXC_PHASE_CTRL, - * when link speed is 1Gbps with RGMII interface, - * Fall back to delay macro circuit for 10/100Mbps link speed. - */ - if (speed == SPEED_1000) - regmap_update_bits(priv_plat->peri_regmap, - MT8195_PERI_ETH_CTRL0, - MT8195_RGMII_TXC_PHASE_CTRL | - MT8195_DLY_GTXC_ENABLE | - MT8195_DLY_GTXC_INV | - MT8195_DLY_GTXC_STAGES, - MT8195_RGMII_TXC_PHASE_CTRL); - else - mt8195_set_delay(priv_plat); - } -} - static const struct mediatek_dwmac_variant mt8195_gmac_variant = { .dwmac_set_phy_interface = mt8195_set_interface, .dwmac_set_delay = mt8195_set_delay, - .dwmac_fix_mac_speed = mt8195_fix_mac_speed, .clk_list = mt8195_dwmac_clk_l, .num_clks = ARRAY_SIZE(mt8195_dwmac_clk_l), .dma_bit_mask = 35, @@ -619,8 +595,6 @@ static int mediatek_dwmac_common_data(struct platform_device *pdev, plat->bsp_priv = priv_plat; plat->init = mediatek_dwmac_init; plat->clks_config = mediatek_dwmac_clks_config; - if (priv_plat->variant->dwmac_fix_mac_speed) - plat->fix_mac_speed = priv_plat->variant->dwmac_fix_mac_speed; plat->safety_feat_cfg = devm_kzalloc(&pdev->dev, sizeof(*plat->safety_feat_cfg), From 5ac01865a2b99d6da24e66d12cfea2f7c185c610 Mon Sep 17 00:00:00 2001 From: Tung Nguyen Date: Thu, 5 Jan 2023 06:02:51 +0000 Subject: [PATCH 289/543] tipc: fix unexpected link reset due to discovery messages [ Upstream commit c244c092f1ed2acfb5af3d3da81e22367d3dd733 ] This unexpected behavior is observed: node 1 | node 2 ------ | ------ link is established | link is established reboot | link is reset up | send discovery message receive discovery message | link is established | link is established send discovery message | | receive discovery message | link is reset (unexpected) | send reset message link is reset | It is due to delayed re-discovery as described in function tipc_node_check_dest(): "this link endpoint has already reset and re-established contact with the peer, before receiving a discovery message from that node." However, commit 598411d70f85 has changed the condition for calling tipc_node_link_down() which was the acceptance of new media address. This commit fixes this by restoring the old and correct behavior. Fixes: 598411d70f85 ("tipc: make resetting of links non-atomic") Acked-by: Jon Maloy Signed-off-by: Tung Nguyen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/tipc/node.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 49ddc484c4fe..5e000fde8067 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1179,8 +1179,9 @@ void tipc_node_check_dest(struct net *net, u32 addr, bool addr_match = false; bool sign_match = false; bool link_up = false; + bool link_is_reset = false; bool accept_addr = false; - bool reset = true; + bool reset = false; char *if_name; unsigned long intv; u16 session; @@ -1200,14 +1201,14 @@ void tipc_node_check_dest(struct net *net, u32 addr, /* Prepare to validate requesting node's signature and media address */ l = le->link; link_up = l && tipc_link_is_up(l); + link_is_reset = l && tipc_link_is_reset(l); addr_match = l && !memcmp(&le->maddr, maddr, sizeof(*maddr)); sign_match = (signature == n->signature); /* These three flags give us eight permutations: */ if (sign_match && addr_match && link_up) { - /* All is fine. Do nothing. */ - reset = false; + /* All is fine. Ignore requests. */ /* Peer node is not a container/local namespace */ if (!n->peer_hash_mix) n->peer_hash_mix = hash_mixes; @@ -1232,6 +1233,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, */ accept_addr = true; *respond = true; + reset = true; } else if (!sign_match && addr_match && link_up) { /* Peer node rebooted. Two possibilities: * - Delayed re-discovery; this link endpoint has already @@ -1263,6 +1265,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, n->signature = signature; accept_addr = true; *respond = true; + reset = true; } if (!accept_addr) @@ -1291,6 +1294,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, tipc_link_fsm_evt(l, LINK_RESET_EVT); if (n->state == NODE_FAILINGOVER) tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); + link_is_reset = tipc_link_is_reset(l); le->link = l; n->link_cnt++; tipc_node_calculate_timer(n, l); @@ -1303,7 +1307,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, memcpy(&le->maddr, maddr, sizeof(*maddr)); exit: tipc_node_write_unlock(n); - if (reset && l && !tipc_link_is_reset(l)) + if (reset && !link_is_reset) tipc_node_link_down(n, b->identity, false); tipc_node_put(n); } From 90d5095b76f4ee50cf25cb80cda2def4a403c951 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2022 10:46:38 -0400 Subject: [PATCH 290/543] NFSD: Pass the target nfsd_file to nfsd_commit() [ Upstream commit c252849082ff525af18b4f253b3c9ece94e951ed ] In a moment I'm going to introduce separate nfsd_file types, one of which is garbage-collected; the other, not. The garbage-collected variety is to be used by NFSv2 and v3, and the non-garbage-collected variety is to be used by NFSv4. nfsd_commit() is invoked by both NFSv3 and NFSv4 consumers. We want nfsd_commit() to find and use the correct variety of cached nfsd_file object for the NFS version that is in use. Signed-off-by: Chuck Lever Tested-by: Jeff Layton Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Stable-dep-of: 0b3a551fa58b ("nfsd: fix handling of cached open files in nfsd4_open codepath") Signed-off-by: Sasha Levin --- fs/nfsd/nfs3proc.c | 10 +++++++++- fs/nfsd/nfs4proc.c | 11 ++++++++++- fs/nfsd/vfs.c | 15 ++++----------- fs/nfsd/vfs.h | 3 ++- 4 files changed, 25 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 923d9a80df92..ff2920546333 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -13,6 +13,7 @@ #include "cache.h" #include "xdr3.h" #include "vfs.h" +#include "filecache.h" #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -763,6 +764,7 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) { struct nfsd3_commitargs *argp = rqstp->rq_argp; struct nfsd3_commitres *resp = rqstp->rq_resp; + struct nfsd_file *nf; dprintk("nfsd: COMMIT(3) %s %u@%Lu\n", SVCFH_fmt(&argp->fh), @@ -770,8 +772,14 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) (unsigned long long) argp->offset); fh_copy(&resp->fh, &argp->fh); - resp->status = nfsd_commit(rqstp, &resp->fh, argp->offset, + resp->status = nfsd_file_acquire(rqstp, &resp->fh, NFSD_MAY_WRITE | + NFSD_MAY_NOT_BREAK_LEASE, &nf); + if (resp->status) + goto out; + resp->status = nfsd_commit(rqstp, &resp->fh, nf, argp->offset, argp->count, resp->verf); + nfsd_file_put(nf); +out: return rpc_success; } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index c7329523a10f..30a08ec31a70 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -731,10 +731,19 @@ nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_commit *commit = &u->commit; + struct nfsd_file *nf; + __be32 status; - return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset, + status = nfsd_file_acquire(rqstp, &cstate->current_fh, NFSD_MAY_WRITE | + NFSD_MAY_NOT_BREAK_LEASE, &nf); + if (status != nfs_ok) + return status; + + status = nfsd_commit(rqstp, &cstate->current_fh, nf, commit->co_offset, commit->co_count, (__be32 *)commit->co_verf.data); + nfsd_file_put(nf); + return status; } static __be32 diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 849a720ab43f..f1919834a99d 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1133,6 +1133,7 @@ out: * nfsd_commit - Commit pending writes to stable storage * @rqstp: RPC request being processed * @fhp: NFS filehandle + * @nf: target file * @offset: raw offset from beginning of file * @count: raw count of bytes to sync * @verf: filled in with the server's current write verifier @@ -1149,19 +1150,13 @@ out: * An nfsstat value in network byte order. */ __be32 -nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset, - u32 count, __be32 *verf) +nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, + u64 offset, u32 count, __be32 *verf) { + __be32 err = nfs_ok; u64 maxbytes; loff_t start, end; struct nfsd_net *nn; - struct nfsd_file *nf; - __be32 err; - - err = nfsd_file_acquire(rqstp, fhp, - NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf); - if (err) - goto out; /* * Convert the client-provided (offset, count) range to a @@ -1202,8 +1197,6 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset, } else nfsd_copy_write_verifier(verf, nn); - nfsd_file_put(nf); -out: return err; } diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 120521bc7b24..9744b041105b 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -88,7 +88,8 @@ __be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *); __be32 nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct svc_fh *resfhp, struct nfsd_attrs *iap); __be32 nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp, - u64 offset, u32 count, __be32 *verf); + struct nfsd_file *nf, u64 offset, u32 count, + __be32 *verf); #ifdef CONFIG_NFSD_V4 __be32 nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name, void **bufp, int *lenp); From 7e4d3d500458aec3bb58c9429999dd040077326b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2022 10:46:44 -0400 Subject: [PATCH 291/543] NFSD: Revert "NFSD: NFSv4 CLOSE should release an nfsd_file immediately" [ Upstream commit dcf3f80965ca787c70def402cdf1553c93c75529 ] This reverts commit 5e138c4a750dc140d881dab4a8804b094bbc08d2. That commit attempted to make files available to other users as soon as all NFSv4 clients were done with them, rather than waiting until the filecache LRU had garbage collected them. It gets the reference counting wrong, for one thing. But it also misses that DELEGRETURN should release a file in the same fashion. In fact, any nfsd_file_put() on an file held open by an NFSv4 client needs potentially to release the file immediately... Clear the way for implementing that idea. Signed-off-by: Chuck Lever Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Stable-dep-of: 0b3a551fa58b ("nfsd: fix handling of cached open files in nfsd4_open codepath") Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 18 ------------------ fs/nfsd/filecache.h | 1 - fs/nfsd/nfs4state.c | 4 ++-- 3 files changed, 2 insertions(+), 21 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index ec3fceb92236..babea79d3f6f 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -444,24 +444,6 @@ nfsd_file_put(struct nfsd_file *nf) nfsd_file_put_noref(nf); } -/** - * nfsd_file_close - Close an nfsd_file - * @nf: nfsd_file to close - * - * If this is the final reference for @nf, free it immediately. - * This reflects an on-the-wire CLOSE or DELEGRETURN into the - * VFS and exported filesystem. - */ -void nfsd_file_close(struct nfsd_file *nf) -{ - nfsd_file_put(nf); - if (refcount_dec_if_one(&nf->nf_ref)) { - nfsd_file_unhash(nf); - nfsd_file_lru_remove(nf); - nfsd_file_free(nf); - } -} - struct nfsd_file * nfsd_file_get(struct nfsd_file *nf) { diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 357832bac736..6b012ea4bd9d 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -52,7 +52,6 @@ void nfsd_file_cache_shutdown(void); int nfsd_file_cache_start_net(struct net *net); void nfsd_file_cache_shutdown_net(struct net *net); void nfsd_file_put(struct nfsd_file *nf); -void nfsd_file_close(struct nfsd_file *nf); struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); void nfsd_file_close_inode_sync(struct inode *inode); bool nfsd_file_is_cached(struct inode *inode); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 52b5552d0d70..16c3e991ddcc 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -842,9 +842,9 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) swap(f2, fp->fi_fds[O_RDWR]); spin_unlock(&fp->fi_lock); if (f1) - nfsd_file_close(f1); + nfsd_file_put(f1); if (f2) - nfsd_file_close(f2); + nfsd_file_put(f2); } } From 1c36dc563e1c9649b040c2af00355b21d905a958 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2022 10:46:51 -0400 Subject: [PATCH 292/543] NFSD: Add an NFSD_FILE_GC flag to enable nfsd_file garbage collection [ Upstream commit 4d1ea8455716ca070e3cd85767e6f6a562a58b1b ] NFSv4 operations manage the lifetime of nfsd_file items they use by means of NFSv4 OPEN and CLOSE. Hence there's no need for them to be garbage collected. Introduce a mechanism to enable garbage collection for nfsd_file items used only by NFSv2/3 callers. Note that the change in nfsd_file_put() ensures that both CLOSE and DELEGRETURN will actually close out and free an nfsd_file on last reference of a non-garbage-collected file. Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=394 Suggested-by: Trond Myklebust Signed-off-by: Chuck Lever Tested-by: Jeff Layton Reviewed-by: NeilBrown Reviewed-by: Jeff Layton Stable-dep-of: 0b3a551fa58b ("nfsd: fix handling of cached open files in nfsd4_open codepath") Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 63 +++++++++++++++++++++++++++++++++++++++------ fs/nfsd/filecache.h | 3 +++ fs/nfsd/nfs3proc.c | 4 +-- fs/nfsd/trace.h | 3 ++- fs/nfsd/vfs.c | 4 +-- 5 files changed, 64 insertions(+), 13 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index babea79d3f6f..cee44405cf7d 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -63,6 +63,7 @@ struct nfsd_file_lookup_key { struct net *net; const struct cred *cred; unsigned char need; + bool gc; enum nfsd_file_lookup_type type; }; @@ -162,6 +163,8 @@ static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg, return 1; if (!nfsd_match_cred(nf->nf_cred, key->cred)) return 1; + if (!!test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc) + return 1; if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) return 1; break; @@ -297,6 +300,8 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) nf->nf_flags = 0; __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); + if (key->gc) + __set_bit(NFSD_FILE_GC, &nf->nf_flags); nf->nf_inode = key->inode; /* nf_ref is pre-incremented for hash table */ refcount_set(&nf->nf_ref, 2); @@ -428,16 +433,27 @@ nfsd_file_put_noref(struct nfsd_file *nf) } } +static void +nfsd_file_unhash_and_put(struct nfsd_file *nf) +{ + if (nfsd_file_unhash(nf)) + nfsd_file_put_noref(nf); +} + void nfsd_file_put(struct nfsd_file *nf) { might_sleep(); - nfsd_file_lru_add(nf); - if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) { + if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) + nfsd_file_lru_add(nf); + else if (refcount_read(&nf->nf_ref) == 2) + nfsd_file_unhash_and_put(nf); + + if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { nfsd_file_flush(nf); nfsd_file_put_noref(nf); - } else if (nf->nf_file) { + } else if (nf->nf_file && test_bit(NFSD_FILE_GC, &nf->nf_flags)) { nfsd_file_put_noref(nf); nfsd_file_schedule_laundrette(); } else @@ -1016,12 +1032,14 @@ nfsd_file_is_cached(struct inode *inode) static __be32 nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **pnf, bool open) + unsigned int may_flags, struct nfsd_file **pnf, + bool open, bool want_gc) { struct nfsd_file_lookup_key key = { .type = NFSD_FILE_KEY_FULL, .need = may_flags & NFSD_FILE_MAY_MASK, .net = SVC_NET(rqstp), + .gc = want_gc, }; bool open_retry = true; struct nfsd_file *nf; @@ -1117,14 +1135,35 @@ open_file: * then unhash. */ if (status != nfs_ok || key.inode->i_nlink == 0) - if (nfsd_file_unhash(nf)) - nfsd_file_put_noref(nf); + nfsd_file_unhash_and_put(nf); clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); smp_mb__after_atomic(); wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); goto out; } +/** + * nfsd_file_acquire_gc - Get a struct nfsd_file with an open file + * @rqstp: the RPC transaction being executed + * @fhp: the NFS filehandle of the file to be opened + * @may_flags: NFSD_MAY_ settings for the file + * @pnf: OUT: new or found "struct nfsd_file" object + * + * The nfsd_file object returned by this API is reference-counted + * and garbage-collected. The object is retained for a few + * seconds after the final nfsd_file_put() in case the caller + * wants to re-use it. + * + * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in + * network byte order is returned. + */ +__be32 +nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct nfsd_file **pnf) +{ + return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, true); +} + /** * nfsd_file_acquire - Get a struct nfsd_file with an open file * @rqstp: the RPC transaction being executed @@ -1132,6 +1171,10 @@ open_file: * @may_flags: NFSD_MAY_ settings for the file * @pnf: OUT: new or found "struct nfsd_file" object * + * The nfsd_file_object returned by this API is reference-counted + * but not garbage-collected. The object is unhashed after the + * final nfsd_file_put(). + * * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in * network byte order is returned. */ @@ -1139,7 +1182,7 @@ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, false); } /** @@ -1149,6 +1192,10 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, * @may_flags: NFSD_MAY_ settings for the file * @pnf: OUT: new or found "struct nfsd_file" object * + * The nfsd_file_object returned by this API is reference-counted + * but not garbage-collected. The object is released immediately + * one RCU grace period after the final nfsd_file_put(). + * * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in * network byte order is returned. */ @@ -1156,7 +1203,7 @@ __be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false, false); } /* diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 6b012ea4bd9d..b7efb2c3ddb1 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -38,6 +38,7 @@ struct nfsd_file { #define NFSD_FILE_HASHED (0) #define NFSD_FILE_PENDING (1) #define NFSD_FILE_REFERENCED (2) +#define NFSD_FILE_GC (3) unsigned long nf_flags; struct inode *nf_inode; /* don't deref */ refcount_t nf_ref; @@ -55,6 +56,8 @@ void nfsd_file_put(struct nfsd_file *nf); struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); void nfsd_file_close_inode_sync(struct inode *inode); bool nfsd_file_is_cached(struct inode *inode); +__be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct nfsd_file **nfp); __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **nfp); __be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index ff2920546333..d01b29aba662 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -772,8 +772,8 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) (unsigned long long) argp->offset); fh_copy(&resp->fh, &argp->fh); - resp->status = nfsd_file_acquire(rqstp, &resp->fh, NFSD_MAY_WRITE | - NFSD_MAY_NOT_BREAK_LEASE, &nf); + resp->status = nfsd_file_acquire_gc(rqstp, &resp->fh, NFSD_MAY_WRITE | + NFSD_MAY_NOT_BREAK_LEASE, &nf); if (resp->status) goto out; resp->status = nfsd_commit(rqstp, &resp->fh, nf, argp->offset, diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index d4b6839bb459..3fcfeb7b560f 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -817,7 +817,8 @@ DEFINE_CLID_EVENT(confirmed_r); __print_flags(val, "|", \ { 1 << NFSD_FILE_HASHED, "HASHED" }, \ { 1 << NFSD_FILE_PENDING, "PENDING" }, \ - { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}) + { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}, \ + { 1 << NFSD_FILE_GC, "GC"}) DECLARE_EVENT_CLASS(nfsd_file_class, TP_PROTO(struct nfsd_file *nf), diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index f1919834a99d..2934ab1d9862 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1085,7 +1085,7 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 err; trace_nfsd_read_start(rqstp, fhp, offset, *count); - err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf); + err = nfsd_file_acquire_gc(rqstp, fhp, NFSD_MAY_READ, &nf); if (err) return err; @@ -1117,7 +1117,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, trace_nfsd_write_start(rqstp, fhp, offset, *cnt); - err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_WRITE, &nf); + err = nfsd_file_acquire_gc(rqstp, fhp, NFSD_MAY_WRITE, &nf); if (err) goto out; From d7a6c190012af6d5821356343b4e0eae9c164000 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 2 Nov 2022 14:44:47 -0400 Subject: [PATCH 293/543] nfsd: remove the pages_flushed statistic from filecache [ Upstream commit 1f696e230ea5198e393368b319eb55651828d687 ] We're counting mapping->nrpages, but not all of those are necessarily dirty. We don't really have a simple way to count just the dirty pages, so just remove this stat since it's not accurate. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Stable-dep-of: 0b3a551fa58b ("nfsd: fix handling of cached open files in nfsd4_open codepath") Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index cee44405cf7d..28fff3672df9 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -33,7 +33,6 @@ static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions); static DEFINE_PER_CPU(unsigned long, nfsd_file_releases); static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age); -static DEFINE_PER_CPU(unsigned long, nfsd_file_pages_flushed); static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions); struct nfsd_fcache_disposal { @@ -371,7 +370,6 @@ nfsd_file_flush(struct nfsd_file *nf) if (!file || !(file->f_mode & FMODE_WRITE)) return; - this_cpu_add(nfsd_file_pages_flushed, file->f_mapping->nrpages); if (vfs_fsync(file, 1) != 0) nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); } @@ -998,7 +996,6 @@ nfsd_file_cache_shutdown(void) per_cpu(nfsd_file_acquisitions, i) = 0; per_cpu(nfsd_file_releases, i) = 0; per_cpu(nfsd_file_total_age, i) = 0; - per_cpu(nfsd_file_pages_flushed, i) = 0; per_cpu(nfsd_file_evictions, i) = 0; } } @@ -1213,7 +1210,7 @@ nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, */ int nfsd_file_cache_stats_show(struct seq_file *m, void *v) { - unsigned long releases = 0, pages_flushed = 0, evictions = 0; + unsigned long releases = 0, evictions = 0; unsigned long hits = 0, acquisitions = 0; unsigned int i, count = 0, buckets = 0; unsigned long lru = 0, total_age = 0; @@ -1241,7 +1238,6 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v) releases += per_cpu(nfsd_file_releases, i); total_age += per_cpu(nfsd_file_total_age, i); evictions += per_cpu(nfsd_file_evictions, i); - pages_flushed += per_cpu(nfsd_file_pages_flushed, i); } seq_printf(m, "total entries: %u\n", count); @@ -1255,6 +1251,5 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v) seq_printf(m, "mean age (ms): %ld\n", total_age / releases); else seq_printf(m, "mean age (ms): -\n"); - seq_printf(m, "pages flushed: %lu\n", pages_flushed); return 0; } From 4ec7a0f277d07dceabb591e927a5d8279d980da2 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 2 Nov 2022 14:44:48 -0400 Subject: [PATCH 294/543] nfsd: reorganize filecache.c [ Upstream commit 8214118589881b2d390284410c5ff275e7a5e03c ] In a coming patch, we're going to rework how the filecache refcounting works. Move some code around in the function to reduce the churn in the later patches, and rename some of the functions with (hopefully) clearer names: nfsd_file_flush becomes nfsd_file_fsync, and nfsd_file_unhash_and_dispose is renamed to nfsd_file_unhash_and_queue. Also, the nfsd_file_put_final tracepoint is renamed to nfsd_file_free, to better match the name of the function from which it's called. Signed-off-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever Stable-dep-of: 0b3a551fa58b ("nfsd: fix handling of cached open files in nfsd4_open codepath") Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 111 ++++++++++++++++++++++---------------------- fs/nfsd/trace.h | 4 +- 2 files changed, 58 insertions(+), 57 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 28fff3672df9..f54dd6695741 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -310,16 +310,59 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) return nf; } +static void +nfsd_file_fsync(struct nfsd_file *nf) +{ + struct file *file = nf->nf_file; + + if (!file || !(file->f_mode & FMODE_WRITE)) + return; + if (vfs_fsync(file, 1) != 0) + nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); +} + +static int +nfsd_file_check_write_error(struct nfsd_file *nf) +{ + struct file *file = nf->nf_file; + + if (!file || !(file->f_mode & FMODE_WRITE)) + return 0; + return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)); +} + +static void +nfsd_file_hash_remove(struct nfsd_file *nf) +{ + trace_nfsd_file_unhash(nf); + + if (nfsd_file_check_write_error(nf)) + nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); + rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash, + nfsd_file_rhash_params); +} + +static bool +nfsd_file_unhash(struct nfsd_file *nf) +{ + if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + nfsd_file_hash_remove(nf); + return true; + } + return false; +} + static bool nfsd_file_free(struct nfsd_file *nf) { s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime)); bool flush = false; + trace_nfsd_file_free(nf); + this_cpu_inc(nfsd_file_releases); this_cpu_add(nfsd_file_total_age, age); - trace_nfsd_file_put_final(nf); if (nf->nf_mark) nfsd_file_mark_put(nf->nf_mark); if (nf->nf_file) { @@ -353,27 +396,6 @@ nfsd_file_check_writeback(struct nfsd_file *nf) mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); } -static int -nfsd_file_check_write_error(struct nfsd_file *nf) -{ - struct file *file = nf->nf_file; - - if (!file || !(file->f_mode & FMODE_WRITE)) - return 0; - return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)); -} - -static void -nfsd_file_flush(struct nfsd_file *nf) -{ - struct file *file = nf->nf_file; - - if (!file || !(file->f_mode & FMODE_WRITE)) - return; - if (vfs_fsync(file, 1) != 0) - nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); -} - static void nfsd_file_lru_add(struct nfsd_file *nf) { set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); @@ -387,31 +409,18 @@ static void nfsd_file_lru_remove(struct nfsd_file *nf) trace_nfsd_file_lru_del(nf); } -static void -nfsd_file_hash_remove(struct nfsd_file *nf) +struct nfsd_file * +nfsd_file_get(struct nfsd_file *nf) { - trace_nfsd_file_unhash(nf); - - if (nfsd_file_check_write_error(nf)) - nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); - rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash, - nfsd_file_rhash_params); -} - -static bool -nfsd_file_unhash(struct nfsd_file *nf) -{ - if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - nfsd_file_hash_remove(nf); - return true; - } - return false; + if (likely(refcount_inc_not_zero(&nf->nf_ref))) + return nf; + return NULL; } static void -nfsd_file_unhash_and_dispose(struct nfsd_file *nf, struct list_head *dispose) +nfsd_file_unhash_and_queue(struct nfsd_file *nf, struct list_head *dispose) { - trace_nfsd_file_unhash_and_dispose(nf); + trace_nfsd_file_unhash_and_queue(nf); if (nfsd_file_unhash(nf)) { /* caller must call nfsd_file_dispose_list() later */ nfsd_file_lru_remove(nf); @@ -449,7 +458,7 @@ nfsd_file_put(struct nfsd_file *nf) nfsd_file_unhash_and_put(nf); if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - nfsd_file_flush(nf); + nfsd_file_fsync(nf); nfsd_file_put_noref(nf); } else if (nf->nf_file && test_bit(NFSD_FILE_GC, &nf->nf_flags)) { nfsd_file_put_noref(nf); @@ -458,14 +467,6 @@ nfsd_file_put(struct nfsd_file *nf) nfsd_file_put_noref(nf); } -struct nfsd_file * -nfsd_file_get(struct nfsd_file *nf) -{ - if (likely(refcount_inc_not_zero(&nf->nf_ref))) - return nf; - return NULL; -} - static void nfsd_file_dispose_list(struct list_head *dispose) { @@ -474,7 +475,7 @@ nfsd_file_dispose_list(struct list_head *dispose) while(!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); list_del_init(&nf->nf_lru); - nfsd_file_flush(nf); + nfsd_file_fsync(nf); nfsd_file_put_noref(nf); } } @@ -488,7 +489,7 @@ nfsd_file_dispose_list_sync(struct list_head *dispose) while(!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); list_del_init(&nf->nf_lru); - nfsd_file_flush(nf); + nfsd_file_fsync(nf); if (!refcount_dec_and_test(&nf->nf_ref)) continue; if (nfsd_file_free(nf)) @@ -688,7 +689,7 @@ __nfsd_file_close_inode(struct inode *inode, struct list_head *dispose) nfsd_file_rhash_params); if (!nf) break; - nfsd_file_unhash_and_dispose(nf, dispose); + nfsd_file_unhash_and_queue(nf, dispose); count++; } while (1); rcu_read_unlock(); @@ -890,7 +891,7 @@ __nfsd_file_cache_purge(struct net *net) nf = rhashtable_walk_next(&iter); while (!IS_ERR_OR_NULL(nf)) { if (!net || nf->nf_net == net) - nfsd_file_unhash_and_dispose(nf, &dispose); + nfsd_file_unhash_and_queue(nf, &dispose); nf = rhashtable_walk_next(&iter); } diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 3fcfeb7b560f..55e9e19cb1ec 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -850,10 +850,10 @@ DEFINE_EVENT(nfsd_file_class, name, \ TP_PROTO(struct nfsd_file *nf), \ TP_ARGS(nf)) -DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final); +DEFINE_NFSD_FILE_EVENT(nfsd_file_free); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash); DEFINE_NFSD_FILE_EVENT(nfsd_file_put); -DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_dispose); +DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_queue); TRACE_EVENT(nfsd_file_alloc, TP_PROTO( From 65469b10522fcd9ab30543c84a66d927df8c6828 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Nov 2022 16:22:48 -0400 Subject: [PATCH 295/543] NFSD: Add an nfsd_file_fsync tracepoint [ Upstream commit d7064eaf688cfe454c50db9f59298463d80d403c ] Add a tracepoint to capture the number of filecache-triggered fsync calls and which files needed it. Also, record when an fsync triggers a write verifier reset. Examples: <...>-97 [007] 262.505611: nfsd_file_free: inode=0xffff888171e08140 ref=0 flags=GC may=WRITE nf_file=0xffff8881373d2400 <...>-97 [007] 262.505612: nfsd_file_fsync: inode=0xffff888171e08140 ref=0 flags=GC may=WRITE nf_file=0xffff8881373d2400 ret=0 <...>-97 [007] 262.505623: nfsd_file_free: inode=0xffff888171e08dc0 ref=0 flags=GC may=WRITE nf_file=0xffff8881373d1e00 <...>-97 [007] 262.505624: nfsd_file_fsync: inode=0xffff888171e08dc0 ref=0 flags=GC may=WRITE nf_file=0xffff8881373d1e00 ret=0 Signed-off-by: Chuck Lever Reviewed-by: Jeff Layton Stable-dep-of: 0b3a551fa58b ("nfsd: fix handling of cached open files in nfsd4_open codepath") Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 5 ++++- fs/nfsd/trace.h | 31 +++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index f54dd6695741..7c673f98f95c 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -314,10 +314,13 @@ static void nfsd_file_fsync(struct nfsd_file *nf) { struct file *file = nf->nf_file; + int ret; if (!file || !(file->f_mode & FMODE_WRITE)) return; - if (vfs_fsync(file, 1) != 0) + ret = vfs_fsync(file, 1); + trace_nfsd_file_fsync(nf, ret); + if (ret) nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); } diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 55e9e19cb1ec..08e2738adf8f 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -1182,6 +1182,37 @@ DEFINE_EVENT(nfsd_file_lruwalk_class, name, \ DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed); DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed); +TRACE_EVENT(nfsd_file_fsync, + TP_PROTO( + const struct nfsd_file *nf, + int ret + ), + TP_ARGS(nf, ret), + TP_STRUCT__entry( + __field(void *, nf_inode) + __field(int, nf_ref) + __field(int, ret) + __field(unsigned long, nf_flags) + __field(unsigned char, nf_may) + __field(struct file *, nf_file) + ), + TP_fast_assign( + __entry->nf_inode = nf->nf_inode; + __entry->nf_ref = refcount_read(&nf->nf_ref); + __entry->ret = ret; + __entry->nf_flags = nf->nf_flags; + __entry->nf_may = nf->nf_may; + __entry->nf_file = nf->nf_file; + ), + TP_printk("inode=%p ref=%d flags=%s may=%s nf_file=%p ret=%d", + __entry->nf_inode, + __entry->nf_ref, + show_nf_flags(__entry->nf_flags), + show_nfsd_may_flags(__entry->nf_may), + __entry->nf_file, __entry->ret + ) +); + #include "cache.h" TRACE_DEFINE_ENUM(RC_DROPIT); From 0aba4dd7f269bf50407d8d1d776148e8246b2dcb Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sun, 11 Dec 2022 06:19:33 -0500 Subject: [PATCH 296/543] nfsd: rework refcounting in filecache [ Upstream commit ac3a2585f018f10039b4a856dcb122da88c1c1c9 ] The filecache refcounting is a bit non-standard for something searchable by RCU, in that we maintain a sentinel reference while it's hashed. This in turn requires that we have to do things differently in the "put" depending on whether its hashed, which we believe to have led to races. There are other problems in here too. nfsd_file_close_inode_sync can end up freeing an nfsd_file while there are still outstanding references to it, and there are a number of subtle ToC/ToU races. Rework the code so that the refcount is what drives the lifecycle. When the refcount goes to zero, then unhash and rcu free the object. A task searching for a nfsd_file is allowed to bump its refcount, but only if it's not already 0. Ensure that we don't make any other changes to it until a reference is held. With this change, the LRU carries a reference. Take special care to deal with it when removing an entry from the list, and ensure that we only repurpose the nf_lru list_head when the refcount is 0 to ensure exclusive access to it. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Stable-dep-of: 0b3a551fa58b ("nfsd: fix handling of cached open files in nfsd4_open codepath") Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 328 +++++++++++++++++++++++--------------------- fs/nfsd/trace.h | 51 +++---- 2 files changed, 194 insertions(+), 185 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 7c673f98f95c..9bf78506d071 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -302,8 +302,7 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) if (key->gc) __set_bit(NFSD_FILE_GC, &nf->nf_flags); nf->nf_inode = key->inode; - /* nf_ref is pre-incremented for hash table */ - refcount_set(&nf->nf_ref, 2); + refcount_set(&nf->nf_ref, 1); nf->nf_may = key->need; nf->nf_mark = NULL; } @@ -355,24 +354,35 @@ nfsd_file_unhash(struct nfsd_file *nf) return false; } -static bool +static void nfsd_file_free(struct nfsd_file *nf) { s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime)); - bool flush = false; trace_nfsd_file_free(nf); this_cpu_inc(nfsd_file_releases); this_cpu_add(nfsd_file_total_age, age); + nfsd_file_unhash(nf); + + /* + * We call fsync here in order to catch writeback errors. It's not + * strictly required by the protocol, but an nfsd_file could get + * evicted from the cache before a COMMIT comes in. If another + * task were to open that file in the interim and scrape the error, + * then the client may never see it. By calling fsync here, we ensure + * that writeback happens before the entry is freed, and that any + * errors reported result in the write verifier changing. + */ + nfsd_file_fsync(nf); + if (nf->nf_mark) nfsd_file_mark_put(nf->nf_mark); if (nf->nf_file) { get_file(nf->nf_file); filp_close(nf->nf_file, NULL); fput(nf->nf_file); - flush = true; } /* @@ -380,10 +390,9 @@ nfsd_file_free(struct nfsd_file *nf) * WARN and leak it to preserve system stability. */ if (WARN_ON_ONCE(!list_empty(&nf->nf_lru))) - return flush; + return; call_rcu(&nf->nf_rcu, nfsd_file_slab_free); - return flush; } static bool @@ -399,17 +408,23 @@ nfsd_file_check_writeback(struct nfsd_file *nf) mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); } -static void nfsd_file_lru_add(struct nfsd_file *nf) +static bool nfsd_file_lru_add(struct nfsd_file *nf) { set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); - if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) + if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) { trace_nfsd_file_lru_add(nf); + return true; + } + return false; } -static void nfsd_file_lru_remove(struct nfsd_file *nf) +static bool nfsd_file_lru_remove(struct nfsd_file *nf) { - if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) + if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) { trace_nfsd_file_lru_del(nf); + return true; + } + return false; } struct nfsd_file * @@ -420,54 +435,48 @@ nfsd_file_get(struct nfsd_file *nf) return NULL; } -static void -nfsd_file_unhash_and_queue(struct nfsd_file *nf, struct list_head *dispose) -{ - trace_nfsd_file_unhash_and_queue(nf); - if (nfsd_file_unhash(nf)) { - /* caller must call nfsd_file_dispose_list() later */ - nfsd_file_lru_remove(nf); - list_add(&nf->nf_lru, dispose); - } -} - -static void -nfsd_file_put_noref(struct nfsd_file *nf) -{ - trace_nfsd_file_put(nf); - - if (refcount_dec_and_test(&nf->nf_ref)) { - WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags)); - nfsd_file_lru_remove(nf); - nfsd_file_free(nf); - } -} - -static void -nfsd_file_unhash_and_put(struct nfsd_file *nf) -{ - if (nfsd_file_unhash(nf)) - nfsd_file_put_noref(nf); -} - +/** + * nfsd_file_put - put the reference to a nfsd_file + * @nf: nfsd_file of which to put the reference + * + * Put a reference to a nfsd_file. In the non-GC case, we just put the + * reference immediately. In the GC case, if the reference would be + * the last one, the put it on the LRU instead to be cleaned up later. + */ void nfsd_file_put(struct nfsd_file *nf) { might_sleep(); + trace_nfsd_file_put(nf); - if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) - nfsd_file_lru_add(nf); - else if (refcount_read(&nf->nf_ref) == 2) - nfsd_file_unhash_and_put(nf); + if (test_bit(NFSD_FILE_GC, &nf->nf_flags) && + test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + /* + * If this is the last reference (nf_ref == 1), then try to + * transfer it to the LRU. + */ + if (refcount_dec_not_one(&nf->nf_ref)) + return; - if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - nfsd_file_fsync(nf); - nfsd_file_put_noref(nf); - } else if (nf->nf_file && test_bit(NFSD_FILE_GC, &nf->nf_flags)) { - nfsd_file_put_noref(nf); - nfsd_file_schedule_laundrette(); - } else - nfsd_file_put_noref(nf); + /* Try to add it to the LRU. If that fails, decrement. */ + if (nfsd_file_lru_add(nf)) { + /* If it's still hashed, we're done */ + if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + nfsd_file_schedule_laundrette(); + return; + } + + /* + * We're racing with unhashing, so try to remove it from + * the LRU. If removal fails, then someone else already + * has our reference. + */ + if (!nfsd_file_lru_remove(nf)) + return; + } + } + if (refcount_dec_and_test(&nf->nf_ref)) + nfsd_file_free(nf); } static void @@ -475,33 +484,13 @@ nfsd_file_dispose_list(struct list_head *dispose) { struct nfsd_file *nf; - while(!list_empty(dispose)) { + while (!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); list_del_init(&nf->nf_lru); - nfsd_file_fsync(nf); - nfsd_file_put_noref(nf); + nfsd_file_free(nf); } } -static void -nfsd_file_dispose_list_sync(struct list_head *dispose) -{ - bool flush = false; - struct nfsd_file *nf; - - while(!list_empty(dispose)) { - nf = list_first_entry(dispose, struct nfsd_file, nf_lru); - list_del_init(&nf->nf_lru); - nfsd_file_fsync(nf); - if (!refcount_dec_and_test(&nf->nf_ref)) - continue; - if (nfsd_file_free(nf)) - flush = true; - } - if (flush) - flush_delayed_fput(); -} - static void nfsd_file_list_remove_disposal(struct list_head *dst, struct nfsd_fcache_disposal *l) @@ -569,21 +558,8 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, struct list_head *head = arg; struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru); - /* - * Do a lockless refcount check. The hashtable holds one reference, so - * we look to see if anything else has a reference, or if any have - * been put since the shrinker last ran. Those don't get unhashed and - * released. - * - * Note that in the put path, we set the flag and then decrement the - * counter. Here we check the counter and then test and clear the flag. - * That order is deliberate to ensure that we can do this locklessly. - */ - if (refcount_read(&nf->nf_ref) > 1) { - list_lru_isolate(lru, &nf->nf_lru); - trace_nfsd_file_gc_in_use(nf); - return LRU_REMOVED; - } + /* We should only be dealing with GC entries here */ + WARN_ON_ONCE(!test_bit(NFSD_FILE_GC, &nf->nf_flags)); /* * Don't throw out files that are still undergoing I/O or @@ -594,40 +570,30 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, return LRU_SKIP; } + /* If it was recently added to the list, skip it */ if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) { trace_nfsd_file_gc_referenced(nf); return LRU_ROTATE; } - if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - trace_nfsd_file_gc_hashed(nf); - return LRU_SKIP; + /* + * Put the reference held on behalf of the LRU. If it wasn't the last + * one, then just remove it from the LRU and ignore it. + */ + if (!refcount_dec_and_test(&nf->nf_ref)) { + trace_nfsd_file_gc_in_use(nf); + list_lru_isolate(lru, &nf->nf_lru); + return LRU_REMOVED; } + /* Refcount went to zero. Unhash it and queue it to the dispose list */ + nfsd_file_unhash(nf); list_lru_isolate_move(lru, &nf->nf_lru, head); this_cpu_inc(nfsd_file_evictions); trace_nfsd_file_gc_disposed(nf); return LRU_REMOVED; } -/* - * Unhash items on @dispose immediately, then queue them on the - * disposal workqueue to finish releasing them in the background. - * - * cel: Note that between the time list_lru_shrink_walk runs and - * now, these items are in the hash table but marked unhashed. - * Why release these outside of lru_cb ? There's no lock ordering - * problem since lru_cb currently takes no lock. - */ -static void nfsd_file_gc_dispose_list(struct list_head *dispose) -{ - struct nfsd_file *nf; - - list_for_each_entry(nf, dispose, nf_lru) - nfsd_file_hash_remove(nf); - nfsd_file_dispose_list_delayed(dispose); -} - static void nfsd_file_gc(void) { @@ -637,7 +603,7 @@ nfsd_file_gc(void) ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &dispose, list_lru_count(&nfsd_file_lru)); trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru)); - nfsd_file_gc_dispose_list(&dispose); + nfsd_file_dispose_list_delayed(&dispose); } static void @@ -662,7 +628,7 @@ nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &dispose); trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru)); - nfsd_file_gc_dispose_list(&dispose); + nfsd_file_dispose_list_delayed(&dispose); return ret; } @@ -672,72 +638,111 @@ static struct shrinker nfsd_file_shrinker = { .seeks = 1, }; -/* - * Find all cache items across all net namespaces that match @inode and - * move them to @dispose. The lookup is atomic wrt nfsd_file_acquire(). +/** + * nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode + * @inode: inode on which to close out nfsd_files + * @dispose: list on which to gather nfsd_files to close out + * + * An nfsd_file represents a struct file being held open on behalf of nfsd. An + * open file however can block other activity (such as leases), or cause + * undesirable behavior (e.g. spurious silly-renames when reexporting NFS). + * + * This function is intended to find open nfsd_files when this sort of + * conflicting access occurs and then attempt to close those files out. + * + * Populates the dispose list with entries that have already had their + * refcounts go to zero. The actual free of an nfsd_file can be expensive, + * so we leave it up to the caller whether it wants to wait or not. */ -static unsigned int -__nfsd_file_close_inode(struct inode *inode, struct list_head *dispose) +static void +nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose) { struct nfsd_file_lookup_key key = { .type = NFSD_FILE_KEY_INODE, .inode = inode, }; - unsigned int count = 0; struct nfsd_file *nf; rcu_read_lock(); do { + int decrement = 1; + nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, nfsd_file_rhash_params); if (!nf) break; - nfsd_file_unhash_and_queue(nf, dispose); - count++; + + /* If we raced with someone else unhashing, ignore it */ + if (!nfsd_file_unhash(nf)) + continue; + + /* If we can't get a reference, ignore it */ + if (!nfsd_file_get(nf)) + continue; + + /* Extra decrement if we remove from the LRU */ + if (nfsd_file_lru_remove(nf)) + ++decrement; + + /* If refcount goes to 0, then put on the dispose list */ + if (refcount_sub_and_test(decrement, &nf->nf_ref)) { + list_add(&nf->nf_lru, dispose); + trace_nfsd_file_closing(nf); + } } while (1); rcu_read_unlock(); - return count; -} - -/** - * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file - * @inode: inode of the file to attempt to remove - * - * Unhash and put, then flush and fput all cache items associated with @inode. - */ -void -nfsd_file_close_inode_sync(struct inode *inode) -{ - LIST_HEAD(dispose); - unsigned int count; - - count = __nfsd_file_close_inode(inode, &dispose); - trace_nfsd_file_close_inode_sync(inode, count); - nfsd_file_dispose_list_sync(&dispose); } /** * nfsd_file_close_inode - attempt a delayed close of a nfsd_file * @inode: inode of the file to attempt to remove * - * Unhash and put all cache item associated with @inode. + * Close out any open nfsd_files that can be reaped for @inode. The + * actual freeing is deferred to the dispose_list_delayed infrastructure. + * + * This is used by the fsnotify callbacks and setlease notifier. */ static void nfsd_file_close_inode(struct inode *inode) { LIST_HEAD(dispose); - unsigned int count; - count = __nfsd_file_close_inode(inode, &dispose); - trace_nfsd_file_close_inode(inode, count); + nfsd_file_queue_for_close(inode, &dispose); nfsd_file_dispose_list_delayed(&dispose); } +/** + * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file + * @inode: inode of the file to attempt to remove + * + * Close out any open nfsd_files that can be reaped for @inode. The + * nfsd_files are closed out synchronously. + * + * This is called from nfsd_rename and nfsd_unlink to avoid silly-renames + * when reexporting NFS. + */ +void +nfsd_file_close_inode_sync(struct inode *inode) +{ + struct nfsd_file *nf; + LIST_HEAD(dispose); + + trace_nfsd_file_close(inode); + + nfsd_file_queue_for_close(inode, &dispose); + while (!list_empty(&dispose)) { + nf = list_first_entry(&dispose, struct nfsd_file, nf_lru); + list_del_init(&nf->nf_lru); + nfsd_file_free(nf); + } + flush_delayed_fput(); +} + /** * nfsd_file_delayed_close - close unused nfsd_files * @work: dummy * - * Walk the LRU list and close any entries that have not been used since + * Walk the LRU list and destroy any entries that have not been used since * the last scan. */ static void @@ -759,7 +764,7 @@ nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg, /* Only close files for F_SETLEASE leases */ if (fl->fl_flags & FL_LEASE) - nfsd_file_close_inode_sync(file_inode(fl->fl_file)); + nfsd_file_close_inode(file_inode(fl->fl_file)); return 0; } @@ -880,6 +885,13 @@ out_err: goto out; } +/** + * __nfsd_file_cache_purge: clean out the cache for shutdown + * @net: net-namespace to shut down the cache (may be NULL) + * + * Walk the nfsd_file cache and close out any that match @net. If @net is NULL, + * then close out everything. Called when an nfsd instance is being shut down. + */ static void __nfsd_file_cache_purge(struct net *net) { @@ -893,8 +905,11 @@ __nfsd_file_cache_purge(struct net *net) nf = rhashtable_walk_next(&iter); while (!IS_ERR_OR_NULL(nf)) { - if (!net || nf->nf_net == net) - nfsd_file_unhash_and_queue(nf, &dispose); + if (!net || nf->nf_net == net) { + nfsd_file_unhash(nf); + nfsd_file_lru_remove(nf); + list_add(&nf->nf_lru, &dispose); + } nf = rhashtable_walk_next(&iter); } @@ -1061,8 +1076,12 @@ retry: if (nf) nf = nfsd_file_get(nf); rcu_read_unlock(); - if (nf) + + if (nf) { + if (nfsd_file_lru_remove(nf)) + WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref)); goto wait_for_construction; + } nf = nfsd_file_alloc(&key, may_flags); if (!nf) { @@ -1095,11 +1114,11 @@ wait_for_construction: goto out; } open_retry = false; - nfsd_file_put_noref(nf); + if (refcount_dec_and_test(&nf->nf_ref)) + nfsd_file_free(nf); goto retry; } - nfsd_file_lru_remove(nf); this_cpu_inc(nfsd_file_cache_hits); status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); @@ -1109,7 +1128,8 @@ out: this_cpu_inc(nfsd_file_acquisitions); *pnf = nf; } else { - nfsd_file_put(nf); + if (refcount_dec_and_test(&nf->nf_ref)) + nfsd_file_free(nf); nf = NULL; } @@ -1135,8 +1155,10 @@ open_file: * If construction failed, or we raced with a call to unlink() * then unhash. */ - if (status != nfs_ok || key.inode->i_nlink == 0) - nfsd_file_unhash_and_put(nf); + if (status == nfs_ok && key.inode->i_nlink == 0) + status = nfserr_jukebox; + if (status != nfs_ok) + nfsd_file_unhash(nf); clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); smp_mb__after_atomic(); wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 08e2738adf8f..4feeaed32541 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -817,8 +817,8 @@ DEFINE_CLID_EVENT(confirmed_r); __print_flags(val, "|", \ { 1 << NFSD_FILE_HASHED, "HASHED" }, \ { 1 << NFSD_FILE_PENDING, "PENDING" }, \ - { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}, \ - { 1 << NFSD_FILE_GC, "GC"}) + { 1 << NFSD_FILE_REFERENCED, "REFERENCED" }, \ + { 1 << NFSD_FILE_GC, "GC" }) DECLARE_EVENT_CLASS(nfsd_file_class, TP_PROTO(struct nfsd_file *nf), @@ -853,6 +853,7 @@ DEFINE_EVENT(nfsd_file_class, name, \ DEFINE_NFSD_FILE_EVENT(nfsd_file_free); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash); DEFINE_NFSD_FILE_EVENT(nfsd_file_put); +DEFINE_NFSD_FILE_EVENT(nfsd_file_closing); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_queue); TRACE_EVENT(nfsd_file_alloc, @@ -1044,35 +1045,6 @@ TRACE_EVENT(nfsd_file_open, __entry->nf_file) ) -DECLARE_EVENT_CLASS(nfsd_file_search_class, - TP_PROTO( - const struct inode *inode, - unsigned int count - ), - TP_ARGS(inode, count), - TP_STRUCT__entry( - __field(const struct inode *, inode) - __field(unsigned int, count) - ), - TP_fast_assign( - __entry->inode = inode; - __entry->count = count; - ), - TP_printk("inode=%p count=%u", - __entry->inode, __entry->count) -); - -#define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \ -DEFINE_EVENT(nfsd_file_search_class, name, \ - TP_PROTO( \ - const struct inode *inode, \ - unsigned int count \ - ), \ - TP_ARGS(inode, count)) - -DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync); -DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode); - TRACE_EVENT(nfsd_file_is_cached, TP_PROTO( const struct inode *inode, @@ -1150,7 +1122,6 @@ DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del_disposed); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced); -DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_hashed); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_disposed); DECLARE_EVENT_CLASS(nfsd_file_lruwalk_class, @@ -1182,6 +1153,22 @@ DEFINE_EVENT(nfsd_file_lruwalk_class, name, \ DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed); DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed); +TRACE_EVENT(nfsd_file_close, + TP_PROTO( + const struct inode *inode + ), + TP_ARGS(inode), + TP_STRUCT__entry( + __field(const void *, inode) + ), + TP_fast_assign( + __entry->inode = inode; + ), + TP_printk("inode=%p", + __entry->inode + ) +); + TRACE_EVENT(nfsd_file_fsync, TP_PROTO( const struct nfsd_file *nf, From 973acfdfe90c8a4e58ade97ff0653a498531ff2e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 5 Jan 2023 14:55:56 -0500 Subject: [PATCH 297/543] nfsd: fix handling of cached open files in nfsd4_open codepath [ Upstream commit 0b3a551fa58b4da941efeb209b3770868e2eddd7 ] Commit fb70bf124b05 ("NFSD: Instantiate a struct file when creating a regular NFSv4 file") added the ability to cache an open fd over a compound. There are a couple of problems with the way this currently works: It's racy, as a newly-created nfsd_file can end up with its PENDING bit cleared while the nf is hashed, and the nf_file pointer is still zeroed out. Other tasks can find it in this state and they expect to see a valid nf_file, and can oops if nf_file is NULL. Also, there is no guarantee that we'll end up creating a new nfsd_file if one is already in the hash. If an extant entry is in the hash with a valid nf_file, nfs4_get_vfs_file will clobber its nf_file pointer with the value of op_file and the old nf_file will leak. Fix both issues by making a new nfsd_file_acquirei_opened variant that takes an optional file pointer. If one is present when this is called, we'll take a new reference to it instead of trying to open the file. If the nfsd_file already has a valid nf_file, we'll just ignore the optional file and pass the nfsd_file back as-is. Also rework the tracepoints a bit to allow for an "opened" variant and don't try to avoid counting acquisitions in the case where we already have a cached open file. Fixes: fb70bf124b05 ("NFSD: Instantiate a struct file when creating a regular NFSv4 file") Cc: Trond Myklebust Reported-by: Stanislav Saner Reported-and-Tested-by: Ruben Vestergaard Reported-and-Tested-by: Torkil Svensgaard Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 40 ++++++++++++++++++---------------- fs/nfsd/filecache.h | 5 +++-- fs/nfsd/nfs4state.c | 16 ++++---------- fs/nfsd/trace.h | 52 ++++++++++++--------------------------------- 4 files changed, 42 insertions(+), 71 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 9bf78506d071..ea6fb0e6b165 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1048,8 +1048,8 @@ nfsd_file_is_cached(struct inode *inode) static __be32 nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **pnf, - bool open, bool want_gc) + unsigned int may_flags, struct file *file, + struct nfsd_file **pnf, bool want_gc) { struct nfsd_file_lookup_key key = { .type = NFSD_FILE_KEY_FULL, @@ -1124,8 +1124,7 @@ wait_for_construction: status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); out: if (status == nfs_ok) { - if (open) - this_cpu_inc(nfsd_file_acquisitions); + this_cpu_inc(nfsd_file_acquisitions); *pnf = nf; } else { if (refcount_dec_and_test(&nf->nf_ref)) @@ -1135,20 +1134,23 @@ out: out_status: put_cred(key.cred); - if (open) - trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); + trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); return status; open_file: trace_nfsd_file_alloc(nf); nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode); if (nf->nf_mark) { - if (open) { + if (file) { + get_file(file); + nf->nf_file = file; + status = nfs_ok; + trace_nfsd_file_opened(nf, status); + } else { status = nfsd_open_verified(rqstp, fhp, may_flags, &nf->nf_file); trace_nfsd_file_open(nf, status); - } else - status = nfs_ok; + } } else status = nfserr_jukebox; /* @@ -1184,7 +1186,7 @@ __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, true); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, true); } /** @@ -1205,28 +1207,30 @@ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, false); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, false); } /** - * nfsd_file_create - Get a struct nfsd_file, do not open + * nfsd_file_acquire_opened - Get a struct nfsd_file using existing open file * @rqstp: the RPC transaction being executed * @fhp: the NFS filehandle of the file just created * @may_flags: NFSD_MAY_ settings for the file + * @file: cached, already-open file (may be NULL) * @pnf: OUT: new or found "struct nfsd_file" object * - * The nfsd_file_object returned by this API is reference-counted - * but not garbage-collected. The object is released immediately - * one RCU grace period after the final nfsd_file_put(). + * Acquire a nfsd_file object that is not GC'ed. If one doesn't already exist, + * and @file is non-NULL, use it to instantiate a new nfsd_file instead of + * opening a new one. * * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in * network byte order is returned. */ __be32 -nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **pnf) +nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct file *file, + struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false, false); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, file, pnf, false); } /* diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index b7efb2c3ddb1..41516a4263ea 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -60,7 +60,8 @@ __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **nfp); __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **nfp); -__be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **nfp); +__be32 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct file *file, + struct nfsd_file **nfp); int nfsd_file_cache_stats_show(struct seq_file *m, void *v); #endif /* _FS_NFSD_FILECACHE_H */ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 16c3e991ddcc..2247d107da90 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5211,18 +5211,10 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, if (!fp->fi_fds[oflag]) { spin_unlock(&fp->fi_lock); - if (!open->op_filp) { - status = nfsd_file_acquire(rqstp, cur_fh, access, &nf); - if (status != nfs_ok) - goto out_put_access; - } else { - status = nfsd_file_create(rqstp, cur_fh, access, &nf); - if (status != nfs_ok) - goto out_put_access; - nf->nf_file = open->op_filp; - open->op_filp = NULL; - trace_nfsd_file_create(rqstp, access, nf); - } + status = nfsd_file_acquire_opened(rqstp, cur_fh, access, + open->op_filp, &nf); + if (status != nfs_ok) + goto out_put_access; spin_lock(&fp->fi_lock); if (!fp->fi_fds[oflag]) { diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 4feeaed32541..4eb4e1039c7f 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -922,43 +922,6 @@ TRACE_EVENT(nfsd_file_acquire, ) ); -TRACE_EVENT(nfsd_file_create, - TP_PROTO( - const struct svc_rqst *rqstp, - unsigned int may_flags, - const struct nfsd_file *nf - ), - - TP_ARGS(rqstp, may_flags, nf), - - TP_STRUCT__entry( - __field(const void *, nf_inode) - __field(const void *, nf_file) - __field(unsigned long, may_flags) - __field(unsigned long, nf_flags) - __field(unsigned long, nf_may) - __field(unsigned int, nf_ref) - __field(u32, xid) - ), - - TP_fast_assign( - __entry->nf_inode = nf->nf_inode; - __entry->nf_file = nf->nf_file; - __entry->may_flags = may_flags; - __entry->nf_flags = nf->nf_flags; - __entry->nf_may = nf->nf_may; - __entry->nf_ref = refcount_read(&nf->nf_ref); - __entry->xid = be32_to_cpu(rqstp->rq_xid); - ), - - TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p", - __entry->xid, __entry->nf_inode, - show_nfsd_may_flags(__entry->may_flags), - __entry->nf_ref, show_nf_flags(__entry->nf_flags), - show_nfsd_may_flags(__entry->nf_may), __entry->nf_file - ) -); - TRACE_EVENT(nfsd_file_insert_err, TP_PROTO( const struct svc_rqst *rqstp, @@ -1020,8 +983,8 @@ TRACE_EVENT(nfsd_file_cons_err, ) ); -TRACE_EVENT(nfsd_file_open, - TP_PROTO(struct nfsd_file *nf, __be32 status), +DECLARE_EVENT_CLASS(nfsd_file_open_class, + TP_PROTO(const struct nfsd_file *nf, __be32 status), TP_ARGS(nf, status), TP_STRUCT__entry( __field(void *, nf_inode) /* cannot be dereferenced */ @@ -1045,6 +1008,17 @@ TRACE_EVENT(nfsd_file_open, __entry->nf_file) ) +#define DEFINE_NFSD_FILE_OPEN_EVENT(name) \ +DEFINE_EVENT(nfsd_file_open_class, name, \ + TP_PROTO( \ + const struct nfsd_file *nf, \ + __be32 status \ + ), \ + TP_ARGS(nf, status)) + +DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_open); +DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_opened); + TRACE_EVENT(nfsd_file_is_cached, TP_PROTO( const struct inode *inode, From d995fadd7f09c40babda10329d90e30bec5f2356 Mon Sep 17 00:00:00 2001 From: Angela Czubak Date: Thu, 5 Jan 2023 21:31:07 +0530 Subject: [PATCH 298/543] octeontx2-af: Fix LMAC config in cgx_lmac_rx_tx_enable [ Upstream commit b4e9b8763e417db31c7088103cc557d55cb7a8f5 ] PF netdev can request AF to enable or disable reception and transmission on assigned CGX::LMAC. The current code instead of disabling or enabling 'reception and transmission' also disables/enable the LMAC. This patch fixes this issue. Fixes: 1435f66a28b4 ("octeontx2-af: CGX Rx/Tx enable/disable mbox handlers") Signed-off-by: Angela Czubak Signed-off-by: Hariprasad Kelam Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20230105160107.17638-1-hkelam@marvell.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 4 ++-- drivers/net/ethernet/marvell/octeontx2/af/cgx.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index c8724bfa86b0..8fdd3afe5998 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -768,9 +768,9 @@ int cgx_lmac_rx_tx_enable(void *cgxd, int lmac_id, bool enable) cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_CFG); if (enable) - cfg |= CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN; + cfg |= DATA_PKT_RX_EN | DATA_PKT_TX_EN; else - cfg &= ~(CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN); + cfg &= ~(DATA_PKT_RX_EN | DATA_PKT_TX_EN); cgx_write(cgx, lmac_id, CGXX_CMRX_CFG, cfg); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index 0b06788b8d80..04338db38671 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -30,7 +30,6 @@ #define CMR_P2X_SEL_SHIFT 59ULL #define CMR_P2X_SEL_NIX0 1ULL #define CMR_P2X_SEL_NIX1 2ULL -#define CMR_EN BIT_ULL(55) #define DATA_PKT_TX_EN BIT_ULL(53) #define DATA_PKT_RX_EN BIT_ULL(54) #define CGX_LMAC_TYPE_SHIFT 40 From 92b0051217f23cba7541d6c4ecbca026477fb642 Mon Sep 17 00:00:00 2001 From: Yair Podemsky Date: Wed, 30 Nov 2022 14:51:21 +0200 Subject: [PATCH 299/543] sched/core: Fix arch_scale_freq_tick() on tickless systems [ Upstream commit 7fb3ff22ad8772bbf0e3ce1ef3eb7b09f431807f ] In order for the scheduler to be frequency invariant we measure the ratio between the maximum CPU frequency and the actual CPU frequency. During long tickless periods of time the calculations that keep track of that might overflow, in the function scale_freq_tick(): if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt)) goto error; eventually forcing the kernel to disable the feature for all CPUs, and show the warning message: "Scheduler frequency invariance went wobbly, disabling!". Let's avoid that by limiting the frequency invariant calculations to CPUs with regular tick. Fixes: e2b0d619b400 ("x86, sched: check for counters overflow in frequency invariant accounting") Suggested-by: "Peter Zijlstra (Intel)" Signed-off-by: Yair Podemsky Signed-off-by: Ingo Molnar Reviewed-by: Valentin Schneider Acked-by: Giovanni Gherdovich Link: https://lore.kernel.org/r/20221130125121.34407-1-ypodemsk@redhat.com Signed-off-by: Sasha Levin --- kernel/sched/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 981e41cc4121..172ec79b66f6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5498,7 +5498,9 @@ void scheduler_tick(void) unsigned long thermal_pressure; u64 resched_latency; - arch_scale_freq_tick(); + if (housekeeping_cpu(cpu, HK_TYPE_TICK)) + arch_scale_freq_tick(); + sched_clock_tick(); rq_lock(rq, &rf); From 7bed0d49ca35e281db93aaa5f631591cb91dd665 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Wed, 30 Nov 2022 17:36:02 +0100 Subject: [PATCH 300/543] hvc/xen: lock console list traversal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c0dccad87cf68fc6012aec7567e354353097ec1a ] The currently lockless access to the xen console list in vtermno_to_xencons() is incorrect, as additions and removals from the list can happen anytime, and as such the traversal of the list to get the private console data for a given termno needs to happen with the lock held. Note users that modify the list already do so with the lock taken. Adjust current lock takers to use the _irq{save,restore} helpers, since the context in which vtermno_to_xencons() is called can have interrupts disabled. Use the _irq{save,restore} set of helpers to switch the current callers to disable interrupts in the locked region. I haven't checked if existing users could instead use the _irq variant, as I think it's safer to use _irq{save,restore} upfront. While there switch from using list_for_each_entry_safe to list_for_each_entry: the current entry cursor won't be removed as part of the code in the loop body, so using the _safe variant is pointless. Fixes: 02e19f9c7cac ('hvc_xen: implement multiconsole support') Signed-off-by: Roger Pau Monné Reviewed-by: Stefano Stabellini Link: https://lore.kernel.org/r/20221130163611.14686-1-roger.pau@citrix.com Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/tty/hvc/hvc_xen.c | 46 ++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 7c23112dc923..37809c6c027f 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -52,17 +52,22 @@ static DEFINE_SPINLOCK(xencons_lock); static struct xencons_info *vtermno_to_xencons(int vtermno) { - struct xencons_info *entry, *n, *ret = NULL; + struct xencons_info *entry, *ret = NULL; + unsigned long flags; - if (list_empty(&xenconsoles)) - return NULL; + spin_lock_irqsave(&xencons_lock, flags); + if (list_empty(&xenconsoles)) { + spin_unlock_irqrestore(&xencons_lock, flags); + return NULL; + } - list_for_each_entry_safe(entry, n, &xenconsoles, list) { + list_for_each_entry(entry, &xenconsoles, list) { if (entry->vtermno == vtermno) { ret = entry; break; } } + spin_unlock_irqrestore(&xencons_lock, flags); return ret; } @@ -223,7 +228,7 @@ static int xen_hvm_console_init(void) { int r; uint64_t v = 0; - unsigned long gfn; + unsigned long gfn, flags; struct xencons_info *info; if (!xen_hvm_domain()) @@ -258,9 +263,9 @@ static int xen_hvm_console_init(void) goto err; info->vtermno = HVC_COOKIE; - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); list_add_tail(&info->list, &xenconsoles); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); return 0; err: @@ -283,6 +288,7 @@ static int xencons_info_pv_init(struct xencons_info *info, int vtermno) static int xen_pv_console_init(void) { struct xencons_info *info; + unsigned long flags; if (!xen_pv_domain()) return -ENODEV; @@ -299,9 +305,9 @@ static int xen_pv_console_init(void) /* already configured */ return 0; } - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); xencons_info_pv_init(info, HVC_COOKIE); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); return 0; } @@ -309,6 +315,7 @@ static int xen_pv_console_init(void) static int xen_initial_domain_console_init(void) { struct xencons_info *info; + unsigned long flags; if (!xen_initial_domain()) return -ENODEV; @@ -323,9 +330,9 @@ static int xen_initial_domain_console_init(void) info->irq = bind_virq_to_irq(VIRQ_CONSOLE, 0, false); info->vtermno = HVC_COOKIE; - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); list_add_tail(&info->list, &xenconsoles); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); return 0; } @@ -380,10 +387,12 @@ static void xencons_free(struct xencons_info *info) static int xen_console_remove(struct xencons_info *info) { + unsigned long flags; + xencons_disconnect_backend(info); - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); list_del(&info->list); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); if (info->xbdev != NULL) xencons_free(info); else { @@ -464,6 +473,7 @@ static int xencons_probe(struct xenbus_device *dev, { int ret, devid; struct xencons_info *info; + unsigned long flags; devid = dev->nodename[strlen(dev->nodename) - 1] - '0'; if (devid == 0) @@ -482,9 +492,9 @@ static int xencons_probe(struct xenbus_device *dev, ret = xencons_connect_backend(dev, info); if (ret < 0) goto error; - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); list_add_tail(&info->list, &xenconsoles); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); return 0; @@ -584,10 +594,12 @@ static int __init xen_hvc_init(void) info->hvc = hvc_alloc(HVC_COOKIE, info->irq, ops, 256); if (IS_ERR(info->hvc)) { + unsigned long flags; + r = PTR_ERR(info->hvc); - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); list_del(&info->list); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); if (info->irq) unbind_from_irqhandler(info->irq, NULL); kfree(info); From 8998db5021a28ad67aa8d627bdb4226e4046ccc4 Mon Sep 17 00:00:00 2001 From: Minsuk Kang Date: Fri, 6 Jan 2023 17:23:44 +0900 Subject: [PATCH 301/543] nfc: pn533: Wait for out_urb's completion in pn533_usb_send_frame() [ Upstream commit 9dab880d675b9d0dd56c6428e4e8352a3339371d ] Fix a use-after-free that occurs in hcd when in_urb sent from pn533_usb_send_frame() is completed earlier than out_urb. Its callback frees the skb data in pn533_send_async_complete() that is used as a transfer buffer of out_urb. Wait before sending in_urb until the callback of out_urb is called. To modify the callback of out_urb alone, separate the complete function of out_urb and ack_urb. Found by a modified version of syzkaller. BUG: KASAN: use-after-free in dummy_timer Call Trace: memcpy (mm/kasan/shadow.c:65) dummy_perform_transfer (drivers/usb/gadget/udc/dummy_hcd.c:1352) transfer (drivers/usb/gadget/udc/dummy_hcd.c:1453) dummy_timer (drivers/usb/gadget/udc/dummy_hcd.c:1972) arch_static_branch (arch/x86/include/asm/jump_label.h:27) static_key_false (include/linux/jump_label.h:207) timer_expire_exit (include/trace/events/timer.h:127) call_timer_fn (kernel/time/timer.c:1475) expire_timers (kernel/time/timer.c:1519) __run_timers (kernel/time/timer.c:1790) run_timer_softirq (kernel/time/timer.c:1803) Fixes: c46ee38620a2 ("NFC: pn533: add NXP pn533 nfc device driver") Signed-off-by: Minsuk Kang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/nfc/pn533/usb.c | 44 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c index 6f71ac72012e..ed9c5e2cf3ad 100644 --- a/drivers/nfc/pn533/usb.c +++ b/drivers/nfc/pn533/usb.c @@ -153,10 +153,17 @@ static int pn533_usb_send_ack(struct pn533 *dev, gfp_t flags) return usb_submit_urb(phy->ack_urb, flags); } +struct pn533_out_arg { + struct pn533_usb_phy *phy; + struct completion done; +}; + static int pn533_usb_send_frame(struct pn533 *dev, struct sk_buff *out) { struct pn533_usb_phy *phy = dev->phy; + struct pn533_out_arg arg; + void *cntx; int rc; if (phy->priv == NULL) @@ -168,10 +175,17 @@ static int pn533_usb_send_frame(struct pn533 *dev, print_hex_dump_debug("PN533 TX: ", DUMP_PREFIX_NONE, 16, 1, out->data, out->len, false); + init_completion(&arg.done); + cntx = phy->out_urb->context; + phy->out_urb->context = &arg; + rc = usb_submit_urb(phy->out_urb, GFP_KERNEL); if (rc) return rc; + wait_for_completion(&arg.done); + phy->out_urb->context = cntx; + if (dev->protocol_type == PN533_PROTO_REQ_RESP) { /* request for response for sent packet directly */ rc = pn533_submit_urb_for_response(phy, GFP_KERNEL); @@ -408,7 +422,31 @@ static int pn533_acr122_poweron_rdr(struct pn533_usb_phy *phy) return arg.rc; } -static void pn533_send_complete(struct urb *urb) +static void pn533_out_complete(struct urb *urb) +{ + struct pn533_out_arg *arg = urb->context; + struct pn533_usb_phy *phy = arg->phy; + + switch (urb->status) { + case 0: + break; /* success */ + case -ECONNRESET: + case -ENOENT: + dev_dbg(&phy->udev->dev, + "The urb has been stopped (status %d)\n", + urb->status); + break; + case -ESHUTDOWN: + default: + nfc_err(&phy->udev->dev, + "Urb failure (status %d)\n", + urb->status); + } + + complete(&arg->done); +} + +static void pn533_ack_complete(struct urb *urb) { struct pn533_usb_phy *phy = urb->context; @@ -496,10 +534,10 @@ static int pn533_usb_probe(struct usb_interface *interface, usb_fill_bulk_urb(phy->out_urb, phy->udev, usb_sndbulkpipe(phy->udev, out_endpoint), - NULL, 0, pn533_send_complete, phy); + NULL, 0, pn533_out_complete, phy); usb_fill_bulk_urb(phy->ack_urb, phy->udev, usb_sndbulkpipe(phy->udev, out_endpoint), - NULL, 0, pn533_send_complete, phy); + NULL, 0, pn533_ack_complete, phy); switch (id->driver_info) { case PN533_DEVICE_STD: From df9cddce8f99a47af8a8cf035ef367e9aa256fa5 Mon Sep 17 00:00:00 2001 From: Richard Gobert Date: Tue, 8 Nov 2022 13:33:28 +0100 Subject: [PATCH 302/543] gro: avoid checking for a failed search [ Upstream commit e081ecf084d31809242fb0b9f35484d5fb3a161a ] After searching for a protocol handler in dev_gro_receive, checking for failure is redundant. Skip the failure code after finding the corresponding handler. Suggested-by: Eric Dumazet Signed-off-by: Richard Gobert Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20221108123320.GA59373@debian Signed-off-by: Paolo Abeni Stable-dep-of: 7871f54e3dee ("gro: take care of DODGY packets") Signed-off-by: Sasha Levin --- net/core/gro.c | 72 +++++++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/net/core/gro.c b/net/core/gro.c index bc9451743307..8e0fe85a647d 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -489,45 +489,45 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { - if (ptype->type != type || !ptype->callbacks.gro_receive) - continue; - - skb_set_network_header(skb, skb_gro_offset(skb)); - skb_reset_mac_len(skb); - BUILD_BUG_ON(sizeof_field(struct napi_gro_cb, zeroed) != sizeof(u32)); - BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct napi_gro_cb, zeroed), - sizeof(u32))); /* Avoid slow unaligned acc */ - *(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0; - NAPI_GRO_CB(skb)->flush = skb_has_frag_list(skb); - NAPI_GRO_CB(skb)->is_atomic = 1; - NAPI_GRO_CB(skb)->count = 1; - if (unlikely(skb_is_gso(skb))) { - NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs; - /* Only support TCP at the moment. */ - if (!skb_is_gso_tcp(skb)) - NAPI_GRO_CB(skb)->flush = 1; - } - - /* Setup for GRO checksum validation */ - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - NAPI_GRO_CB(skb)->csum = skb->csum; - NAPI_GRO_CB(skb)->csum_valid = 1; - break; - case CHECKSUM_UNNECESSARY: - NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1; - break; - } - - pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive, - ipv6_gro_receive, inet_gro_receive, - &gro_list->list, skb); - break; + if (ptype->type == type && ptype->callbacks.gro_receive) + goto found_ptype; } rcu_read_unlock(); + goto normal; - if (&ptype->list == head) - goto normal; +found_ptype: + skb_set_network_header(skb, skb_gro_offset(skb)); + skb_reset_mac_len(skb); + BUILD_BUG_ON(sizeof_field(struct napi_gro_cb, zeroed) != sizeof(u32)); + BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct napi_gro_cb, zeroed), + sizeof(u32))); /* Avoid slow unaligned acc */ + *(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0; + NAPI_GRO_CB(skb)->flush = skb_has_frag_list(skb); + NAPI_GRO_CB(skb)->is_atomic = 1; + NAPI_GRO_CB(skb)->count = 1; + if (unlikely(skb_is_gso(skb))) { + NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs; + /* Only support TCP at the moment. */ + if (!skb_is_gso_tcp(skb)) + NAPI_GRO_CB(skb)->flush = 1; + } + + /* Setup for GRO checksum validation */ + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + NAPI_GRO_CB(skb)->csum = skb->csum; + NAPI_GRO_CB(skb)->csum_valid = 1; + break; + case CHECKSUM_UNNECESSARY: + NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1; + break; + } + + pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive, + ipv6_gro_receive, inet_gro_receive, + &gro_list->list, skb); + + rcu_read_unlock(); if (PTR_ERR(pp) == -EINPROGRESS) { ret = GRO_CONSUMED; From 0b1605e45c36968a508703e08bf0947a68732841 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 6 Jan 2023 14:25:23 +0000 Subject: [PATCH 303/543] gro: take care of DODGY packets [ Upstream commit 7871f54e3deed68a27111dda162c4fe9b9c65f8f ] Jaroslav reported a recent throughput regression with virtio_net caused by blamed commit. It is unclear if DODGY GSO packets coming from user space can be accepted by GRO engine in the future with minimal changes, and if there is any expected gain from it. In the meantime, make sure to detect and flush DODGY packets. Fixes: 5eddb24901ee ("gro: add support of (hw)gro packets to gro stack") Signed-off-by: Eric Dumazet Reported-and-bisected-by: Jaroslav Pulchart Cc: Coco Li Cc: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/gro.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/core/gro.c b/net/core/gro.c index 8e0fe85a647d..1b4abfb9a7a1 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -507,8 +507,9 @@ found_ptype: NAPI_GRO_CB(skb)->count = 1; if (unlikely(skb_is_gso(skb))) { NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs; - /* Only support TCP at the moment. */ - if (!skb_is_gso_tcp(skb)) + /* Only support TCP and non DODGY users. */ + if (!skb_is_gso_tcp(skb) || + (skb_shinfo(skb)->gso_type & SKB_GSO_DODGY)) NAPI_GRO_CB(skb)->flush = 1; } From 31a997c40a8aeb079607e6a0164ecbfedc62ea36 Mon Sep 17 00:00:00 2001 From: Mirsad Goran Todorovac Date: Sat, 7 Jan 2023 04:40:20 +0100 Subject: [PATCH 304/543] af_unix: selftest: Fix the size of the parameter to connect() [ Upstream commit 7d6ceeb1875cc08dc3d1e558e191434d94840cd5 ] Adjust size parameter in connect() to match the type of the parameter, to fix "No such file or directory" error in selftests/net/af_unix/ test_oob_unix.c:127. The existing code happens to work provided that the autogenerated pathname is shorter than sizeof (struct sockaddr), which is why it hasn't been noticed earlier. Visible from the trace excerpt: bind(3, {sa_family=AF_UNIX, sun_path="unix_oob_453059"}, 110) = 0 clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7fa6a6577a10) = 453060 [pid ] connect(6, {sa_family=AF_UNIX, sun_path="unix_oob_45305"}, 16) = -1 ENOENT (No such file or directory) BUG: The filename is trimmed to sizeof (struct sockaddr). Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Shuah Khan Cc: Kuniyuki Iwashima Cc: Florian Westphal Reviewed-by: Florian Westphal Fixes: 314001f0bf92 ("af_unix: Add OOB support") Signed-off-by: Mirsad Goran Todorovac Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- tools/testing/selftests/net/af_unix/test_unix_oob.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c index b57e91e1c3f2..532459a15067 100644 --- a/tools/testing/selftests/net/af_unix/test_unix_oob.c +++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c @@ -124,7 +124,7 @@ void producer(struct sockaddr_un *consumer_addr) wait_for_signal(pipefd[0]); if (connect(cfd, (struct sockaddr *)consumer_addr, - sizeof(struct sockaddr)) != 0) { + sizeof(*consumer_addr)) != 0) { perror("Connect failed"); kill(0, SIGTERM); exit(1); From c878ac66db429530a54518305e302f0675e44a98 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Sat, 31 Dec 2022 12:55:06 +0100 Subject: [PATCH 305/543] ASoC: qcom: Fix building APQ8016 machine driver without SOUNDWIRE [ Upstream commit 0cbf1ecd8c4801ec7566231491f7ad9cec31098b ] Older Qualcomm platforms like APQ8016 do not have hardware support for SoundWire, so kernel configurations made specifically for those platforms will usually not have CONFIG_SOUNDWIRE enabled. Unfortunately commit 8d89cf6ff229 ("ASoC: qcom: cleanup and fix dependency of QCOM_COMMON") breaks those kernel configurations, because SOUNDWIRE is now a required dependency for SND_SOC_QCOM_COMMON (and in turn also SND_SOC_APQ8016_SBC). Trying to migrate such a kernel config silently disables SND_SOC_APQ8016_SBC and breaks audio functionality. The soundwire helpers in common.c are only used by two of the Qualcomm audio machine drivers, so building and requiring CONFIG_SOUNDWIRE for all platforms is unnecessary. There is no need to stuff all common code into a single module. Fix the issue by moving the soundwire helpers to a separate SND_SOC_QCOM_SDW module/option that is selected only by the machine drivers that make use of them. This also allows reverting the imply/depends changes from the previous fix because both SM8250 and SC8280XP already depend on SOUNDWIRE, so the soundwire helpers will be only built if SOUNDWIRE is really enabled. Cc: Srinivas Kandagatla Fixes: 8d89cf6ff229 ("ASoC: qcom: cleanup and fix dependency of QCOM_COMMON") Signed-off-by: Stephan Gerhold Link: https://lore.kernel.org/r/20221231115506.82991-1-stephan@gerhold.net Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/qcom/Kconfig | 21 ++++--- sound/soc/qcom/Makefile | 2 + sound/soc/qcom/common.c | 114 ----------------------------------- sound/soc/qcom/common.h | 10 ---- sound/soc/qcom/sc8280xp.c | 1 + sound/soc/qcom/sdw.c | 123 ++++++++++++++++++++++++++++++++++++++ sound/soc/qcom/sdw.h | 18 ++++++ sound/soc/qcom/sm8250.c | 1 + 8 files changed, 157 insertions(+), 133 deletions(-) create mode 100644 sound/soc/qcom/sdw.c create mode 100644 sound/soc/qcom/sdw.h diff --git a/sound/soc/qcom/Kconfig b/sound/soc/qcom/Kconfig index 96a6d4731e6f..e7b00d1d9e99 100644 --- a/sound/soc/qcom/Kconfig +++ b/sound/soc/qcom/Kconfig @@ -2,7 +2,6 @@ menuconfig SND_SOC_QCOM tristate "ASoC support for QCOM platforms" depends on ARCH_QCOM || COMPILE_TEST - imply SND_SOC_QCOM_COMMON help Say Y or M if you want to add support to use audio devices in Qualcomm Technologies SOC-based platforms. @@ -60,14 +59,16 @@ config SND_SOC_STORM config SND_SOC_APQ8016_SBC tristate "SoC Audio support for APQ8016 SBC platforms" select SND_SOC_LPASS_APQ8016 - depends on SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_COMMON help Support for Qualcomm Technologies LPASS audio block in APQ8016 SOC-based systems. Say Y if you want to use audio devices on MI2S. config SND_SOC_QCOM_COMMON - depends on SOUNDWIRE + tristate + +config SND_SOC_QCOM_SDW tristate config SND_SOC_QDSP6_COMMON @@ -144,7 +145,7 @@ config SND_SOC_MSM8996 depends on QCOM_APR depends on COMMON_CLK select SND_SOC_QDSP6 - depends on SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_COMMON help Support for Qualcomm Technologies LPASS audio block in APQ8096 SoC-based systems. @@ -155,7 +156,7 @@ config SND_SOC_SDM845 depends on QCOM_APR && I2C && SOUNDWIRE depends on COMMON_CLK select SND_SOC_QDSP6 - depends on SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_COMMON select SND_SOC_RT5663 select SND_SOC_MAX98927 imply SND_SOC_CROS_EC_CODEC @@ -169,7 +170,8 @@ config SND_SOC_SM8250 depends on QCOM_APR && SOUNDWIRE depends on COMMON_CLK select SND_SOC_QDSP6 - depends on SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_SDW help To add support for audio on Qualcomm Technologies Inc. SM8250 SoC-based systems. @@ -180,7 +182,8 @@ config SND_SOC_SC8280XP depends on QCOM_APR && SOUNDWIRE depends on COMMON_CLK select SND_SOC_QDSP6 - depends on SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_SDW help To add support for audio on Qualcomm Technologies Inc. SC8280XP SoC-based systems. @@ -190,7 +193,7 @@ config SND_SOC_SC7180 tristate "SoC Machine driver for SC7180 boards" depends on I2C && GPIOLIB depends on SOUNDWIRE || SOUNDWIRE=n - depends on SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_COMMON select SND_SOC_LPASS_SC7180 select SND_SOC_MAX98357A select SND_SOC_RT5682_I2C @@ -204,7 +207,7 @@ config SND_SOC_SC7180 config SND_SOC_SC7280 tristate "SoC Machine driver for SC7280 boards" depends on I2C && SOUNDWIRE - depends on SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_COMMON select SND_SOC_LPASS_SC7280 select SND_SOC_MAX98357A select SND_SOC_WCD938X_SDW diff --git a/sound/soc/qcom/Makefile b/sound/soc/qcom/Makefile index 8b97172cf990..254350d9dc06 100644 --- a/sound/soc/qcom/Makefile +++ b/sound/soc/qcom/Makefile @@ -28,6 +28,7 @@ snd-soc-sdm845-objs := sdm845.o snd-soc-sm8250-objs := sm8250.o snd-soc-sc8280xp-objs := sc8280xp.o snd-soc-qcom-common-objs := common.o +snd-soc-qcom-sdw-objs := sdw.o obj-$(CONFIG_SND_SOC_STORM) += snd-soc-storm.o obj-$(CONFIG_SND_SOC_APQ8016_SBC) += snd-soc-apq8016-sbc.o @@ -38,6 +39,7 @@ obj-$(CONFIG_SND_SOC_SC8280XP) += snd-soc-sc8280xp.o obj-$(CONFIG_SND_SOC_SDM845) += snd-soc-sdm845.o obj-$(CONFIG_SND_SOC_SM8250) += snd-soc-sm8250.o obj-$(CONFIG_SND_SOC_QCOM_COMMON) += snd-soc-qcom-common.o +obj-$(CONFIG_SND_SOC_QCOM_SDW) += snd-soc-qcom-sdw.o #DSP lib obj-$(CONFIG_SND_SOC_QDSP6) += qdsp6/ diff --git a/sound/soc/qcom/common.c b/sound/soc/qcom/common.c index 49c74c1662a3..96fe80241fb4 100644 --- a/sound/soc/qcom/common.c +++ b/sound/soc/qcom/common.c @@ -180,120 +180,6 @@ err_put_np: } EXPORT_SYMBOL_GPL(qcom_snd_parse_of); -int qcom_snd_sdw_prepare(struct snd_pcm_substream *substream, - struct sdw_stream_runtime *sruntime, - bool *stream_prepared) -{ - struct snd_soc_pcm_runtime *rtd = substream->private_data; - struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); - int ret; - - if (!sruntime) - return 0; - - switch (cpu_dai->id) { - case WSA_CODEC_DMA_RX_0: - case WSA_CODEC_DMA_RX_1: - case RX_CODEC_DMA_RX_0: - case RX_CODEC_DMA_RX_1: - case TX_CODEC_DMA_TX_0: - case TX_CODEC_DMA_TX_1: - case TX_CODEC_DMA_TX_2: - case TX_CODEC_DMA_TX_3: - break; - default: - return 0; - } - - if (*stream_prepared) { - sdw_disable_stream(sruntime); - sdw_deprepare_stream(sruntime); - *stream_prepared = false; - } - - ret = sdw_prepare_stream(sruntime); - if (ret) - return ret; - - /** - * NOTE: there is a strict hw requirement about the ordering of port - * enables and actual WSA881x PA enable. PA enable should only happen - * after soundwire ports are enabled if not DC on the line is - * accumulated resulting in Click/Pop Noise - * PA enable/mute are handled as part of codec DAPM and digital mute. - */ - - ret = sdw_enable_stream(sruntime); - if (ret) { - sdw_deprepare_stream(sruntime); - return ret; - } - *stream_prepared = true; - - return ret; -} -EXPORT_SYMBOL_GPL(qcom_snd_sdw_prepare); - -int qcom_snd_sdw_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params, - struct sdw_stream_runtime **psruntime) -{ - struct snd_soc_pcm_runtime *rtd = substream->private_data; - struct snd_soc_dai *codec_dai; - struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); - struct sdw_stream_runtime *sruntime; - int i; - - switch (cpu_dai->id) { - case WSA_CODEC_DMA_RX_0: - case RX_CODEC_DMA_RX_0: - case RX_CODEC_DMA_RX_1: - case TX_CODEC_DMA_TX_0: - case TX_CODEC_DMA_TX_1: - case TX_CODEC_DMA_TX_2: - case TX_CODEC_DMA_TX_3: - for_each_rtd_codec_dais(rtd, i, codec_dai) { - sruntime = snd_soc_dai_get_stream(codec_dai, substream->stream); - if (sruntime != ERR_PTR(-ENOTSUPP)) - *psruntime = sruntime; - } - break; - } - - return 0; - -} -EXPORT_SYMBOL_GPL(qcom_snd_sdw_hw_params); - -int qcom_snd_sdw_hw_free(struct snd_pcm_substream *substream, - struct sdw_stream_runtime *sruntime, bool *stream_prepared) -{ - struct snd_soc_pcm_runtime *rtd = substream->private_data; - struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); - - switch (cpu_dai->id) { - case WSA_CODEC_DMA_RX_0: - case WSA_CODEC_DMA_RX_1: - case RX_CODEC_DMA_RX_0: - case RX_CODEC_DMA_RX_1: - case TX_CODEC_DMA_TX_0: - case TX_CODEC_DMA_TX_1: - case TX_CODEC_DMA_TX_2: - case TX_CODEC_DMA_TX_3: - if (sruntime && *stream_prepared) { - sdw_disable_stream(sruntime); - sdw_deprepare_stream(sruntime); - *stream_prepared = false; - } - break; - default: - break; - } - - return 0; -} -EXPORT_SYMBOL_GPL(qcom_snd_sdw_hw_free); - int qcom_snd_wcd_jack_setup(struct snd_soc_pcm_runtime *rtd, struct snd_soc_jack *jack, bool *jack_setup) { diff --git a/sound/soc/qcom/common.h b/sound/soc/qcom/common.h index 3ef5bb6d12df..d7f80ee5ae26 100644 --- a/sound/soc/qcom/common.h +++ b/sound/soc/qcom/common.h @@ -5,19 +5,9 @@ #define __QCOM_SND_COMMON_H__ #include -#include int qcom_snd_parse_of(struct snd_soc_card *card); int qcom_snd_wcd_jack_setup(struct snd_soc_pcm_runtime *rtd, struct snd_soc_jack *jack, bool *jack_setup); -int qcom_snd_sdw_prepare(struct snd_pcm_substream *substream, - struct sdw_stream_runtime *runtime, - bool *stream_prepared); -int qcom_snd_sdw_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params, - struct sdw_stream_runtime **psruntime); -int qcom_snd_sdw_hw_free(struct snd_pcm_substream *substream, - struct sdw_stream_runtime *sruntime, - bool *stream_prepared); #endif diff --git a/sound/soc/qcom/sc8280xp.c b/sound/soc/qcom/sc8280xp.c index ade44ad7c585..14d9fea33d16 100644 --- a/sound/soc/qcom/sc8280xp.c +++ b/sound/soc/qcom/sc8280xp.c @@ -12,6 +12,7 @@ #include #include "qdsp6/q6afe.h" #include "common.h" +#include "sdw.h" #define DRIVER_NAME "sc8280xp" diff --git a/sound/soc/qcom/sdw.c b/sound/soc/qcom/sdw.c new file mode 100644 index 000000000000..10249519a39e --- /dev/null +++ b/sound/soc/qcom/sdw.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018, Linaro Limited. +// Copyright (c) 2018, The Linux Foundation. All rights reserved. + +#include +#include +#include "qdsp6/q6afe.h" +#include "sdw.h" + +int qcom_snd_sdw_prepare(struct snd_pcm_substream *substream, + struct sdw_stream_runtime *sruntime, + bool *stream_prepared) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); + int ret; + + if (!sruntime) + return 0; + + switch (cpu_dai->id) { + case WSA_CODEC_DMA_RX_0: + case WSA_CODEC_DMA_RX_1: + case RX_CODEC_DMA_RX_0: + case RX_CODEC_DMA_RX_1: + case TX_CODEC_DMA_TX_0: + case TX_CODEC_DMA_TX_1: + case TX_CODEC_DMA_TX_2: + case TX_CODEC_DMA_TX_3: + break; + default: + return 0; + } + + if (*stream_prepared) { + sdw_disable_stream(sruntime); + sdw_deprepare_stream(sruntime); + *stream_prepared = false; + } + + ret = sdw_prepare_stream(sruntime); + if (ret) + return ret; + + /** + * NOTE: there is a strict hw requirement about the ordering of port + * enables and actual WSA881x PA enable. PA enable should only happen + * after soundwire ports are enabled if not DC on the line is + * accumulated resulting in Click/Pop Noise + * PA enable/mute are handled as part of codec DAPM and digital mute. + */ + + ret = sdw_enable_stream(sruntime); + if (ret) { + sdw_deprepare_stream(sruntime); + return ret; + } + *stream_prepared = true; + + return ret; +} +EXPORT_SYMBOL_GPL(qcom_snd_sdw_prepare); + +int qcom_snd_sdw_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct sdw_stream_runtime **psruntime) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_dai *codec_dai; + struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); + struct sdw_stream_runtime *sruntime; + int i; + + switch (cpu_dai->id) { + case WSA_CODEC_DMA_RX_0: + case RX_CODEC_DMA_RX_0: + case RX_CODEC_DMA_RX_1: + case TX_CODEC_DMA_TX_0: + case TX_CODEC_DMA_TX_1: + case TX_CODEC_DMA_TX_2: + case TX_CODEC_DMA_TX_3: + for_each_rtd_codec_dais(rtd, i, codec_dai) { + sruntime = snd_soc_dai_get_stream(codec_dai, substream->stream); + if (sruntime != ERR_PTR(-ENOTSUPP)) + *psruntime = sruntime; + } + break; + } + + return 0; + +} +EXPORT_SYMBOL_GPL(qcom_snd_sdw_hw_params); + +int qcom_snd_sdw_hw_free(struct snd_pcm_substream *substream, + struct sdw_stream_runtime *sruntime, bool *stream_prepared) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); + + switch (cpu_dai->id) { + case WSA_CODEC_DMA_RX_0: + case WSA_CODEC_DMA_RX_1: + case RX_CODEC_DMA_RX_0: + case RX_CODEC_DMA_RX_1: + case TX_CODEC_DMA_TX_0: + case TX_CODEC_DMA_TX_1: + case TX_CODEC_DMA_TX_2: + case TX_CODEC_DMA_TX_3: + if (sruntime && *stream_prepared) { + sdw_disable_stream(sruntime); + sdw_deprepare_stream(sruntime); + *stream_prepared = false; + } + break; + default: + break; + } + + return 0; +} +EXPORT_SYMBOL_GPL(qcom_snd_sdw_hw_free); +MODULE_LICENSE("GPL v2"); diff --git a/sound/soc/qcom/sdw.h b/sound/soc/qcom/sdw.h new file mode 100644 index 000000000000..d74cbb84da13 --- /dev/null +++ b/sound/soc/qcom/sdw.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (c) 2018, The Linux Foundation. All rights reserved. + +#ifndef __QCOM_SND_SDW_H__ +#define __QCOM_SND_SDW_H__ + +#include + +int qcom_snd_sdw_prepare(struct snd_pcm_substream *substream, + struct sdw_stream_runtime *runtime, + bool *stream_prepared); +int qcom_snd_sdw_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct sdw_stream_runtime **psruntime); +int qcom_snd_sdw_hw_free(struct snd_pcm_substream *substream, + struct sdw_stream_runtime *sruntime, + bool *stream_prepared); +#endif diff --git a/sound/soc/qcom/sm8250.c b/sound/soc/qcom/sm8250.c index 8dbe9ef41b1c..9626a9ef78c2 100644 --- a/sound/soc/qcom/sm8250.c +++ b/sound/soc/qcom/sm8250.c @@ -12,6 +12,7 @@ #include #include "qdsp6/q6afe.h" #include "common.h" +#include "sdw.h" #define DRIVER_NAME "sm8250" #define MI2S_BCLK_RATE 1536000 From 91fa2bd352807bc0be4ae707321c7e7e01c7f040 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 9 Jan 2023 08:54:39 +0100 Subject: [PATCH 306/543] tools/nolibc: restore mips branch ordering in the _start block [ Upstream commit 184177c3d6e023da934761e198c281344d7dd65b ] Depending on the compiler used and the optimization options, the sbrk() test was crashing, both on real hardware (mips-24kc) and in qemu. One such example is kernel.org toolchain in version 11.3 optimizing at -Os. Inspecting the sys_brk() call shows the following code: 0040047c : 40047c: 24020fcd li v0,4045 400480: 27bdffe0 addiu sp,sp,-32 400484: 0000000c syscall 400488: 27bd0020 addiu sp,sp,32 40048c: 10e00001 beqz a3,400494 400490: 00021023 negu v0,v0 400494: 03e00008 jr ra It is obviously wrong, the "negu" instruction is placed in beqz's delayed slot, and worse, there's no nop nor instruction after the return, so the next function's first instruction (addiu sip,sip,-32) will also be executed as part of the delayed slot that follows the return. This is caused by the ".set noreorder" directive in the _start block, that applies to the whole program. The compiler emits code without the delayed slots and relies on the compiler to swap instructions when this option is not set. Removing the option would require to change the startup code in a way that wouldn't make it look like the resulting code, which would not be easy to debug. Instead let's just save the default ordering before changing it, and restore it at the end of the _start block. Now the code is correct: 0040047c : 40047c: 24020fcd li v0,4045 400480: 27bdffe0 addiu sp,sp,-32 400484: 0000000c syscall 400488: 10e00002 beqz a3,400494 40048c: 27bd0020 addiu sp,sp,32 400490: 00021023 negu v0,v0 400494: 03e00008 jr ra 400498: 00000000 nop Fixes: 66b6f755ad45 ("rcutorture: Import a copy of nolibc") #5.0 Signed-off-by: Willy Tarreau Signed-off-by: Paul E. McKenney Signed-off-by: Sasha Levin --- tools/include/nolibc/arch-mips.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h index 5fc5b8029bff..7380093ba9e7 100644 --- a/tools/include/nolibc/arch-mips.h +++ b/tools/include/nolibc/arch-mips.h @@ -192,6 +192,7 @@ struct sys_stat_struct { __asm__ (".section .text\n" ".weak __start\n" ".set nomips16\n" + ".set push\n" ".set noreorder\n" ".option pic0\n" ".ent __start\n" @@ -210,6 +211,7 @@ __asm__ (".section .text\n" "li $v0, 4001\n" // NR_exit == 4001 "syscall\n" ".end __start\n" + ".set pop\n" ""); #endif // _NOLIBC_ARCH_MIPS_H From cd57d5e5e28e8b51fea428ebc7780aecb46abd02 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 9 Jan 2023 08:54:42 +0100 Subject: [PATCH 307/543] tools/nolibc: fix the O_* fcntl/open macro definitions for riscv [ Upstream commit 00b18da4089330196906b9fe075c581c17eb726c ] When RISCV port was imported in 5.2, the O_* macros were taken with their octal value and written as-is in hex, resulting in the getdents64() to fail in nolibc-test. Fixes: 582e84f7b779 ("tool headers nolibc: add RISCV support") #5.2 Signed-off-by: Willy Tarreau Signed-off-by: Paul E. McKenney Signed-off-by: Sasha Levin --- tools/include/nolibc/arch-riscv.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h index ba04771cb3a3..a3bdd9803f8c 100644 --- a/tools/include/nolibc/arch-riscv.h +++ b/tools/include/nolibc/arch-riscv.h @@ -11,13 +11,13 @@ #define O_RDONLY 0 #define O_WRONLY 1 #define O_RDWR 2 -#define O_CREAT 0x100 -#define O_EXCL 0x200 -#define O_NOCTTY 0x400 -#define O_TRUNC 0x1000 -#define O_APPEND 0x2000 -#define O_NONBLOCK 0x4000 -#define O_DIRECTORY 0x200000 +#define O_CREAT 0x40 +#define O_EXCL 0x80 +#define O_NOCTTY 0x100 +#define O_TRUNC 0x200 +#define O_APPEND 0x400 +#define O_NONBLOCK 0x800 +#define O_DIRECTORY 0x10000 struct sys_stat_struct { unsigned long st_dev; /* Device. */ From f2faf0699af78968a27ca154bf76e94247f8c471 Mon Sep 17 00:00:00 2001 From: Luben Tuikov Date: Wed, 4 Jan 2023 17:09:02 -0500 Subject: [PATCH 308/543] drm/amdgpu: Fix potential NULL dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0be7ed8e7eb15282b5d0f6fdfea884db594ea9bf ] Fix potential NULL dereference, in the case when "man", the resource manager might be NULL, when/if we print debug information. Cc: Alex Deucher Cc: Christian König Cc: AMD Graphics Cc: Dan Carpenter Cc: kernel test robot Fixes: 7554886daa31ea ("drm/amdgpu: Fix size validation for non-exclusive domains (v4)") Signed-off-by: Luben Tuikov Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 3be3cba3a16d..cfd78c4a45ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -468,8 +468,9 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, return true; fail: - DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, - man->size); + if (man) + DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, + man->size); return false; } From 500ca1da9d0876244eb4d1b0ece6fa0e9968d45d Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Wed, 7 Dec 2022 08:55:02 +0000 Subject: [PATCH 309/543] ice: Fix potential memory leak in ice_gnss_tty_write() [ Upstream commit f58985620f55580a07d40062c4115d8c9cf6ae27 ] The ice_gnss_tty_write() return directly if the write_buf alloc failed, leaking the cmd_buf. Fix by free cmd_buf if write_buf alloc failed. Fixes: d6b98c8d242a ("ice: add write functionality for GNSS TTY") Signed-off-by: Yuan Can Reviewed-by: Leon Romanovsky Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_gnss.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c index b5a7f246d230..a1915551c69a 100644 --- a/drivers/net/ethernet/intel/ice/ice_gnss.c +++ b/drivers/net/ethernet/intel/ice/ice_gnss.c @@ -363,6 +363,7 @@ ice_gnss_tty_write(struct tty_struct *tty, const unsigned char *buf, int count) /* Send the data out to a hardware port */ write_buf = kzalloc(sizeof(*write_buf), GFP_KERNEL); if (!write_buf) { + kfree(cmd_buf); err = -ENOMEM; goto exit; } From 96a9873188552ebb2afe76033d7329a5ecabef6e Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Thu, 8 Dec 2022 21:35:52 +0800 Subject: [PATCH 310/543] ice: Add check for kzalloc [ Upstream commit 40543b3d9d2c13227ecd3aa90a713c201d1d7f09 ] Add the check for the return value of kzalloc in order to avoid NULL pointer dereference. Moreover, use the goto-label to share the clean code. Fixes: d6b98c8d242a ("ice: add write functionality for GNSS TTY") Signed-off-by: Jiasheng Jiang Reviewed-by: Jiri Pirko Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_gnss.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c index a1915551c69a..43e199b5b513 100644 --- a/drivers/net/ethernet/intel/ice/ice_gnss.c +++ b/drivers/net/ethernet/intel/ice/ice_gnss.c @@ -461,6 +461,9 @@ static struct tty_driver *ice_gnss_create_tty_driver(struct ice_pf *pf) for (i = 0; i < ICE_GNSS_TTY_MINOR_DEVICES; i++) { pf->gnss_tty_port[i] = kzalloc(sizeof(*pf->gnss_tty_port[i]), GFP_KERNEL); + if (!pf->gnss_tty_port[i]) + goto err_out; + pf->gnss_serial[i] = NULL; tty_port_init(pf->gnss_tty_port[i]); @@ -470,21 +473,23 @@ static struct tty_driver *ice_gnss_create_tty_driver(struct ice_pf *pf) err = tty_register_driver(tty_driver); if (err) { dev_err(dev, "Failed to register TTY driver err=%d\n", err); - - for (i = 0; i < ICE_GNSS_TTY_MINOR_DEVICES; i++) { - tty_port_destroy(pf->gnss_tty_port[i]); - kfree(pf->gnss_tty_port[i]); - } - kfree(ttydrv_name); - tty_driver_kref_put(pf->ice_gnss_tty_driver); - - return NULL; + goto err_out; } for (i = 0; i < ICE_GNSS_TTY_MINOR_DEVICES; i++) dev_info(dev, "%s%d registered\n", ttydrv_name, i); return tty_driver; + +err_out: + while (i--) { + tty_port_destroy(pf->gnss_tty_port[i]); + kfree(pf->gnss_tty_port[i]); + } + kfree(ttydrv_name); + tty_driver_kref_put(pf->ice_gnss_tty_driver); + + return NULL; } /** From 2f3313c5554469374c2aee9904e803703c3d5fe5 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Sat, 22 Oct 2022 00:02:20 -0400 Subject: [PATCH 311/543] drm/vmwgfx: Write the driver id registers [ Upstream commit 7f4c33778686cc2d34cb4ef65b4265eea874c159 ] Driver id registers are a new mechanism in the svga device to hint to the device which driver is running. This should not change device behavior in any way, but might be convenient to work-around specific bugs in guest drivers. Signed-off-by: Zack Rusin Reviewed-by: Martin Krastev Reviewed-by: Maaz Mombasawala Link: https://patchwork.freedesktop.org/patch/msgid/20221022040236.616490-2-zack@kde.org Stable-dep-of: a309c7194e8a ("drm/vmwgfx: Remove rcu locks from user resources") Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 43 +++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index d7bd5eb1d3ac..45028e25d490 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -25,10 +25,13 @@ * **************************************************************************/ -#include -#include -#include -#include + +#include "vmwgfx_drv.h" + +#include "vmwgfx_devcaps.h" +#include "vmwgfx_mksstat.h" +#include "vmwgfx_binding.h" +#include "ttm_object.h" #include #include @@ -41,11 +44,11 @@ #include #include -#include "ttm_object.h" -#include "vmwgfx_binding.h" -#include "vmwgfx_devcaps.h" -#include "vmwgfx_drv.h" -#include "vmwgfx_mksstat.h" +#include +#include +#include +#include +#include #define VMWGFX_DRIVER_DESC "Linux drm driver for VMware graphics devices" @@ -806,6 +809,27 @@ static int vmw_detect_version(struct vmw_private *dev) return 0; } +static void vmw_write_driver_id(struct vmw_private *dev) +{ + if ((dev->capabilities2 & SVGA_CAP2_DX2) != 0) { + vmw_write(dev, SVGA_REG_GUEST_DRIVER_ID, + SVGA_REG_GUEST_DRIVER_ID_LINUX); + + vmw_write(dev, SVGA_REG_GUEST_DRIVER_VERSION1, + LINUX_VERSION_MAJOR << 24 | + LINUX_VERSION_PATCHLEVEL << 16 | + LINUX_VERSION_SUBLEVEL); + vmw_write(dev, SVGA_REG_GUEST_DRIVER_VERSION2, + VMWGFX_DRIVER_MAJOR << 24 | + VMWGFX_DRIVER_MINOR << 16 | + VMWGFX_DRIVER_PATCHLEVEL); + vmw_write(dev, SVGA_REG_GUEST_DRIVER_VERSION3, 0); + + vmw_write(dev, SVGA_REG_GUEST_DRIVER_ID, + SVGA_REG_GUEST_DRIVER_ID_SUBMIT); + } +} + static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) { int ret; @@ -1091,6 +1115,7 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) vmw_host_printf("vmwgfx: Module Version: %d.%d.%d (kernel: %s)", VMWGFX_DRIVER_MAJOR, VMWGFX_DRIVER_MINOR, VMWGFX_DRIVER_PATCHLEVEL, UTS_RELEASE); + vmw_write_driver_id(dev_priv); if (dev_priv->enable_fb) { vmw_fifo_resource_inc(dev_priv); From 8557e0e42e02c2066588e626957345530923cedf Mon Sep 17 00:00:00 2001 From: Maaz Mombasawala Date: Sat, 22 Oct 2022 00:02:22 -0400 Subject: [PATCH 312/543] drm/vmwgfx: Refactor resource manager's hashtable to use linux/hashtable implementation. [ Upstream commit 43531dc661b7fb6be249c023bf25847b38215545 ] Vmwgfx's hashtab implementation needs to be replaced with linux/hashtable to reduce maintenance burden. Refactor cmdbuf resource manager to use linux/hashtable.h implementation as part of this effort. Signed-off-by: Maaz Mombasawala Reviewed-by: Zack Rusin Reviewed-by: Martin Krastev Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20221022040236.616490-4-zack@kde.org Stable-dep-of: a309c7194e8a ("drm/vmwgfx: Remove rcu locks from user resources") Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c | 62 +++++++++------------- 1 file changed, 26 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c index 82ef58ccdd42..142aef686fcd 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2014-2015 VMware, Inc., Palo Alto, CA., USA + * Copyright 2014-2022 VMware, Inc., Palo Alto, CA., USA * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -28,6 +28,8 @@ #include "vmwgfx_drv.h" #include "vmwgfx_resource_priv.h" +#include + #define VMW_CMDBUF_RES_MAN_HT_ORDER 12 /** @@ -59,7 +61,7 @@ struct vmw_cmdbuf_res { * @resources and @list are protected by the cmdbuf mutex for now. */ struct vmw_cmdbuf_res_manager { - struct vmwgfx_open_hash resources; + DECLARE_HASHTABLE(resources, VMW_CMDBUF_RES_MAN_HT_ORDER); struct list_head list; struct vmw_private *dev_priv; }; @@ -82,14 +84,13 @@ vmw_cmdbuf_res_lookup(struct vmw_cmdbuf_res_manager *man, u32 user_key) { struct vmwgfx_hash_item *hash; - int ret; unsigned long key = user_key | (res_type << 24); - ret = vmwgfx_ht_find_item(&man->resources, key, &hash); - if (unlikely(ret != 0)) - return ERR_PTR(ret); - - return drm_hash_entry(hash, struct vmw_cmdbuf_res, hash)->res; + hash_for_each_possible_rcu(man->resources, hash, head, key) { + if (hash->key == key) + return drm_hash_entry(hash, struct vmw_cmdbuf_res, hash)->res; + } + return ERR_PTR(-EINVAL); } /** @@ -105,7 +106,7 @@ static void vmw_cmdbuf_res_free(struct vmw_cmdbuf_res_manager *man, struct vmw_cmdbuf_res *entry) { list_del(&entry->head); - WARN_ON(vmwgfx_ht_remove_item(&man->resources, &entry->hash)); + hash_del_rcu(&entry->hash.head); vmw_resource_unreference(&entry->res); kfree(entry); } @@ -159,7 +160,6 @@ void vmw_cmdbuf_res_commit(struct list_head *list) void vmw_cmdbuf_res_revert(struct list_head *list) { struct vmw_cmdbuf_res *entry, *next; - int ret; list_for_each_entry_safe(entry, next, list, head) { switch (entry->state) { @@ -167,8 +167,8 @@ void vmw_cmdbuf_res_revert(struct list_head *list) vmw_cmdbuf_res_free(entry->man, entry); break; case VMW_CMDBUF_RES_DEL: - ret = vmwgfx_ht_insert_item(&entry->man->resources, &entry->hash); - BUG_ON(ret); + hash_add_rcu(entry->man->resources, &entry->hash.head, + entry->hash.key); list_move_tail(&entry->head, &entry->man->list); entry->state = VMW_CMDBUF_RES_COMMITTED; break; @@ -199,26 +199,20 @@ int vmw_cmdbuf_res_add(struct vmw_cmdbuf_res_manager *man, struct list_head *list) { struct vmw_cmdbuf_res *cres; - int ret; cres = kzalloc(sizeof(*cres), GFP_KERNEL); if (unlikely(!cres)) return -ENOMEM; cres->hash.key = user_key | (res_type << 24); - ret = vmwgfx_ht_insert_item(&man->resources, &cres->hash); - if (unlikely(ret != 0)) { - kfree(cres); - goto out_invalid_key; - } + hash_add_rcu(man->resources, &cres->hash.head, cres->hash.key); cres->state = VMW_CMDBUF_RES_ADD; cres->res = vmw_resource_reference(res); cres->man = man; list_add_tail(&cres->head, list); -out_invalid_key: - return ret; + return 0; } /** @@ -243,24 +237,26 @@ int vmw_cmdbuf_res_remove(struct vmw_cmdbuf_res_manager *man, struct list_head *list, struct vmw_resource **res_p) { - struct vmw_cmdbuf_res *entry; + struct vmw_cmdbuf_res *entry = NULL; struct vmwgfx_hash_item *hash; - int ret; + unsigned long key = user_key | (res_type << 24); - ret = vmwgfx_ht_find_item(&man->resources, user_key | (res_type << 24), - &hash); - if (likely(ret != 0)) + hash_for_each_possible_rcu(man->resources, hash, head, key) { + if (hash->key == key) { + entry = drm_hash_entry(hash, struct vmw_cmdbuf_res, hash); + break; + } + } + if (unlikely(!entry)) return -EINVAL; - entry = drm_hash_entry(hash, struct vmw_cmdbuf_res, hash); - switch (entry->state) { case VMW_CMDBUF_RES_ADD: vmw_cmdbuf_res_free(man, entry); *res_p = NULL; break; case VMW_CMDBUF_RES_COMMITTED: - (void) vmwgfx_ht_remove_item(&man->resources, &entry->hash); + hash_del_rcu(&entry->hash.head); list_del(&entry->head); entry->state = VMW_CMDBUF_RES_DEL; list_add_tail(&entry->head, list); @@ -287,7 +283,6 @@ struct vmw_cmdbuf_res_manager * vmw_cmdbuf_res_man_create(struct vmw_private *dev_priv) { struct vmw_cmdbuf_res_manager *man; - int ret; man = kzalloc(sizeof(*man), GFP_KERNEL); if (!man) @@ -295,12 +290,8 @@ vmw_cmdbuf_res_man_create(struct vmw_private *dev_priv) man->dev_priv = dev_priv; INIT_LIST_HEAD(&man->list); - ret = vmwgfx_ht_create(&man->resources, VMW_CMDBUF_RES_MAN_HT_ORDER); - if (ret == 0) - return man; - - kfree(man); - return ERR_PTR(ret); + hash_init(man->resources); + return man; } /** @@ -320,7 +311,6 @@ void vmw_cmdbuf_res_man_destroy(struct vmw_cmdbuf_res_manager *man) list_for_each_entry_safe(entry, next, &man->list, head) vmw_cmdbuf_res_free(man, entry); - vmwgfx_ht_remove(&man->resources); kfree(man); } From c00e42f1c9f1a8a38a4a909378fc1479457e9f66 Mon Sep 17 00:00:00 2001 From: Maaz Mombasawala Date: Sat, 22 Oct 2022 00:02:23 -0400 Subject: [PATCH 313/543] drm/vmwgfx: Remove ttm object hashtable [ Upstream commit 931e09d8d5b4aa19bdae0234f2727049f1cd13d9 ] The object_hash hashtable for ttm objects is not being used. Remove it and perform refactoring in ttm_object init function. Signed-off-by: Maaz Mombasawala Reviewed-by: Zack Rusin Reviewed-by: Martin Krastev Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20221022040236.616490-5-zack@kde.org Stable-dep-of: a309c7194e8a ("drm/vmwgfx: Remove rcu locks from user resources") Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/ttm_object.c | 24 ++++++------------------ drivers/gpu/drm/vmwgfx/ttm_object.h | 6 ++---- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +- 3 files changed, 9 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.c b/drivers/gpu/drm/vmwgfx/ttm_object.c index 26a55fef1ab5..9546b121bc22 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_object.c +++ b/drivers/gpu/drm/vmwgfx/ttm_object.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * - * Copyright (c) 2009-2013 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2022 VMware, Inc., Palo Alto, CA., USA * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -44,13 +44,14 @@ #define pr_fmt(fmt) "[TTM] " fmt +#include "ttm_object.h" +#include "vmwgfx_drv.h" + #include #include #include #include #include -#include "ttm_object.h" -#include "vmwgfx_drv.h" MODULE_IMPORT_NS(DMA_BUF); @@ -81,9 +82,7 @@ struct ttm_object_file { /* * struct ttm_object_device * - * @object_lock: lock that protects the object_hash hash table. - * - * @object_hash: hash table for fast lookup of object global names. + * @object_lock: lock that protects idr. * * @object_count: Per device object count. * @@ -92,7 +91,6 @@ struct ttm_object_file { struct ttm_object_device { spinlock_t object_lock; - struct vmwgfx_open_hash object_hash; atomic_t object_count; struct dma_buf_ops ops; void (*dmabuf_release)(struct dma_buf *dma_buf); @@ -449,20 +447,15 @@ out_err: } struct ttm_object_device * -ttm_object_device_init(unsigned int hash_order, - const struct dma_buf_ops *ops) +ttm_object_device_init(const struct dma_buf_ops *ops) { struct ttm_object_device *tdev = kmalloc(sizeof(*tdev), GFP_KERNEL); - int ret; if (unlikely(tdev == NULL)) return NULL; spin_lock_init(&tdev->object_lock); atomic_set(&tdev->object_count, 0); - ret = vmwgfx_ht_create(&tdev->object_hash, hash_order); - if (ret != 0) - goto out_no_object_hash; /* * Our base is at VMWGFX_NUM_MOB + 1 because we want to create @@ -477,10 +470,6 @@ ttm_object_device_init(unsigned int hash_order, tdev->dmabuf_release = tdev->ops.release; tdev->ops.release = ttm_prime_dmabuf_release; return tdev; - -out_no_object_hash: - kfree(tdev); - return NULL; } void ttm_object_device_release(struct ttm_object_device **p_tdev) @@ -491,7 +480,6 @@ void ttm_object_device_release(struct ttm_object_device **p_tdev) WARN_ON_ONCE(!idr_is_empty(&tdev->idr)); idr_destroy(&tdev->idr); - vmwgfx_ht_remove(&tdev->object_hash); kfree(tdev); } diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.h b/drivers/gpu/drm/vmwgfx/ttm_object.h index 1a2fa0f83f5f..6870f951b677 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_object.h +++ b/drivers/gpu/drm/vmwgfx/ttm_object.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2006-2022 VMware, Inc., Palo Alto, CA., USA * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -262,7 +262,6 @@ extern void ttm_object_file_release(struct ttm_object_file **p_tfile); /** * ttm_object device init - initialize a struct ttm_object_device * - * @hash_order: Order of hash table used to hash the base objects. * @ops: DMA buf ops for prime objects of this device. * * This function is typically called on device initialization to prepare @@ -270,8 +269,7 @@ extern void ttm_object_file_release(struct ttm_object_file **p_tfile); */ extern struct ttm_object_device * -ttm_object_device_init(unsigned int hash_order, - const struct dma_buf_ops *ops); +ttm_object_device_init(const struct dma_buf_ops *ops); /** * ttm_object_device_release - release data held by a ttm_object_device diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 45028e25d490..13b90273eb77 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -994,7 +994,7 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) goto out_err0; } - dev_priv->tdev = ttm_object_device_init(12, &vmw_prime_dmabuf_ops); + dev_priv->tdev = ttm_object_device_init(&vmw_prime_dmabuf_ops); if (unlikely(dev_priv->tdev == NULL)) { drm_err(&dev_priv->drm, From e98d62090b6d6326196e444375c1e2104d2e583f Mon Sep 17 00:00:00 2001 From: Maaz Mombasawala Date: Sat, 22 Oct 2022 00:02:24 -0400 Subject: [PATCH 314/543] drm/vmwgfx: Refactor resource validation hashtable to use linux/hashtable implementation. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9e931f2e09701e25744f3d186a4ba13b5342b136 ] Vmwgfx's hashtab implementation needs to be replaced with linux/hashtable to reduce maintenence burden. As part of this effort, refactor the res_ht hashtable used for resource validation during execbuf execution to use linux/hashtable implementation. This also refactors vmw_validation_context to use vmw_sw_context as the container for the hashtable, whereas before it used a vmwgfx_open_hash directly. This makes vmw_validation_context less generic, but there is no functional change since res_ht is the only instance where validation context used a hashtable in vmwgfx driver. Signed-off-by: Maaz Mombasawala Reviewed-by: Thomas Hellström Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20221022040236.616490-6-zack@kde.org Stable-dep-of: a309c7194e8a ("drm/vmwgfx: Remove rcu locks from user resources") Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 24 ++++++++-- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 5 +- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 14 ++---- drivers/gpu/drm/vmwgfx/vmwgfx_validation.c | 55 +++++++++++----------- drivers/gpu/drm/vmwgfx/vmwgfx_validation.h | 26 +++------- 5 files changed, 58 insertions(+), 66 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 13b90273eb77..8d77e79bd904 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -830,6 +830,22 @@ static void vmw_write_driver_id(struct vmw_private *dev) } } +static void vmw_sw_context_init(struct vmw_private *dev_priv) +{ + struct vmw_sw_context *sw_context = &dev_priv->ctx; + + hash_init(sw_context->res_ht); +} + +static void vmw_sw_context_fini(struct vmw_private *dev_priv) +{ + struct vmw_sw_context *sw_context = &dev_priv->ctx; + + vfree(sw_context->cmd_bounce); + if (sw_context->staged_bindings) + vmw_binding_state_free(sw_context->staged_bindings); +} + static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) { int ret; @@ -839,6 +855,8 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) dev_priv->drm.dev_private = dev_priv; + vmw_sw_context_init(dev_priv); + mutex_init(&dev_priv->cmdbuf_mutex); mutex_init(&dev_priv->binding_mutex); spin_lock_init(&dev_priv->resource_lock); @@ -1168,9 +1186,7 @@ static void vmw_driver_unload(struct drm_device *dev) unregister_pm_notifier(&dev_priv->pm_nb); - if (dev_priv->ctx.res_ht_initialized) - vmwgfx_ht_remove(&dev_priv->ctx.res_ht); - vfree(dev_priv->ctx.cmd_bounce); + vmw_sw_context_fini(dev_priv); if (dev_priv->enable_fb) { vmw_fb_off(dev_priv); vmw_fb_close(dev_priv); @@ -1198,8 +1214,6 @@ static void vmw_driver_unload(struct drm_device *dev) vmw_irq_uninstall(&dev_priv->drm); ttm_object_device_release(&dev_priv->tdev); - if (dev_priv->ctx.staged_bindings) - vmw_binding_state_free(dev_priv->ctx.staged_bindings); for (i = vmw_res_context; i < vmw_res_max; ++i) idr_destroy(&dev_priv->res_idr[i]); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 09e2d738aa87..d87aeedb78d0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -30,6 +30,7 @@ #include #include +#include #include #include @@ -93,6 +94,7 @@ #define VMW_RES_STREAM ttm_driver_type2 #define VMW_RES_FENCE ttm_driver_type3 #define VMW_RES_SHADER ttm_driver_type4 +#define VMW_RES_HT_ORDER 12 #define MKSSTAT_CAPACITY_LOG2 5U #define MKSSTAT_CAPACITY (1U << MKSSTAT_CAPACITY_LOG2) @@ -425,8 +427,7 @@ struct vmw_ctx_validation_info; * @ctx: The validation context */ struct vmw_sw_context{ - struct vmwgfx_open_hash res_ht; - bool res_ht_initialized; + DECLARE_HASHTABLE(res_ht, VMW_RES_HT_ORDER); bool kernel; struct vmw_fpriv *fp; struct drm_file *filp; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index f085dbd4736d..c943ab801ca7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2009 - 2015 VMware, Inc., Palo Alto, CA., USA + * Copyright 2009 - 2022 VMware, Inc., Palo Alto, CA., USA * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -25,6 +25,7 @@ * **************************************************************************/ #include +#include #include "vmwgfx_drv.h" #include "vmwgfx_reg.h" @@ -34,7 +35,6 @@ #include "vmwgfx_binding.h" #include "vmwgfx_mksstat.h" -#define VMW_RES_HT_ORDER 12 /* * Helper macro to get dx_ctx_node if available otherwise print an error @@ -4101,7 +4101,7 @@ int vmw_execbuf_process(struct drm_file *file_priv, int ret; int32_t out_fence_fd = -1; struct sync_file *sync_file = NULL; - DECLARE_VAL_CONTEXT(val_ctx, &sw_context->res_ht, 1); + DECLARE_VAL_CONTEXT(val_ctx, sw_context, 1); if (flags & DRM_VMW_EXECBUF_FLAG_EXPORT_FENCE_FD) { out_fence_fd = get_unused_fd_flags(O_CLOEXEC); @@ -4164,14 +4164,6 @@ int vmw_execbuf_process(struct drm_file *file_priv, if (sw_context->staged_bindings) vmw_binding_state_reset(sw_context->staged_bindings); - if (!sw_context->res_ht_initialized) { - ret = vmwgfx_ht_create(&sw_context->res_ht, VMW_RES_HT_ORDER); - if (unlikely(ret != 0)) - goto out_unlock; - - sw_context->res_ht_initialized = true; - } - INIT_LIST_HEAD(&sw_context->staged_cmd_res); sw_context->ctx = &val_ctx; ret = vmw_execbuf_tie_context(dev_priv, sw_context, dx_context_handle); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c index f46891012be3..f5c4a40fb16d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright © 2018 VMware, Inc., Palo Alto, CA., USA + * Copyright © 2018 - 2022 VMware, Inc., Palo Alto, CA., USA * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -180,11 +180,16 @@ vmw_validation_find_bo_dup(struct vmw_validation_context *ctx, if (!ctx->merge_dups) return NULL; - if (ctx->ht) { + if (ctx->sw_context) { struct vmwgfx_hash_item *hash; + unsigned long key = (unsigned long) vbo; - if (!vmwgfx_ht_find_item(ctx->ht, (unsigned long) vbo, &hash)) - bo_node = container_of(hash, typeof(*bo_node), hash); + hash_for_each_possible_rcu(ctx->sw_context->res_ht, hash, head, key) { + if (hash->key == key) { + bo_node = container_of(hash, typeof(*bo_node), hash); + break; + } + } } else { struct vmw_validation_bo_node *entry; @@ -217,11 +222,16 @@ vmw_validation_find_res_dup(struct vmw_validation_context *ctx, if (!ctx->merge_dups) return NULL; - if (ctx->ht) { + if (ctx->sw_context) { struct vmwgfx_hash_item *hash; + unsigned long key = (unsigned long) res; - if (!vmwgfx_ht_find_item(ctx->ht, (unsigned long) res, &hash)) - res_node = container_of(hash, typeof(*res_node), hash); + hash_for_each_possible_rcu(ctx->sw_context->res_ht, hash, head, key) { + if (hash->key == key) { + res_node = container_of(hash, typeof(*res_node), hash); + break; + } + } } else { struct vmw_validation_res_node *entry; @@ -269,20 +279,15 @@ int vmw_validation_add_bo(struct vmw_validation_context *ctx, } } else { struct ttm_validate_buffer *val_buf; - int ret; bo_node = vmw_validation_mem_alloc(ctx, sizeof(*bo_node)); if (!bo_node) return -ENOMEM; - if (ctx->ht) { + if (ctx->sw_context) { bo_node->hash.key = (unsigned long) vbo; - ret = vmwgfx_ht_insert_item(ctx->ht, &bo_node->hash); - if (ret) { - DRM_ERROR("Failed to initialize a buffer " - "validation entry.\n"); - return ret; - } + hash_add_rcu(ctx->sw_context->res_ht, &bo_node->hash.head, + bo_node->hash.key); } val_buf = &bo_node->base; val_buf->bo = ttm_bo_get_unless_zero(&vbo->base); @@ -316,7 +321,6 @@ int vmw_validation_add_resource(struct vmw_validation_context *ctx, bool *first_usage) { struct vmw_validation_res_node *node; - int ret; node = vmw_validation_find_res_dup(ctx, res); if (node) { @@ -330,14 +334,9 @@ int vmw_validation_add_resource(struct vmw_validation_context *ctx, return -ENOMEM; } - if (ctx->ht) { + if (ctx->sw_context) { node->hash.key = (unsigned long) res; - ret = vmwgfx_ht_insert_item(ctx->ht, &node->hash); - if (ret) { - DRM_ERROR("Failed to initialize a resource validation " - "entry.\n"); - return ret; - } + hash_add_rcu(ctx->sw_context->res_ht, &node->hash.head, node->hash.key); } node->res = vmw_resource_reference_unless_doomed(res); if (!node->res) @@ -681,19 +680,19 @@ void vmw_validation_drop_ht(struct vmw_validation_context *ctx) struct vmw_validation_bo_node *entry; struct vmw_validation_res_node *val; - if (!ctx->ht) + if (!ctx->sw_context) return; list_for_each_entry(entry, &ctx->bo_list, base.head) - (void) vmwgfx_ht_remove_item(ctx->ht, &entry->hash); + hash_del_rcu(&entry->hash.head); list_for_each_entry(val, &ctx->resource_list, head) - (void) vmwgfx_ht_remove_item(ctx->ht, &val->hash); + hash_del_rcu(&val->hash.head); list_for_each_entry(val, &ctx->resource_ctx_list, head) - (void) vmwgfx_ht_remove_item(ctx->ht, &val->hash); + hash_del_rcu(&entry->hash.head); - ctx->ht = NULL; + ctx->sw_context = NULL; } /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h index f21df053882b..ab9ec226f433 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * - * Copyright © 2018 VMware, Inc., Palo Alto, CA., USA + * Copyright © 2018 - 2022 VMware, Inc., Palo Alto, CA., USA * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -29,12 +29,11 @@ #define _VMWGFX_VALIDATION_H_ #include +#include #include #include -#include "vmwgfx_hashtab.h" - #define VMW_RES_DIRTY_NONE 0 #define VMW_RES_DIRTY_SET BIT(0) #define VMW_RES_DIRTY_CLEAR BIT(1) @@ -59,7 +58,7 @@ * @total_mem: Amount of reserved memory. */ struct vmw_validation_context { - struct vmwgfx_open_hash *ht; + struct vmw_sw_context *sw_context; struct list_head resource_list; struct list_head resource_ctx_list; struct list_head bo_list; @@ -82,16 +81,16 @@ struct vmw_fence_obj; /** * DECLARE_VAL_CONTEXT - Declare a validation context with initialization * @_name: The name of the variable - * @_ht: The hash table used to find dups or NULL if none + * @_sw_context: Contains the hash table used to find dups or NULL if none * @_merge_dups: Whether to merge duplicate buffer object- or resource * entries. If set to true, ideally a hash table pointer should be supplied * as well unless the number of resources and buffer objects per validation * is known to be very small */ #endif -#define DECLARE_VAL_CONTEXT(_name, _ht, _merge_dups) \ +#define DECLARE_VAL_CONTEXT(_name, _sw_context, _merge_dups) \ struct vmw_validation_context _name = \ - { .ht = _ht, \ + { .sw_context = _sw_context, \ .resource_list = LIST_HEAD_INIT((_name).resource_list), \ .resource_ctx_list = LIST_HEAD_INIT((_name).resource_ctx_list), \ .bo_list = LIST_HEAD_INIT((_name).bo_list), \ @@ -114,19 +113,6 @@ vmw_validation_has_bos(struct vmw_validation_context *ctx) return !list_empty(&ctx->bo_list); } -/** - * vmw_validation_set_ht - Register a hash table for duplicate finding - * @ctx: The validation context - * @ht: Pointer to a hash table to use for duplicate finding - * This function is intended to be used if the hash table wasn't - * available at validation context declaration time - */ -static inline void vmw_validation_set_ht(struct vmw_validation_context *ctx, - struct vmwgfx_open_hash *ht) -{ - ctx->ht = ht; -} - /** * vmw_validation_bo_reserve - Reserve buffer objects registered with a * validation context From 7f3d691dedfe73741d609707ed417200479f0b0c Mon Sep 17 00:00:00 2001 From: Maaz Mombasawala Date: Sat, 22 Oct 2022 00:02:29 -0400 Subject: [PATCH 315/543] drm/vmwgfx: Refactor ttm reference object hashtable to use linux/hashtable. [ Upstream commit 76a9e07f270cf5fb556ac237dbf11f5dacd61fef ] This is part of an effort to move from the vmwgfx_open_hash hashtable to linux/hashtable implementation. Refactor the ref_hash hashtable, used for fast lookup of reference objects associated with a ttm file. This also exposed a problem related to inconsistently using 32-bit and 64-bit keys with this hashtable. The hash function used changes depending on the size of the type, and results are not consistent across numbers, for example, hash_32(329) = 329, but hash_long(329) = 328. This would cause the lookup to fail for objects already in the hashtable, since keys of different sizes were being passed during adding and lookup. This was not an issue before because vmwgfx_open_hash always used hash_long. Fix this by always using 64-bit keys for this hashtable, which means that hash_long is always used. Signed-off-by: Maaz Mombasawala Reviewed-by: Zack Rusin Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20221022040236.616490-11-zack@kde.org Stable-dep-of: a309c7194e8a ("drm/vmwgfx: Remove rcu locks from user resources") Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/ttm_object.c | 91 ++++++++++++++++------------- drivers/gpu/drm/vmwgfx/ttm_object.h | 12 ++-- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +- 3 files changed, 56 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.c b/drivers/gpu/drm/vmwgfx/ttm_object.c index 9546b121bc22..c07b81fbc495 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_object.c +++ b/drivers/gpu/drm/vmwgfx/ttm_object.c @@ -52,9 +52,12 @@ #include #include #include +#include MODULE_IMPORT_NS(DMA_BUF); +#define VMW_TTM_OBJECT_REF_HT_ORDER 10 + /** * struct ttm_object_file * @@ -75,7 +78,7 @@ struct ttm_object_file { struct ttm_object_device *tdev; spinlock_t lock; struct list_head ref_list; - struct vmwgfx_open_hash ref_hash; + DECLARE_HASHTABLE(ref_hash, VMW_TTM_OBJECT_REF_HT_ORDER); struct kref refcount; }; @@ -136,6 +139,36 @@ ttm_object_file_ref(struct ttm_object_file *tfile) return tfile; } +static int ttm_tfile_find_ref_rcu(struct ttm_object_file *tfile, + uint64_t key, + struct vmwgfx_hash_item **p_hash) +{ + struct vmwgfx_hash_item *hash; + + hash_for_each_possible_rcu(tfile->ref_hash, hash, head, key) { + if (hash->key == key) { + *p_hash = hash; + return 0; + } + } + return -EINVAL; +} + +static int ttm_tfile_find_ref(struct ttm_object_file *tfile, + uint64_t key, + struct vmwgfx_hash_item **p_hash) +{ + struct vmwgfx_hash_item *hash; + + hash_for_each_possible(tfile->ref_hash, hash, head, key) { + if (hash->key == key) { + *p_hash = hash; + return 0; + } + } + return -EINVAL; +} + static void ttm_object_file_destroy(struct kref *kref) { struct ttm_object_file *tfile = @@ -238,14 +271,13 @@ void ttm_base_object_unref(struct ttm_base_object **p_base) * Return: A pointer to the object if successful or NULL otherwise. */ struct ttm_base_object * -ttm_base_object_noref_lookup(struct ttm_object_file *tfile, uint32_t key) +ttm_base_object_noref_lookup(struct ttm_object_file *tfile, uint64_t key) { struct vmwgfx_hash_item *hash; - struct vmwgfx_open_hash *ht = &tfile->ref_hash; int ret; rcu_read_lock(); - ret = vmwgfx_ht_find_item_rcu(ht, key, &hash); + ret = ttm_tfile_find_ref_rcu(tfile, key, &hash); if (ret) { rcu_read_unlock(); return NULL; @@ -257,15 +289,14 @@ ttm_base_object_noref_lookup(struct ttm_object_file *tfile, uint32_t key) EXPORT_SYMBOL(ttm_base_object_noref_lookup); struct ttm_base_object *ttm_base_object_lookup(struct ttm_object_file *tfile, - uint32_t key) + uint64_t key) { struct ttm_base_object *base = NULL; struct vmwgfx_hash_item *hash; - struct vmwgfx_open_hash *ht = &tfile->ref_hash; int ret; rcu_read_lock(); - ret = vmwgfx_ht_find_item_rcu(ht, key, &hash); + ret = ttm_tfile_find_ref_rcu(tfile, key, &hash); if (likely(ret == 0)) { base = drm_hash_entry(hash, struct ttm_ref_object, hash)->obj; @@ -278,7 +309,7 @@ struct ttm_base_object *ttm_base_object_lookup(struct ttm_object_file *tfile, } struct ttm_base_object * -ttm_base_object_lookup_for_ref(struct ttm_object_device *tdev, uint32_t key) +ttm_base_object_lookup_for_ref(struct ttm_object_device *tdev, uint64_t key) { struct ttm_base_object *base; @@ -297,7 +328,6 @@ int ttm_ref_object_add(struct ttm_object_file *tfile, bool *existed, bool require_existed) { - struct vmwgfx_open_hash *ht = &tfile->ref_hash; struct ttm_ref_object *ref; struct vmwgfx_hash_item *hash; int ret = -EINVAL; @@ -310,7 +340,7 @@ int ttm_ref_object_add(struct ttm_object_file *tfile, while (ret == -EINVAL) { rcu_read_lock(); - ret = vmwgfx_ht_find_item_rcu(ht, base->handle, &hash); + ret = ttm_tfile_find_ref_rcu(tfile, base->handle, &hash); if (ret == 0) { ref = drm_hash_entry(hash, struct ttm_ref_object, hash); @@ -335,21 +365,14 @@ int ttm_ref_object_add(struct ttm_object_file *tfile, kref_init(&ref->kref); spin_lock(&tfile->lock); - ret = vmwgfx_ht_insert_item_rcu(ht, &ref->hash); - - if (likely(ret == 0)) { - list_add_tail(&ref->head, &tfile->ref_list); - kref_get(&base->refcount); - spin_unlock(&tfile->lock); - if (existed != NULL) - *existed = false; - break; - } + hash_add_rcu(tfile->ref_hash, &ref->hash.head, ref->hash.key); + ret = 0; + list_add_tail(&ref->head, &tfile->ref_list); + kref_get(&base->refcount); spin_unlock(&tfile->lock); - BUG_ON(ret != -EINVAL); - - kfree(ref); + if (existed != NULL) + *existed = false; } return ret; @@ -361,10 +384,8 @@ ttm_ref_object_release(struct kref *kref) struct ttm_ref_object *ref = container_of(kref, struct ttm_ref_object, kref); struct ttm_object_file *tfile = ref->tfile; - struct vmwgfx_open_hash *ht; - ht = &tfile->ref_hash; - (void)vmwgfx_ht_remove_item_rcu(ht, &ref->hash); + hash_del_rcu(&ref->hash.head); list_del(&ref->head); spin_unlock(&tfile->lock); @@ -376,13 +397,12 @@ ttm_ref_object_release(struct kref *kref) int ttm_ref_object_base_unref(struct ttm_object_file *tfile, unsigned long key) { - struct vmwgfx_open_hash *ht = &tfile->ref_hash; struct ttm_ref_object *ref; struct vmwgfx_hash_item *hash; int ret; spin_lock(&tfile->lock); - ret = vmwgfx_ht_find_item(ht, key, &hash); + ret = ttm_tfile_find_ref(tfile, key, &hash); if (unlikely(ret != 0)) { spin_unlock(&tfile->lock); return -EINVAL; @@ -414,16 +434,13 @@ void ttm_object_file_release(struct ttm_object_file **p_tfile) } spin_unlock(&tfile->lock); - vmwgfx_ht_remove(&tfile->ref_hash); ttm_object_file_unref(&tfile); } -struct ttm_object_file *ttm_object_file_init(struct ttm_object_device *tdev, - unsigned int hash_order) +struct ttm_object_file *ttm_object_file_init(struct ttm_object_device *tdev) { struct ttm_object_file *tfile = kmalloc(sizeof(*tfile), GFP_KERNEL); - int ret; if (unlikely(tfile == NULL)) return NULL; @@ -433,17 +450,9 @@ struct ttm_object_file *ttm_object_file_init(struct ttm_object_device *tdev, kref_init(&tfile->refcount); INIT_LIST_HEAD(&tfile->ref_list); - ret = vmwgfx_ht_create(&tfile->ref_hash, hash_order); - if (ret) - goto out_err; + hash_init(tfile->ref_hash); return tfile; -out_err: - vmwgfx_ht_remove(&tfile->ref_hash); - - kfree(tfile); - - return NULL; } struct ttm_object_device * diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.h b/drivers/gpu/drm/vmwgfx/ttm_object.h index 6870f951b677..67f30d589e27 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_object.h +++ b/drivers/gpu/drm/vmwgfx/ttm_object.h @@ -104,7 +104,7 @@ struct ttm_base_object { struct ttm_object_file *tfile; struct kref refcount; void (*refcount_release) (struct ttm_base_object **base); - u32 handle; + u64 handle; enum ttm_object_type object_type; u32 shareable; }; @@ -164,7 +164,7 @@ extern int ttm_base_object_init(struct ttm_object_file *tfile, */ extern struct ttm_base_object *ttm_base_object_lookup(struct ttm_object_file - *tfile, uint32_t key); + *tfile, uint64_t key); /** * ttm_base_object_lookup_for_ref @@ -178,7 +178,7 @@ extern struct ttm_base_object *ttm_base_object_lookup(struct ttm_object_file */ extern struct ttm_base_object * -ttm_base_object_lookup_for_ref(struct ttm_object_device *tdev, uint32_t key); +ttm_base_object_lookup_for_ref(struct ttm_object_device *tdev, uint64_t key); /** * ttm_base_object_unref @@ -237,14 +237,12 @@ extern int ttm_ref_object_base_unref(struct ttm_object_file *tfile, * ttm_object_file_init - initialize a struct ttm_object file * * @tdev: A struct ttm_object device this file is initialized on. - * @hash_order: Order of the hash table used to hold the reference objects. * * This is typically called by the file_ops::open function. */ extern struct ttm_object_file *ttm_object_file_init(struct ttm_object_device - *tdev, - unsigned int hash_order); + *tdev); /** * ttm_object_file_release - release data held by a ttm_object_file @@ -312,7 +310,7 @@ extern int ttm_prime_handle_to_fd(struct ttm_object_file *tfile, kfree_rcu(__obj, __prime.base.rhead) struct ttm_base_object * -ttm_base_object_noref_lookup(struct ttm_object_file *tfile, uint32_t key); +ttm_base_object_noref_lookup(struct ttm_object_file *tfile, uint64_t key); /** * ttm_base_object_noref_release - release a base object pointer looked up diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 8d77e79bd904..b909a3ce9af3 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1242,7 +1242,7 @@ static int vmw_driver_open(struct drm_device *dev, struct drm_file *file_priv) if (unlikely(!vmw_fp)) return ret; - vmw_fp->tfile = ttm_object_file_init(dev_priv->tdev, 10); + vmw_fp->tfile = ttm_object_file_init(dev_priv->tdev); if (unlikely(vmw_fp->tfile == NULL)) goto out_no_tfile; From a3be7e2afc4360312c97e573bb6f12f248a8d1b0 Mon Sep 17 00:00:00 2001 From: Maaz Mombasawala Date: Sat, 22 Oct 2022 00:02:30 -0400 Subject: [PATCH 316/543] drm/vmwgfx: Remove vmwgfx_hashtab [ Upstream commit 9da30cdd6a318595199319708c143ae318f804ef ] The vmwgfx driver has migrated from using the hashtable in vmwgfx_hashtab to the linux/hashtable implementation. Remove the vmwgfx_hashtab from the driver. Signed-off-by: Maaz Mombasawala Reviewed-by: Martin Krastev Reviewed-by: Zack Rusin Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20221022040236.616490-12-zack@kde.org Stable-dep-of: a309c7194e8a ("drm/vmwgfx: Remove rcu locks from user resources") Signed-off-by: Sasha Levin --- Documentation/gpu/todo.rst | 11 -- drivers/gpu/drm/vmwgfx/Makefile | 2 +- drivers/gpu/drm/vmwgfx/ttm_object.c | 8 +- drivers/gpu/drm/vmwgfx/ttm_object.h | 2 - drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c | 4 +- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 6 +- drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.c | 199 --------------------- drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.h | 83 --------- 8 files changed, 12 insertions(+), 303 deletions(-) delete mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.c delete mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.h diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index 393d218e4a0c..b2c6aaf1edf2 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -651,17 +651,6 @@ See drivers/gpu/drm/amd/display/TODO for tasks. Contact: Harry Wentland, Alex Deucher -vmwgfx: Replace hashtable with Linux' implementation ----------------------------------------------------- - -The vmwgfx driver uses its own hashtable implementation. Replace the -code with Linux' implementation and update the callers. It's mostly a -refactoring task, but the interfaces are different. - -Contact: Zack Rusin, Thomas Zimmermann - -Level: Intermediate - Bootsplash ========== diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile index eee73b9aa404..68e350f410ad 100644 --- a/drivers/gpu/drm/vmwgfx/Makefile +++ b/drivers/gpu/drm/vmwgfx/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_hashtab.o vmwgfx_kms.o vmwgfx_drv.o \ +vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \ vmwgfx_ioctl.o vmwgfx_resource.o vmwgfx_ttm_buffer.o \ vmwgfx_cmd.o vmwgfx_irq.o vmwgfx_ldu.o vmwgfx_ttm_glue.o \ vmwgfx_overlay.o vmwgfx_gmrid_manager.o vmwgfx_fence.o \ diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.c b/drivers/gpu/drm/vmwgfx/ttm_object.c index c07b81fbc495..932b125ebf3d 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_object.c +++ b/drivers/gpu/drm/vmwgfx/ttm_object.c @@ -284,7 +284,7 @@ ttm_base_object_noref_lookup(struct ttm_object_file *tfile, uint64_t key) } __release(RCU); - return drm_hash_entry(hash, struct ttm_ref_object, hash)->obj; + return hlist_entry(hash, struct ttm_ref_object, hash)->obj; } EXPORT_SYMBOL(ttm_base_object_noref_lookup); @@ -299,7 +299,7 @@ struct ttm_base_object *ttm_base_object_lookup(struct ttm_object_file *tfile, ret = ttm_tfile_find_ref_rcu(tfile, key, &hash); if (likely(ret == 0)) { - base = drm_hash_entry(hash, struct ttm_ref_object, hash)->obj; + base = hlist_entry(hash, struct ttm_ref_object, hash)->obj; if (!kref_get_unless_zero(&base->refcount)) base = NULL; } @@ -343,7 +343,7 @@ int ttm_ref_object_add(struct ttm_object_file *tfile, ret = ttm_tfile_find_ref_rcu(tfile, base->handle, &hash); if (ret == 0) { - ref = drm_hash_entry(hash, struct ttm_ref_object, hash); + ref = hlist_entry(hash, struct ttm_ref_object, hash); if (kref_get_unless_zero(&ref->kref)) { rcu_read_unlock(); break; @@ -407,7 +407,7 @@ int ttm_ref_object_base_unref(struct ttm_object_file *tfile, spin_unlock(&tfile->lock); return -EINVAL; } - ref = drm_hash_entry(hash, struct ttm_ref_object, hash); + ref = hlist_entry(hash, struct ttm_ref_object, hash); kref_put(&ref->kref, ttm_ref_object_release); spin_unlock(&tfile->lock); return 0; diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.h b/drivers/gpu/drm/vmwgfx/ttm_object.h index 67f30d589e27..f0ebbe340ad6 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_object.h +++ b/drivers/gpu/drm/vmwgfx/ttm_object.h @@ -42,8 +42,6 @@ #include #include -#include "vmwgfx_hashtab.h" - /** * enum ttm_object_type * diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c index 142aef686fcd..47bc0b411055 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c @@ -88,7 +88,7 @@ vmw_cmdbuf_res_lookup(struct vmw_cmdbuf_res_manager *man, hash_for_each_possible_rcu(man->resources, hash, head, key) { if (hash->key == key) - return drm_hash_entry(hash, struct vmw_cmdbuf_res, hash)->res; + return hlist_entry(hash, struct vmw_cmdbuf_res, hash)->res; } return ERR_PTR(-EINVAL); } @@ -243,7 +243,7 @@ int vmw_cmdbuf_res_remove(struct vmw_cmdbuf_res_manager *man, hash_for_each_possible_rcu(man->resources, hash, head, key) { if (hash->key == key) { - entry = drm_hash_entry(hash, struct vmw_cmdbuf_res, hash); + entry = hlist_entry(hash, struct vmw_cmdbuf_res, hash); break; } } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index d87aeedb78d0..7c45c3de0dcf 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -43,7 +43,6 @@ #include "ttm_object.h" #include "vmwgfx_fence.h" -#include "vmwgfx_hashtab.h" #include "vmwgfx_reg.h" #include "vmwgfx_validation.h" @@ -104,6 +103,11 @@ struct vmw_fpriv { bool gb_aware; /* user-space is guest-backed aware */ }; +struct vmwgfx_hash_item { + struct hlist_node head; + unsigned long key; +}; + /** * struct vmw_buffer_object - TTM buffer object with vmwgfx additions * @base: The TTM buffer object diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.c b/drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.c deleted file mode 100644 index 06aebc12774e..000000000000 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.c +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND. USA. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/* - * Simple open hash tab implementation. - * - * Authors: - * Thomas Hellström - */ - -#include -#include -#include -#include -#include -#include - -#include - -#include "vmwgfx_hashtab.h" - -int vmwgfx_ht_create(struct vmwgfx_open_hash *ht, unsigned int order) -{ - unsigned int size = 1 << order; - - ht->order = order; - ht->table = NULL; - if (size <= PAGE_SIZE / sizeof(*ht->table)) - ht->table = kcalloc(size, sizeof(*ht->table), GFP_KERNEL); - else - ht->table = vzalloc(array_size(size, sizeof(*ht->table))); - if (!ht->table) { - DRM_ERROR("Out of memory for hash table\n"); - return -ENOMEM; - } - return 0; -} - -void vmwgfx_ht_verbose_list(struct vmwgfx_open_hash *ht, unsigned long key) -{ - struct vmwgfx_hash_item *entry; - struct hlist_head *h_list; - unsigned int hashed_key; - int count = 0; - - hashed_key = hash_long(key, ht->order); - DRM_DEBUG("Key is 0x%08lx, Hashed key is 0x%08x\n", key, hashed_key); - h_list = &ht->table[hashed_key]; - hlist_for_each_entry(entry, h_list, head) - DRM_DEBUG("count %d, key: 0x%08lx\n", count++, entry->key); -} - -static struct hlist_node *vmwgfx_ht_find_key(struct vmwgfx_open_hash *ht, unsigned long key) -{ - struct vmwgfx_hash_item *entry; - struct hlist_head *h_list; - unsigned int hashed_key; - - hashed_key = hash_long(key, ht->order); - h_list = &ht->table[hashed_key]; - hlist_for_each_entry(entry, h_list, head) { - if (entry->key == key) - return &entry->head; - if (entry->key > key) - break; - } - return NULL; -} - -static struct hlist_node *vmwgfx_ht_find_key_rcu(struct vmwgfx_open_hash *ht, unsigned long key) -{ - struct vmwgfx_hash_item *entry; - struct hlist_head *h_list; - unsigned int hashed_key; - - hashed_key = hash_long(key, ht->order); - h_list = &ht->table[hashed_key]; - hlist_for_each_entry_rcu(entry, h_list, head) { - if (entry->key == key) - return &entry->head; - if (entry->key > key) - break; - } - return NULL; -} - -int vmwgfx_ht_insert_item(struct vmwgfx_open_hash *ht, struct vmwgfx_hash_item *item) -{ - struct vmwgfx_hash_item *entry; - struct hlist_head *h_list; - struct hlist_node *parent; - unsigned int hashed_key; - unsigned long key = item->key; - - hashed_key = hash_long(key, ht->order); - h_list = &ht->table[hashed_key]; - parent = NULL; - hlist_for_each_entry(entry, h_list, head) { - if (entry->key == key) - return -EINVAL; - if (entry->key > key) - break; - parent = &entry->head; - } - if (parent) - hlist_add_behind_rcu(&item->head, parent); - else - hlist_add_head_rcu(&item->head, h_list); - return 0; -} - -/* - * Just insert an item and return any "bits" bit key that hasn't been - * used before. - */ -int vmwgfx_ht_just_insert_please(struct vmwgfx_open_hash *ht, struct vmwgfx_hash_item *item, - unsigned long seed, int bits, int shift, - unsigned long add) -{ - int ret; - unsigned long mask = (1UL << bits) - 1; - unsigned long first, unshifted_key; - - unshifted_key = hash_long(seed, bits); - first = unshifted_key; - do { - item->key = (unshifted_key << shift) + add; - ret = vmwgfx_ht_insert_item(ht, item); - if (ret) - unshifted_key = (unshifted_key + 1) & mask; - } while (ret && (unshifted_key != first)); - - if (ret) { - DRM_ERROR("Available key bit space exhausted\n"); - return -EINVAL; - } - return 0; -} - -int vmwgfx_ht_find_item(struct vmwgfx_open_hash *ht, unsigned long key, - struct vmwgfx_hash_item **item) -{ - struct hlist_node *list; - - list = vmwgfx_ht_find_key_rcu(ht, key); - if (!list) - return -EINVAL; - - *item = hlist_entry(list, struct vmwgfx_hash_item, head); - return 0; -} - -int vmwgfx_ht_remove_key(struct vmwgfx_open_hash *ht, unsigned long key) -{ - struct hlist_node *list; - - list = vmwgfx_ht_find_key(ht, key); - if (list) { - hlist_del_init_rcu(list); - return 0; - } - return -EINVAL; -} - -int vmwgfx_ht_remove_item(struct vmwgfx_open_hash *ht, struct vmwgfx_hash_item *item) -{ - hlist_del_init_rcu(&item->head); - return 0; -} - -void vmwgfx_ht_remove(struct vmwgfx_open_hash *ht) -{ - if (ht->table) { - kvfree(ht->table); - ht->table = NULL; - } -} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.h b/drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.h deleted file mode 100644 index a9ce12922e21..000000000000 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright 2006 Tungsten Graphics, Inc., Bismack, ND. USA. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/* - * Simple open hash tab implementation. - * - * Authors: - * Thomas Hellström - */ - -/* - * TODO: Replace this hashtable with Linux' generic implementation - * from . - */ - -#ifndef VMWGFX_HASHTAB_H -#define VMWGFX_HASHTAB_H - -#include - -#define drm_hash_entry(_ptr, _type, _member) container_of(_ptr, _type, _member) - -struct vmwgfx_hash_item { - struct hlist_node head; - unsigned long key; -}; - -struct vmwgfx_open_hash { - struct hlist_head *table; - u8 order; -}; - -int vmwgfx_ht_create(struct vmwgfx_open_hash *ht, unsigned int order); -int vmwgfx_ht_insert_item(struct vmwgfx_open_hash *ht, struct vmwgfx_hash_item *item); -int vmwgfx_ht_just_insert_please(struct vmwgfx_open_hash *ht, struct vmwgfx_hash_item *item, - unsigned long seed, int bits, int shift, - unsigned long add); -int vmwgfx_ht_find_item(struct vmwgfx_open_hash *ht, unsigned long key, - struct vmwgfx_hash_item **item); - -void vmwgfx_ht_verbose_list(struct vmwgfx_open_hash *ht, unsigned long key); -int vmwgfx_ht_remove_key(struct vmwgfx_open_hash *ht, unsigned long key); -int vmwgfx_ht_remove_item(struct vmwgfx_open_hash *ht, struct vmwgfx_hash_item *item); -void vmwgfx_ht_remove(struct vmwgfx_open_hash *ht); - -/* - * RCU-safe interface - * - * The user of this API needs to make sure that two or more instances of the - * hash table manipulation functions are never run simultaneously. - * The lookup function vmwgfx_ht_find_item_rcu may, however, run simultaneously - * with any of the manipulation functions as long as it's called from within - * an RCU read-locked section. - */ -#define vmwgfx_ht_insert_item_rcu vmwgfx_ht_insert_item -#define vmwgfx_ht_just_insert_please_rcu vmwgfx_ht_just_insert_please -#define vmwgfx_ht_remove_key_rcu vmwgfx_ht_remove_key -#define vmwgfx_ht_remove_item_rcu vmwgfx_ht_remove_item -#define vmwgfx_ht_find_item_rcu vmwgfx_ht_find_item - -#endif From 7ac9578e45b20e3f3c0c8eb71f5417a499a7226a Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Wed, 7 Dec 2022 12:29:07 -0500 Subject: [PATCH 317/543] drm/vmwgfx: Remove rcu locks from user resources [ Upstream commit a309c7194e8a2f8bd4539b9449917913f6c2cd50 ] User resource lookups used rcu to avoid two extra atomics. Unfortunately the rcu paths were buggy and it was easy to make the driver crash by submitting command buffers from two different threads. Because the lookups never show up in performance profiles replace them with a regular spin lock which fixes the races in accesses to those shared resources. Fixes kernel oops'es in IGT's vmwgfx execution_buffer stress test and seen crashes with apps using shared resources. Fixes: e14c02e6b699 ("drm/vmwgfx: Look up objects without taking a reference") Signed-off-by: Zack Rusin Reviewed-by: Martin Krastev Reviewed-by: Maaz Mombasawala Link: https://patchwork.freedesktop.org/patch/msgid/20221207172907.959037-1-zack@kde.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/ttm_object.c | 41 +---- drivers/gpu/drm/vmwgfx/ttm_object.h | 14 -- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 38 ----- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 18 +-- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 182 +++++++++++------------ drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 33 ---- 6 files changed, 90 insertions(+), 236 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.c b/drivers/gpu/drm/vmwgfx/ttm_object.c index 932b125ebf3d..ddf8373c1d77 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_object.c +++ b/drivers/gpu/drm/vmwgfx/ttm_object.c @@ -254,40 +254,6 @@ void ttm_base_object_unref(struct ttm_base_object **p_base) kref_put(&base->refcount, ttm_release_base); } -/** - * ttm_base_object_noref_lookup - look up a base object without reference - * @tfile: The struct ttm_object_file the object is registered with. - * @key: The object handle. - * - * This function looks up a ttm base object and returns a pointer to it - * without refcounting the pointer. The returned pointer is only valid - * until ttm_base_object_noref_release() is called, and the object - * pointed to by the returned pointer may be doomed. Any persistent usage - * of the object requires a refcount to be taken using kref_get_unless_zero(). - * Iff this function returns successfully it needs to be paired with - * ttm_base_object_noref_release() and no sleeping- or scheduling functions - * may be called inbetween these function callse. - * - * Return: A pointer to the object if successful or NULL otherwise. - */ -struct ttm_base_object * -ttm_base_object_noref_lookup(struct ttm_object_file *tfile, uint64_t key) -{ - struct vmwgfx_hash_item *hash; - int ret; - - rcu_read_lock(); - ret = ttm_tfile_find_ref_rcu(tfile, key, &hash); - if (ret) { - rcu_read_unlock(); - return NULL; - } - - __release(RCU); - return hlist_entry(hash, struct ttm_ref_object, hash)->obj; -} -EXPORT_SYMBOL(ttm_base_object_noref_lookup); - struct ttm_base_object *ttm_base_object_lookup(struct ttm_object_file *tfile, uint64_t key) { @@ -295,15 +261,16 @@ struct ttm_base_object *ttm_base_object_lookup(struct ttm_object_file *tfile, struct vmwgfx_hash_item *hash; int ret; - rcu_read_lock(); - ret = ttm_tfile_find_ref_rcu(tfile, key, &hash); + spin_lock(&tfile->lock); + ret = ttm_tfile_find_ref(tfile, key, &hash); if (likely(ret == 0)) { base = hlist_entry(hash, struct ttm_ref_object, hash)->obj; if (!kref_get_unless_zero(&base->refcount)) base = NULL; } - rcu_read_unlock(); + spin_unlock(&tfile->lock); + return base; } diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.h b/drivers/gpu/drm/vmwgfx/ttm_object.h index f0ebbe340ad6..8098a3846bae 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_object.h +++ b/drivers/gpu/drm/vmwgfx/ttm_object.h @@ -307,18 +307,4 @@ extern int ttm_prime_handle_to_fd(struct ttm_object_file *tfile, #define ttm_prime_object_kfree(__obj, __prime) \ kfree_rcu(__obj, __prime.base.rhead) -struct ttm_base_object * -ttm_base_object_noref_lookup(struct ttm_object_file *tfile, uint64_t key); - -/** - * ttm_base_object_noref_release - release a base object pointer looked up - * without reference - * - * Releases a base object pointer looked up with ttm_base_object_noref_lookup(). - */ -static inline void ttm_base_object_noref_release(void) -{ - __acquire(RCU); - rcu_read_unlock(); -} #endif diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index 822251aaab0a..973a0a52462e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -715,44 +715,6 @@ int vmw_user_bo_lookup(struct drm_file *filp, return 0; } -/** - * vmw_user_bo_noref_lookup - Look up a vmw user buffer object without reference - * @filp: The TTM object file the handle is registered with. - * @handle: The user buffer object handle. - * - * This function looks up a struct vmw_bo and returns a pointer to the - * struct vmw_buffer_object it derives from without refcounting the pointer. - * The returned pointer is only valid until vmw_user_bo_noref_release() is - * called, and the object pointed to by the returned pointer may be doomed. - * Any persistent usage of the object requires a refcount to be taken using - * ttm_bo_reference_unless_doomed(). Iff this function returns successfully it - * needs to be paired with vmw_user_bo_noref_release() and no sleeping- - * or scheduling functions may be called in between these function calls. - * - * Return: A struct vmw_buffer_object pointer if successful or negative - * error pointer on failure. - */ -struct vmw_buffer_object * -vmw_user_bo_noref_lookup(struct drm_file *filp, u32 handle) -{ - struct vmw_buffer_object *vmw_bo; - struct ttm_buffer_object *bo; - struct drm_gem_object *gobj = drm_gem_object_lookup(filp, handle); - - if (!gobj) { - DRM_ERROR("Invalid buffer object handle 0x%08lx.\n", - (unsigned long)handle); - return ERR_PTR(-ESRCH); - } - vmw_bo = gem_to_vmw_bo(gobj); - bo = ttm_bo_get_unless_zero(&vmw_bo->base); - vmw_bo = vmw_buffer_object(bo); - drm_gem_object_put(gobj); - - return vmw_bo; -} - - /** * vmw_bo_fence_single - Utility function to fence a single TTM buffer * object without unreserving it. diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 7c45c3de0dcf..0bc1ebc43002 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -826,12 +826,7 @@ extern int vmw_user_resource_lookup_handle( uint32_t handle, const struct vmw_user_resource_conv *converter, struct vmw_resource **p_res); -extern struct vmw_resource * -vmw_user_resource_noref_lookup_handle(struct vmw_private *dev_priv, - struct ttm_object_file *tfile, - uint32_t handle, - const struct vmw_user_resource_conv * - converter); + extern int vmw_stream_claim_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int vmw_stream_unref_ioctl(struct drm_device *dev, void *data, @@ -870,15 +865,6 @@ static inline bool vmw_resource_mob_attached(const struct vmw_resource *res) return !RB_EMPTY_NODE(&res->mob_node); } -/** - * vmw_user_resource_noref_release - release a user resource pointer looked up - * without reference - */ -static inline void vmw_user_resource_noref_release(void) -{ - ttm_base_object_noref_release(); -} - /** * Buffer object helper functions - vmwgfx_bo.c */ @@ -930,8 +916,6 @@ extern void vmw_bo_unmap(struct vmw_buffer_object *vbo); extern void vmw_bo_move_notify(struct ttm_buffer_object *bo, struct ttm_resource *mem); extern void vmw_bo_swap_notify(struct ttm_buffer_object *bo); -extern struct vmw_buffer_object * -vmw_user_bo_noref_lookup(struct drm_file *filp, u32 handle); /** * vmw_bo_adjust_prio - Adjust the buffer object eviction priority diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index c943ab801ca7..70cfed4fdba0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -290,20 +290,26 @@ static void vmw_execbuf_rcache_update(struct vmw_res_cache_entry *rcache, rcache->valid_handle = 0; } +enum vmw_val_add_flags { + vmw_val_add_flag_none = 0, + vmw_val_add_flag_noctx = 1 << 0, +}; + /** - * vmw_execbuf_res_noref_val_add - Add a resource described by an unreferenced - * rcu-protected pointer to the validation list. + * vmw_execbuf_res_val_add - Add a resource to the validation list. * * @sw_context: Pointer to the software context. * @res: Unreferenced rcu-protected pointer to the resource. * @dirty: Whether to change dirty status. + * @flags: specifies whether to use the context or not * * Returns: 0 on success. Negative error code on failure. Typical error codes * are %-EINVAL on inconsistency and %-ESRCH if the resource was doomed. */ -static int vmw_execbuf_res_noref_val_add(struct vmw_sw_context *sw_context, - struct vmw_resource *res, - u32 dirty) +static int vmw_execbuf_res_val_add(struct vmw_sw_context *sw_context, + struct vmw_resource *res, + u32 dirty, + u32 flags) { struct vmw_private *dev_priv = res->dev_priv; int ret; @@ -318,24 +324,30 @@ static int vmw_execbuf_res_noref_val_add(struct vmw_sw_context *sw_context, if (dirty) vmw_validation_res_set_dirty(sw_context->ctx, rcache->private, dirty); - vmw_user_resource_noref_release(); return 0; } - priv_size = vmw_execbuf_res_size(dev_priv, res_type); - ret = vmw_validation_add_resource(sw_context->ctx, res, priv_size, - dirty, (void **)&ctx_info, - &first_usage); - vmw_user_resource_noref_release(); - if (ret) - return ret; - - if (priv_size && first_usage) { - ret = vmw_cmd_ctx_first_setup(dev_priv, sw_context, res, - ctx_info); - if (ret) { - VMW_DEBUG_USER("Failed first usage context setup.\n"); + if ((flags & vmw_val_add_flag_noctx) != 0) { + ret = vmw_validation_add_resource(sw_context->ctx, res, 0, dirty, + (void **)&ctx_info, NULL); + if (ret) return ret; + + } else { + priv_size = vmw_execbuf_res_size(dev_priv, res_type); + ret = vmw_validation_add_resource(sw_context->ctx, res, priv_size, + dirty, (void **)&ctx_info, + &first_usage); + if (ret) + return ret; + + if (priv_size && first_usage) { + ret = vmw_cmd_ctx_first_setup(dev_priv, sw_context, res, + ctx_info); + if (ret) { + VMW_DEBUG_USER("Failed first usage context setup.\n"); + return ret; + } } } @@ -343,43 +355,6 @@ static int vmw_execbuf_res_noref_val_add(struct vmw_sw_context *sw_context, return 0; } -/** - * vmw_execbuf_res_noctx_val_add - Add a non-context resource to the resource - * validation list if it's not already on it - * - * @sw_context: Pointer to the software context. - * @res: Pointer to the resource. - * @dirty: Whether to change dirty status. - * - * Returns: Zero on success. Negative error code on failure. - */ -static int vmw_execbuf_res_noctx_val_add(struct vmw_sw_context *sw_context, - struct vmw_resource *res, - u32 dirty) -{ - struct vmw_res_cache_entry *rcache; - enum vmw_res_type res_type = vmw_res_type(res); - void *ptr; - int ret; - - rcache = &sw_context->res_cache[res_type]; - if (likely(rcache->valid && rcache->res == res)) { - if (dirty) - vmw_validation_res_set_dirty(sw_context->ctx, - rcache->private, dirty); - return 0; - } - - ret = vmw_validation_add_resource(sw_context->ctx, res, 0, dirty, - &ptr, NULL); - if (ret) - return ret; - - vmw_execbuf_rcache_update(rcache, res, ptr); - - return 0; -} - /** * vmw_view_res_val_add - Add a view and the surface it's pointing to to the * validation list @@ -398,13 +373,13 @@ static int vmw_view_res_val_add(struct vmw_sw_context *sw_context, * First add the resource the view is pointing to, otherwise it may be * swapped out when the view is validated. */ - ret = vmw_execbuf_res_noctx_val_add(sw_context, vmw_view_srf(view), - vmw_view_dirtying(view)); + ret = vmw_execbuf_res_val_add(sw_context, vmw_view_srf(view), + vmw_view_dirtying(view), vmw_val_add_flag_noctx); if (ret) return ret; - return vmw_execbuf_res_noctx_val_add(sw_context, view, - VMW_RES_DIRTY_NONE); + return vmw_execbuf_res_val_add(sw_context, view, VMW_RES_DIRTY_NONE, + vmw_val_add_flag_noctx); } /** @@ -475,8 +450,9 @@ static int vmw_resource_context_res_add(struct vmw_private *dev_priv, if (IS_ERR(res)) continue; - ret = vmw_execbuf_res_noctx_val_add(sw_context, res, - VMW_RES_DIRTY_SET); + ret = vmw_execbuf_res_val_add(sw_context, res, + VMW_RES_DIRTY_SET, + vmw_val_add_flag_noctx); if (unlikely(ret != 0)) return ret; } @@ -490,9 +466,9 @@ static int vmw_resource_context_res_add(struct vmw_private *dev_priv, if (vmw_res_type(entry->res) == vmw_res_view) ret = vmw_view_res_val_add(sw_context, entry->res); else - ret = vmw_execbuf_res_noctx_val_add - (sw_context, entry->res, - vmw_binding_dirtying(entry->bt)); + ret = vmw_execbuf_res_val_add(sw_context, entry->res, + vmw_binding_dirtying(entry->bt), + vmw_val_add_flag_noctx); if (unlikely(ret != 0)) break; } @@ -658,7 +634,8 @@ vmw_cmd_res_check(struct vmw_private *dev_priv, { struct vmw_res_cache_entry *rcache = &sw_context->res_cache[res_type]; struct vmw_resource *res; - int ret; + int ret = 0; + bool needs_unref = false; if (p_res) *p_res = NULL; @@ -683,17 +660,18 @@ vmw_cmd_res_check(struct vmw_private *dev_priv, if (ret) return ret; - res = vmw_user_resource_noref_lookup_handle - (dev_priv, sw_context->fp->tfile, *id_loc, converter); - if (IS_ERR(res)) { + ret = vmw_user_resource_lookup_handle + (dev_priv, sw_context->fp->tfile, *id_loc, converter, &res); + if (ret != 0) { VMW_DEBUG_USER("Could not find/use resource 0x%08x.\n", (unsigned int) *id_loc); - return PTR_ERR(res); - } - - ret = vmw_execbuf_res_noref_val_add(sw_context, res, dirty); - if (unlikely(ret != 0)) return ret; + } + needs_unref = true; + + ret = vmw_execbuf_res_val_add(sw_context, res, dirty, vmw_val_add_flag_none); + if (unlikely(ret != 0)) + goto res_check_done; if (rcache->valid && rcache->res == res) { rcache->valid_handle = true; @@ -708,7 +686,11 @@ vmw_cmd_res_check(struct vmw_private *dev_priv, if (p_res) *p_res = res; - return 0; +res_check_done: + if (needs_unref) + vmw_resource_unreference(&res); + + return ret; } /** @@ -1171,9 +1153,9 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv, int ret; vmw_validation_preload_bo(sw_context->ctx); - vmw_bo = vmw_user_bo_noref_lookup(sw_context->filp, handle); - if (IS_ERR(vmw_bo)) { - VMW_DEBUG_USER("Could not find or use MOB buffer.\n"); + ret = vmw_user_bo_lookup(sw_context->filp, handle, &vmw_bo); + if (ret != 0) { + drm_dbg(&dev_priv->drm, "Could not find or use MOB buffer.\n"); return PTR_ERR(vmw_bo); } ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo, true, false); @@ -1225,9 +1207,9 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv, int ret; vmw_validation_preload_bo(sw_context->ctx); - vmw_bo = vmw_user_bo_noref_lookup(sw_context->filp, handle); - if (IS_ERR(vmw_bo)) { - VMW_DEBUG_USER("Could not find or use GMR region.\n"); + ret = vmw_user_bo_lookup(sw_context->filp, handle, &vmw_bo); + if (ret != 0) { + drm_dbg(&dev_priv->drm, "Could not find or use GMR region.\n"); return PTR_ERR(vmw_bo); } ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo, false, false); @@ -2025,8 +2007,9 @@ static int vmw_cmd_set_shader(struct vmw_private *dev_priv, res = vmw_shader_lookup(vmw_context_res_man(ctx), cmd->body.shid, cmd->body.type); if (!IS_ERR(res)) { - ret = vmw_execbuf_res_noctx_val_add(sw_context, res, - VMW_RES_DIRTY_NONE); + ret = vmw_execbuf_res_val_add(sw_context, res, + VMW_RES_DIRTY_NONE, + vmw_val_add_flag_noctx); if (unlikely(ret != 0)) return ret; @@ -2273,8 +2256,9 @@ static int vmw_cmd_dx_set_shader(struct vmw_private *dev_priv, return PTR_ERR(res); } - ret = vmw_execbuf_res_noctx_val_add(sw_context, res, - VMW_RES_DIRTY_NONE); + ret = vmw_execbuf_res_val_add(sw_context, res, + VMW_RES_DIRTY_NONE, + vmw_val_add_flag_noctx); if (ret) return ret; } @@ -2777,8 +2761,8 @@ static int vmw_cmd_dx_bind_shader(struct vmw_private *dev_priv, return PTR_ERR(res); } - ret = vmw_execbuf_res_noctx_val_add(sw_context, res, - VMW_RES_DIRTY_NONE); + ret = vmw_execbuf_res_val_add(sw_context, res, VMW_RES_DIRTY_NONE, + vmw_val_add_flag_noctx); if (ret) { VMW_DEBUG_USER("Error creating resource validation node.\n"); return ret; @@ -3098,8 +3082,8 @@ static int vmw_cmd_dx_bind_streamoutput(struct vmw_private *dev_priv, vmw_dx_streamoutput_set_size(res, cmd->body.sizeInBytes); - ret = vmw_execbuf_res_noctx_val_add(sw_context, res, - VMW_RES_DIRTY_NONE); + ret = vmw_execbuf_res_val_add(sw_context, res, VMW_RES_DIRTY_NONE, + vmw_val_add_flag_noctx); if (ret) { DRM_ERROR("Error creating resource validation node.\n"); return ret; @@ -3148,8 +3132,8 @@ static int vmw_cmd_dx_set_streamoutput(struct vmw_private *dev_priv, return 0; } - ret = vmw_execbuf_res_noctx_val_add(sw_context, res, - VMW_RES_DIRTY_NONE); + ret = vmw_execbuf_res_val_add(sw_context, res, VMW_RES_DIRTY_NONE, + vmw_val_add_flag_noctx); if (ret) { DRM_ERROR("Error creating resource validation node.\n"); return ret; @@ -4067,22 +4051,26 @@ static int vmw_execbuf_tie_context(struct vmw_private *dev_priv, if (ret) return ret; - res = vmw_user_resource_noref_lookup_handle + ret = vmw_user_resource_lookup_handle (dev_priv, sw_context->fp->tfile, handle, - user_context_converter); - if (IS_ERR(res)) { + user_context_converter, &res); + if (ret != 0) { VMW_DEBUG_USER("Could not find or user DX context 0x%08x.\n", (unsigned int) handle); - return PTR_ERR(res); + return ret; } - ret = vmw_execbuf_res_noref_val_add(sw_context, res, VMW_RES_DIRTY_SET); - if (unlikely(ret != 0)) + ret = vmw_execbuf_res_val_add(sw_context, res, VMW_RES_DIRTY_SET, + vmw_val_add_flag_none); + if (unlikely(ret != 0)) { + vmw_resource_unreference(&res); return ret; + } sw_context->dx_ctx_node = vmw_execbuf_info_from_res(sw_context, res); sw_context->man = vmw_context_res_man(res); + vmw_resource_unreference(&res); return 0; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index f66caa540e14..c7d645e5ec7b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -281,39 +281,6 @@ out_bad_resource: return ret; } -/** - * vmw_user_resource_noref_lookup_handle - lookup a struct resource from a - * TTM user-space handle and perform basic type checks - * - * @dev_priv: Pointer to a device private struct - * @tfile: Pointer to a struct ttm_object_file identifying the caller - * @handle: The TTM user-space handle - * @converter: Pointer to an object describing the resource type - * - * If the handle can't be found or is associated with an incorrect resource - * type, -EINVAL will be returned. - */ -struct vmw_resource * -vmw_user_resource_noref_lookup_handle(struct vmw_private *dev_priv, - struct ttm_object_file *tfile, - uint32_t handle, - const struct vmw_user_resource_conv - *converter) -{ - struct ttm_base_object *base; - - base = ttm_base_object_noref_lookup(tfile, handle); - if (!base) - return ERR_PTR(-ESRCH); - - if (unlikely(ttm_base_object_type(base) != converter->object_type)) { - ttm_base_object_noref_release(); - return ERR_PTR(-EINVAL); - } - - return converter->base_obj_to_res(base); -} - /* * Helper function that looks either a surface or bo. * From 8a97b544b98e44f596219ebb290fd2ba2fd5d644 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sat, 7 Jan 2023 19:10:04 +0200 Subject: [PATCH 318/543] net/sched: act_mpls: Fix warning during failed attribute validation [ Upstream commit 9e17f99220d111ea031b44153fdfe364b0024ff2 ] The 'TCA_MPLS_LABEL' attribute is of 'NLA_U32' type, but has a validation type of 'NLA_VALIDATE_FUNCTION'. This is an invalid combination according to the comment above 'struct nla_policy': " Meaning of `validate' field, use via NLA_POLICY_VALIDATE_FN: NLA_BINARY Validation function called for the attribute. All other Unused - but note that it's a union " This can trigger the warning [1] in nla_get_range_unsigned() when validation of the attribute fails. Despite being of 'NLA_U32' type, the associated 'min'/'max' fields in the policy are negative as they are aliased by the 'validate' field. Fix by changing the attribute type to 'NLA_BINARY' which is consistent with the above comment and all other users of NLA_POLICY_VALIDATE_FN(). As a result, move the length validation to the validation function. No regressions in MPLS tests: # ./tdc.py -f tc-tests/actions/mpls.json [...] # echo $? 0 [1] WARNING: CPU: 0 PID: 17743 at lib/nlattr.c:118 nla_get_range_unsigned+0x1d8/0x1e0 lib/nlattr.c:117 Modules linked in: CPU: 0 PID: 17743 Comm: syz-executor.0 Not tainted 6.1.0-rc8 #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-48-gd9c812dda519-prebuilt.qemu.org 04/01/2014 RIP: 0010:nla_get_range_unsigned+0x1d8/0x1e0 lib/nlattr.c:117 [...] Call Trace: __netlink_policy_dump_write_attr+0x23d/0x990 net/netlink/policy.c:310 netlink_policy_dump_write_attr+0x22/0x30 net/netlink/policy.c:411 netlink_ack_tlv_fill net/netlink/af_netlink.c:2454 [inline] netlink_ack+0x546/0x760 net/netlink/af_netlink.c:2506 netlink_rcv_skb+0x1b7/0x240 net/netlink/af_netlink.c:2546 rtnetlink_rcv+0x18/0x20 net/core/rtnetlink.c:6109 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x5e9/0x6b0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x739/0x860 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg net/socket.c:734 [inline] ____sys_sendmsg+0x38f/0x500 net/socket.c:2482 ___sys_sendmsg net/socket.c:2536 [inline] __sys_sendmsg+0x197/0x230 net/socket.c:2565 __do_sys_sendmsg net/socket.c:2574 [inline] __se_sys_sendmsg net/socket.c:2572 [inline] __x64_sys_sendmsg+0x42/0x50 net/socket.c:2572 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Link: https://lore.kernel.org/netdev/CAO4mrfdmjvRUNbDyP0R03_DrD_eFCLCguz6OxZ2TYRSv0K9gxA@mail.gmail.com/ Fixes: 2a2ea50870ba ("net: sched: add mpls manipulation actions to TC") Reported-by: Wei Chen Tested-by: Wei Chen Signed-off-by: Ido Schimmel Reviewed-by: Alexander Duyck Link: https://lore.kernel.org/r/20230107171004.608436-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/act_mpls.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index 8ad25cc8ccd5..ea5959094adb 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -132,6 +132,11 @@ static int valid_label(const struct nlattr *attr, { const u32 *label = nla_data(attr); + if (nla_len(attr) != sizeof(*label)) { + NL_SET_ERR_MSG_MOD(extack, "Invalid MPLS label length"); + return -EINVAL; + } + if (*label & ~MPLS_LABEL_MASK || *label == MPLS_LABEL_IMPLNULL) { NL_SET_ERR_MSG_MOD(extack, "MPLS label out of range"); return -EINVAL; @@ -143,7 +148,8 @@ static int valid_label(const struct nlattr *attr, static const struct nla_policy mpls_policy[TCA_MPLS_MAX + 1] = { [TCA_MPLS_PARMS] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_mpls)), [TCA_MPLS_PROTO] = { .type = NLA_U16 }, - [TCA_MPLS_LABEL] = NLA_POLICY_VALIDATE_FN(NLA_U32, valid_label), + [TCA_MPLS_LABEL] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, + valid_label), [TCA_MPLS_TC] = NLA_POLICY_RANGE(NLA_U8, 0, 7), [TCA_MPLS_TTL] = NLA_POLICY_MIN(NLA_U8, 1), [TCA_MPLS_BOS] = NLA_POLICY_RANGE(NLA_U8, 0, 1), From 7697de4ad708099cda1427aff4a73af1a25ab162 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 8 Jan 2023 20:37:53 +0100 Subject: [PATCH 319/543] Revert "r8169: disable detection of chip version 36" [ Upstream commit 2ea26b4de6f42b74a5f1701de41efa6bc9f12666 ] This reverts commit 42666b2c452ce87894786aae05e3fad3cfc6cb59. This chip version seems to be very rare, but it exits in consumer devices, see linked report. Link: https://stackoverflow.com/questions/75049473/cant-setup-a-wired-network-in-archlinux-fresh-install Fixes: 42666b2c452c ("r8169: disable detection of chip version 36") Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/42e9674c-d5d0-a65a-f578-e5c74f244739@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/realtek/r8169_main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index a73d061d9fcb..fe8dc8e0522b 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -1996,10 +1996,7 @@ static enum mac_version rtl8169_get_mac_version(u16 xid, bool gmii) /* 8168F family. */ { 0x7c8, 0x488, RTL_GIGA_MAC_VER_38 }, - /* It seems this chip version never made it to - * the wild. Let's disable detection. - * { 0x7cf, 0x481, RTL_GIGA_MAC_VER_36 }, - */ + { 0x7cf, 0x481, RTL_GIGA_MAC_VER_36 }, { 0x7cf, 0x480, RTL_GIGA_MAC_VER_35 }, /* 8168E family. */ From 39124c14af45b4c0b393c9d271697b17721ad84b Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Tue, 16 Aug 2022 23:19:11 +0300 Subject: [PATCH 320/543] net/mlx5: check attr pointer validity before dereferencing it [ Upstream commit e0bf81bf0d3d4747c146e0bf44774d3d881d7137 ] Fix attr pointer validity checks after it was already dereferenced. Fixes: cb0d54cbf948 ("net/mlx5e: Fix wrong source vport matching on tunnel rule") Signed-off-by: Ariel Levkovich Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 8c6c9bcb3dc3..b4e263e8cfb8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -142,7 +142,7 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, if (mlx5_esw_indir_table_decap_vport(attr)) vport = mlx5_esw_indir_table_decap_vport(attr); - if (attr && !attr->chain && esw_attr->int_port) + if (!attr->chain && esw_attr && esw_attr->int_port) metadata = mlx5e_tc_int_port_get_metadata_for_match(esw_attr->int_port); else From 1f232fb61913f4ef527528330749d0b23d31c806 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Tue, 30 Aug 2022 00:32:30 +0300 Subject: [PATCH 321/543] net/mlx5e: TC, Keep mod hdr actions after mod hdr alloc [ Upstream commit 5e72f3f1c558019082cfeedeed73748f35d780c6 ] When offloading TC NIC rule which has mod_hdr action, the mod_hdr actions list is freed upon mod_hdr allocation. In the new format of handling multi table actions and CT in particular, the mod_hdr actions list is still relevant when setting the pre and post rules and therefore, freeing the list may cause adding rules which don't set the FTE_ID. Therefore, the mod_hdr actions list needs to be kept for the pre/post flows as well and should be left for these handler to be freed. Fixes: 8300f225268b ("net/mlx5e: Create new flow attr for multi table actions") Signed-off-by: Ariel Levkovich Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index bd9936af4582..4c313b7424bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1283,7 +1283,6 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); - mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); if (err) return err; } @@ -1341,8 +1340,10 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, } mutex_unlock(&tc->t_lock); - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts); mlx5e_detach_mod_hdr(priv, flow); + } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) mlx5_fc_destroy(priv->mdev, attr->counter); From ddf458641a511e7dff19f3bf0cbbc5dd9fe08ce5 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 28 Nov 2022 19:05:47 +0200 Subject: [PATCH 322/543] net/mlx5: Fix command stats access after free [ Upstream commit da2e552b469a0cd130ff70a88ccc4139da428a65 ] Command may fail while driver is reloading and can't accept FW commands till command interface is reinitialized. Such command failure is being logged to command stats. This results in NULL pointer access as command stats structure is being freed and reallocated during mlx5 devlink reload (see kernel log below). Fix it by making command stats statically allocated on driver probe. Kernel log: [ 2394.808802] BUG: unable to handle kernel paging request at 000000000002a9c0 [ 2394.810610] PGD 0 P4D 0 [ 2394.811811] Oops: 0002 [#1] SMP NOPTI ... [ 2394.815482] RIP: 0010:native_queued_spin_lock_slowpath+0x183/0x1d0 ... [ 2394.829505] Call Trace: [ 2394.830667] _raw_spin_lock_irq+0x23/0x26 [ 2394.831858] cmd_status_err+0x55/0x110 [mlx5_core] [ 2394.833020] mlx5_access_reg+0xe7/0x150 [mlx5_core] [ 2394.834175] mlx5_query_port_ptys+0x78/0xa0 [mlx5_core] [ 2394.835337] mlx5e_ethtool_get_link_ksettings+0x74/0x590 [mlx5_core] [ 2394.836454] ? kmem_cache_alloc_trace+0x140/0x1c0 [ 2394.837562] __rh_call_get_link_ksettings+0x33/0x100 [ 2394.838663] ? __rtnl_unlock+0x25/0x50 [ 2394.839755] __ethtool_get_link_ksettings+0x72/0x150 [ 2394.840862] duplex_show+0x6e/0xc0 [ 2394.841963] dev_attr_show+0x1c/0x40 [ 2394.843048] sysfs_kf_seq_show+0x9b/0x100 [ 2394.844123] seq_read+0x153/0x410 [ 2394.845187] vfs_read+0x91/0x140 [ 2394.846226] ksys_read+0x4f/0xb0 [ 2394.847234] do_syscall_64+0x5b/0x1a0 [ 2394.848228] entry_SYSCALL_64_after_hwframe+0x65/0xca Fixes: 34f46ae0d4b3 ("net/mlx5: Add command failures data to debugfs") Signed-off-by: Moshe Shemesh Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 13 ++----------- include/linux/mlx5/driver.h | 2 +- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index e7a894ba5c3e..723891eb86ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -2177,15 +2177,9 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) return -EINVAL; } - cmd->stats = kvcalloc(MLX5_CMD_OP_MAX, sizeof(*cmd->stats), GFP_KERNEL); - if (!cmd->stats) - return -ENOMEM; - cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0); - if (!cmd->pool) { - err = -ENOMEM; - goto dma_pool_err; - } + if (!cmd->pool) + return -ENOMEM; err = alloc_cmd_page(dev, cmd); if (err) @@ -2269,8 +2263,6 @@ err_free_page: err_free_pool: dma_pool_destroy(cmd->pool); -dma_pool_err: - kvfree(cmd->stats); return err; } @@ -2283,7 +2275,6 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) destroy_msg_cache(dev); free_cmd_page(dev, cmd); dma_pool_destroy(cmd->pool); - kvfree(cmd->stats); } void mlx5_cmd_set_state(struct mlx5_core_dev *dev, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 06cbad166225..ad55470a9fb9 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -315,7 +315,7 @@ struct mlx5_cmd { struct mlx5_cmd_debug dbg; struct cmd_msg_cache cache[MLX5_NUM_COMMAND_CACHES]; int checksum_disabled; - struct mlx5_cmd_stats *stats; + struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX]; }; struct mlx5_cmd_mailbox { From 798604a1bd6eb0a0d8ac539628d0f5ad1f50d112 Mon Sep 17 00:00:00 2001 From: Roy Novich Date: Wed, 4 Jan 2023 11:16:21 +0200 Subject: [PATCH 323/543] net/mlx5e: Verify dev is present for fix features ndo [ Upstream commit ab4b01bfdaa69492fb36484026b0a0f0af02d75a ] The native NIC port net device instance is being used as Uplink representor. While changing profiles private resources are not available, fix features ndo does not check if the netdev is present. Add driver protection to verify private resources are ready. Fixes: 7a9fb35e8c3a ("net/mlx5e: Do not reload ethernet ports when changing eswitch mode") Signed-off-by: Roy Novich Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 951ede433813..4dc149ef618c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4071,6 +4071,9 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, struct mlx5e_vlan_table *vlan; struct mlx5e_params *params; + if (!netif_device_present(netdev)) + return features; + vlan = mlx5e_fs_get_vlan(priv->fs); mutex_lock(&priv->state_lock); params = &priv->channels.params; From b5f108e202b74b3641dfcfcbacea096e159760dc Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 15 Dec 2022 13:02:38 +0200 Subject: [PATCH 324/543] net/mlx5e: IPoIB, Block queue count configuration when sub interfaces are present [ Upstream commit 806a8df7126a8c05d60411eeb81057c2a8bbe7a7 ] PKEY sub interfaces share the receive queues with the parent interface. While setting the sub interface queue count is not supported, it is currently possible to change the number of queues of the parent interface. Thus we can end up with inconsistent queue sizes between the parent and its sub interfaces. This change disallows setting the queue count on the parent interface when sub interfaces are present. This is achieved by introducing an explicit reference to the parent netdev in the mlx5i_priv of the child interface. An additional counter is also required on the parent side to detect when sub interfaces are attached and for proper cleanup. The rtnl lock is taken during the ethtool op and the sub interface ndo_init/uninit ops. There is no race here around counting the sub interfaces, reading the sub interfaces and setting the number of channels. The ASSERT_RTNL was added to document that. Fixes: be98737a4faa ("net/mlx5e: Use dynamic per-channel allocations in stats") Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../mellanox/mlx5/core/ipoib/ethtool.c | 16 +++++++- .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 38 +++++++++++++++++++ .../ethernet/mellanox/mlx5/core/ipoib/ipoib.h | 6 +++ .../mellanox/mlx5/core/ipoib/ipoib_vlan.c | 9 ++--- 4 files changed, 62 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index c247cca154e9..eff92dc0927c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -90,9 +90,21 @@ static void mlx5i_get_ringparam(struct net_device *dev, static int mlx5i_set_channels(struct net_device *dev, struct ethtool_channels *ch) { - struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct mlx5i_priv *ipriv = netdev_priv(dev); + struct mlx5e_priv *epriv = mlx5i_epriv(dev); - return mlx5e_ethtool_set_channels(priv, ch); + /* rtnl lock protects from race between this ethtool op and sub + * interface ndo_init/uninit. + */ + ASSERT_RTNL(); + if (ipriv->num_sub_interfaces > 0) { + mlx5_core_warn(epriv->mdev, + "can't change number of channels for interfaces with sub interfaces (%u)\n", + ipriv->num_sub_interfaces); + return -EINVAL; + } + + return mlx5e_ethtool_set_channels(epriv, ch); } static void mlx5i_get_channels(struct net_device *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 84f5352b0ce1..038ae0fcf9d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -160,6 +160,44 @@ void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->tx_dropped = sstats->tx_queue_dropped; } +struct net_device *mlx5i_parent_get(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv, *parent_ipriv; + struct net_device *parent_dev; + int parent_ifindex; + + ipriv = priv->ppriv; + + parent_ifindex = netdev->netdev_ops->ndo_get_iflink(netdev); + parent_dev = dev_get_by_index(dev_net(netdev), parent_ifindex); + if (!parent_dev) + return NULL; + + parent_ipriv = netdev_priv(parent_dev); + + ASSERT_RTNL(); + parent_ipriv->num_sub_interfaces++; + + ipriv->parent_dev = parent_dev; + + return parent_dev; +} + +void mlx5i_parent_put(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv, *parent_ipriv; + + ipriv = priv->ppriv; + parent_ipriv = netdev_priv(ipriv->parent_dev); + + ASSERT_RTNL(); + parent_ipriv->num_sub_interfaces--; + + dev_put(ipriv->parent_dev); +} + int mlx5i_init_underlay_qp(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index 99d46fda9f82..f3f2af972020 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -54,9 +54,11 @@ struct mlx5i_priv { struct rdma_netdev rn; /* keep this first */ u32 qpn; bool sub_interface; + u32 num_sub_interfaces; u32 qkey; u16 pkey_index; struct mlx5i_pkey_qpn_ht *qpn_htbl; + struct net_device *parent_dev; char *mlx5e_priv[]; }; @@ -117,5 +119,9 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_av *av, u32 dqpn, u32 dqkey, bool xmit_more); void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); +/* Reference management for child to parent interfaces. */ +struct net_device *mlx5i_parent_get(struct net_device *netdev); +void mlx5i_parent_put(struct net_device *netdev); + #endif /* CONFIG_MLX5_CORE_IPOIB */ #endif /* __MLX5E_IPOB_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index 0227a521d301..3d31c59e69d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -158,21 +158,19 @@ static int mlx5i_pkey_dev_init(struct net_device *dev) struct mlx5e_priv *priv = mlx5i_epriv(dev); struct mlx5i_priv *ipriv, *parent_ipriv; struct net_device *parent_dev; - int parent_ifindex; ipriv = priv->ppriv; - /* Get QPN to netdevice hash table from parent */ - parent_ifindex = dev->netdev_ops->ndo_get_iflink(dev); - parent_dev = dev_get_by_index(dev_net(dev), parent_ifindex); + /* Link to parent */ + parent_dev = mlx5i_parent_get(dev); if (!parent_dev) { mlx5_core_warn(priv->mdev, "failed to get parent device\n"); return -EINVAL; } + /* Get QPN to netdevice hash table from parent */ parent_ipriv = netdev_priv(parent_dev); ipriv->qpn_htbl = parent_ipriv->qpn_htbl; - dev_put(parent_dev); return mlx5i_dev_init(dev); } @@ -184,6 +182,7 @@ static int mlx5i_pkey_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) static void mlx5i_pkey_dev_cleanup(struct net_device *netdev) { + mlx5i_parent_put(netdev); return mlx5i_dev_cleanup(netdev); } From 5844a46f09f768da866d6b0ffbf1a9073266bf24 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Fri, 25 Nov 2022 17:51:19 +0200 Subject: [PATCH 325/543] net/mlx5e: IPoIB, Block PKEY interfaces with less rx queues than parent [ Upstream commit 31c70bfe58ef09fe36327ddcced9143a16e9e83d ] A user is able to configure an arbitrary number of rx queues when creating an interface via netlink. This doesn't work for child PKEY interfaces because the child interface uses the parent receive channels. Although the child shares the parent's receive channels, the number of rx queues is important for the channel_stats array: the parent's rx channel index is used to access the child's channel_stats. So the array has to be at least as large as the parent's rx queue size for the counting to work correctly and to prevent out of bound accesses. This patch checks for the mentioned scenario and returns an error when trying to create the interface. The error is propagated to the user. Fixes: be98737a4faa ("net/mlx5e: Use dynamic per-channel allocations in stats") Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index 3d31c59e69d4..0cf4eaf852d2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -168,6 +168,15 @@ static int mlx5i_pkey_dev_init(struct net_device *dev) return -EINVAL; } + if (dev->num_rx_queues < parent_dev->real_num_rx_queues) { + mlx5_core_warn(priv->mdev, + "failed to create child device with rx queues [%d] less than parent's [%d]\n", + dev->num_rx_queues, + parent_dev->real_num_rx_queues); + mlx5i_parent_put(dev); + return -EINVAL; + } + /* Get QPN to netdevice hash table from parent */ parent_ipriv = netdev_priv(parent_dev); ipriv->qpn_htbl = parent_ipriv->qpn_htbl; From 0fa15a13670d91ed15c6d0b552404db3fc79642b Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Wed, 23 Nov 2022 16:59:13 +0200 Subject: [PATCH 326/543] net/mlx5e: IPoIB, Fix child PKEY interface stats on rx path [ Upstream commit b5e23931c45a2f99f60a2f2b98a9e4d5a62a5b13 ] The current code always does the accounting using the stats from the parent interface (linked in the rq). This doesn't work when there are child interfaces configured. Fix this behavior by always using the stats from the child interface priv. This will also work for parent only interfaces: the child (netdev) and parent netdev (rq->netdev) will point to the same thing. Fixes: be98737a4faa ("net/mlx5e: Use dynamic per-channel allocations in stats") Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index a61a43fc8d5c..56d1bd22c7c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -2300,7 +2300,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, priv = mlx5i_epriv(netdev); tstamp = &priv->tstamp; - stats = rq->stats; + stats = &priv->channel_stats[rq->ix]->rq; flags_rqpn = be32_to_cpu(cqe->flags_rqpn); g = (flags_rqpn >> 28) & 3; From f594f740c3523abe1067c684a832c47eb4f01745 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Mon, 5 Dec 2022 14:26:09 -0800 Subject: [PATCH 327/543] net/mlx5: Fix ptp max frequency adjustment range [ Upstream commit fe91d57277eef8bb4aca05acfa337b4a51d0bba4 ] .max_adj of ptp_clock_info acts as an absolute value for the amount in ppb that can be set for a single call of .adjfine. This means that a single call to .getfine cannot be greater than .max_adj or less than -(.max_adj). Provides correct value for max frequency adjustment value supported by devices. Fixes: 3d8c38af1493 ("net/mlx5e: Add PTP Hardware Clock (PHC) support") Signed-off-by: Rahul Rameshbabu Reviewed-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index d3a9ae80fd30..d7ddfc489536 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -691,7 +691,7 @@ static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, static const struct ptp_clock_info mlx5_ptp_clock_info = { .owner = THIS_MODULE, .name = "mlx5_ptp", - .max_adj = 100000000, + .max_adj = 50000000, .n_alarm = 0, .n_ext_ts = 0, .n_per_out = 0, From e02190787f1141935784c1015b964fa39370c686 Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Tue, 27 Dec 2022 04:54:09 +0200 Subject: [PATCH 328/543] net/mlx5e: Don't support encap rules with gbp option [ Upstream commit d515d63cae2cd186acf40deaa8ef33067bb7f637 ] Previously, encap rules with gbp option would be offloaded by mistake but driver does not support gbp option offload. To fix this issue, check if the encap rule has gbp option and don't offload the rule Fixes: d8f9dfae49ce ("net: sched: allow flower to match vxlan options") Signed-off-by: Gavin Li Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c index fd07c4cbfd1d..1f62c702b625 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c @@ -88,6 +88,8 @@ static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[], struct udphdr *udp = (struct udphdr *)(buf); struct vxlanhdr *vxh; + if (tun_key->tun_flags & TUNNEL_VXLAN_OPT) + return -EOPNOTSUPP; vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); *ip_proto = IPPROTO_UDP; From 9be4e25bdc1fb6bacde61e3e35e276c821a485a0 Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Wed, 14 Dec 2022 16:34:13 +0200 Subject: [PATCH 329/543] net/mlx5e: Fix macsec ssci attribute handling in offload path [ Upstream commit f5e1ed04aa2ea665a796f0109091ca3f2b01024a ] Currently when macsec offload is set with extended packet number (epn) enabled, the driver wrongly deduce the short secure channel identifier (ssci) from the salt instead of the stand alone ssci attribute as it should, consequently creating a mismatch between the kernel and driver's ssci values. Fix by using the ssci value from the relevant attribute. Fixes: 4411a6c0abd3 ("net/mlx5e: Support MACsec offload extended packet number (EPN)") Signed-off-by: Emeel Hakim Reviewed-by: Raed Salem Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index f900709639f6..7c0085ba2fc5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -62,6 +62,7 @@ struct mlx5e_macsec_sa { u32 enc_key_id; u32 next_pn; sci_t sci; + ssci_t ssci; salt_t salt; struct rhash_head hash; @@ -499,10 +500,11 @@ mlx5e_macsec_get_macsec_device_context(const struct mlx5e_macsec *macsec, } static void update_macsec_epn(struct mlx5e_macsec_sa *sa, const struct macsec_key *key, - const pn_t *next_pn_halves) + const pn_t *next_pn_halves, ssci_t ssci) { struct mlx5e_macsec_epn_state *epn_state = &sa->epn_state; + sa->ssci = ssci; sa->salt = key->salt; epn_state->epn_enabled = 1; epn_state->epn_msb = next_pn_halves->upper; @@ -550,7 +552,8 @@ static int mlx5e_macsec_add_txsa(struct macsec_context *ctx) tx_sa->assoc_num = assoc_num; if (secy->xpn) - update_macsec_epn(tx_sa, &ctx_tx_sa->key, &ctx_tx_sa->next_pn_halves); + update_macsec_epn(tx_sa, &ctx_tx_sa->key, &ctx_tx_sa->next_pn_halves, + ctx_tx_sa->ssci); err = mlx5_create_encryption_key(mdev, ctx->sa.key, secy->key_len, MLX5_ACCEL_OBJ_MACSEC_KEY, @@ -945,7 +948,8 @@ static int mlx5e_macsec_add_rxsa(struct macsec_context *ctx) rx_sa->fs_id = rx_sc->sc_xarray_element->fs_id; if (ctx->secy->xpn) - update_macsec_epn(rx_sa, &ctx_rx_sa->key, &ctx_rx_sa->next_pn_halves); + update_macsec_epn(rx_sa, &ctx_rx_sa->key, &ctx_rx_sa->next_pn_halves, + ctx_rx_sa->ssci); err = mlx5_create_encryption_key(mdev, ctx->sa.key, ctx->secy->key_len, MLX5_ACCEL_OBJ_MACSEC_KEY, From 514d9c6a39213d8200884e70f60ce7faef1ee597 Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Sun, 11 Dec 2022 13:22:23 +0200 Subject: [PATCH 330/543] net/mlx5e: Fix macsec possible null dereference when updating MAC security entity (SecY) [ Upstream commit 9828994ac492e8e7de47fe66097b7e665328f348 ] Upon updating MAC security entity (SecY) in hw offload path, the macsec security association (SA) initialization routine is called. In case of extended packet number (epn) is enabled the salt and ssci attributes are retrieved using the MACsec driver rx_sa context which is unavailable when updating a SecY property such as encoding-sa hence the null dereference. Fix by using the provided SA to set those attributes. Fixes: 4411a6c0abd3 ("net/mlx5e: Support MACsec offload extended packet number (EPN)") Signed-off-by: Emeel Hakim Reviewed-by: Raed Salem Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 7c0085ba2fc5..b92d541b5286 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -359,7 +359,6 @@ static int mlx5e_macsec_init_sa(struct macsec_context *ctx, struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_macsec_obj_attrs obj_attrs; union mlx5e_macsec_rule *macsec_rule; - struct macsec_key *key; int err; obj_attrs.next_pn = sa->next_pn; @@ -369,13 +368,9 @@ static int mlx5e_macsec_init_sa(struct macsec_context *ctx, obj_attrs.aso_pdn = macsec->aso.pdn; obj_attrs.epn_state = sa->epn_state; - key = (is_tx) ? &ctx->sa.tx_sa->key : &ctx->sa.rx_sa->key; - if (sa->epn_state.epn_enabled) { - obj_attrs.ssci = (is_tx) ? cpu_to_be32((__force u32)ctx->sa.tx_sa->ssci) : - cpu_to_be32((__force u32)ctx->sa.rx_sa->ssci); - - memcpy(&obj_attrs.salt, &key->salt, sizeof(key->salt)); + obj_attrs.ssci = cpu_to_be32((__force u32)sa->ssci); + memcpy(&obj_attrs.salt, &sa->salt, sizeof(sa->salt)); } obj_attrs.replay_window = ctx->secy->replay_window; From 1621f9a5394880e765f28f1e3401ba582b2ab2ee Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sun, 8 Jan 2023 16:45:41 +0100 Subject: [PATCH 331/543] selftests/net: l2_tos_ttl_inherit.sh: Set IPv6 addresses with "nodad". [ Upstream commit e59370b2e96eb8e7e057a2a16e999ff385a3f2fb ] The ping command can run before DAD completes. In that case, ping may fail and break the selftest. We don't need DAD here since we're working on isolated device pairs. Fixes: b690842d12fd ("selftests/net: test l2 tunnel TOS/TTL inheriting") Signed-off-by: Guillaume Nault Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- tools/testing/selftests/net/l2_tos_ttl_inherit.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh index dca1e6f777a8..e2574b08eabc 100755 --- a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh +++ b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh @@ -137,8 +137,8 @@ setup() { if [ "$type" = "gre" ]; then type="ip6gretap" fi - ip addr add fdd1:ced0:5d88:3fce::1/64 dev veth0 - $ns ip addr add fdd1:ced0:5d88:3fce::2/64 dev veth1 + ip addr add fdd1:ced0:5d88:3fce::1/64 dev veth0 nodad + $ns ip addr add fdd1:ced0:5d88:3fce::2/64 dev veth1 nodad ip link add name tep0 type $type $local_addr1 \ remote fdd1:ced0:5d88:3fce::2 tos $test_tos ttl $test_ttl \ $vxlan $geneve @@ -170,8 +170,8 @@ setup() { ip addr add 198.19.0.1/24 brd + dev ${parent}0 $ns ip addr add 198.19.0.2/24 brd + dev ${parent}1 elif [ "$inner" = "6" ]; then - ip addr add fdd4:96cf:4eae:443b::1/64 dev ${parent}0 - $ns ip addr add fdd4:96cf:4eae:443b::2/64 dev ${parent}1 + ip addr add fdd4:96cf:4eae:443b::1/64 dev ${parent}0 nodad + $ns ip addr add fdd4:96cf:4eae:443b::2/64 dev ${parent}1 nodad fi } From 2440f74a9967460b87b6ba74203a81515e049cf7 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sun, 8 Jan 2023 16:45:46 +0100 Subject: [PATCH 332/543] selftests/net: l2_tos_ttl_inherit.sh: Run tests in their own netns. [ Upstream commit c53cb00f7983a5474f2d36967f84908b85af9159 ] This selftest currently runs half in the current namespace and half in a netns of its own. Therefore, the test can fail if the current namespace is already configured with incompatible parameters (for example if it already has a veth0 interface). Adapt the script to put both ends of the veth pair in their own netns. Now veth0 is created in NS0 instead of the current namespace, while veth1 is set up in NS1 (instead of the 'testing' netns). The user visible netns names are randomised to minimise the risk of conflicts with already existing namespaces. The cleanup() function doesn't need to remove the virtual interface anymore: deleting NS0 and NS1 automatically removes the virtual interfaces they contained. We can remove $ns, which was only used to run ip commands in the 'testing' netns (let's use the builtin "-netns" option instead). However, we still need a similar functionality as ping and tcpdump now need to run in NS0. So we now have $RUN_NS0 for that. Fixes: b690842d12fd ("selftests/net: test l2 tunnel TOS/TTL inheriting") Reported-by: Mirsad Goran Todorovac Tested-by: Mirsad Goran Todorovac Signed-off-by: Guillaume Nault Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../selftests/net/l2_tos_ttl_inherit.sh | 162 ++++++++++-------- 1 file changed, 93 insertions(+), 69 deletions(-) diff --git a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh index e2574b08eabc..cf56680d598f 100755 --- a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh +++ b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh @@ -25,6 +25,11 @@ expected_tos="0x00" expected_ttl="0" failed=false +readonly NS0=$(mktemp -u ns0-XXXXXXXX) +readonly NS1=$(mktemp -u ns1-XXXXXXXX) + +RUN_NS0="ip netns exec ${NS0}" + get_random_tos() { # Get a random hex tos value between 0x00 and 0xfc, a multiple of 4 echo "0x$(tr -dc '0-9a-f' < /dev/urandom | head -c 1)\ @@ -61,7 +66,6 @@ setup() { local vlan="$5" local test_tos="0x00" local test_ttl="0" - local ns="ip netns exec testing" # We don't want a test-tos of 0x00, # because this is the value that we get when no tos is set. @@ -94,14 +98,15 @@ setup() { printf "│%7s │%6s │%6s │%13s │%13s │%6s │" \ "$type" "$outer" "$inner" "$tos" "$ttl" "$vlan" - # Create 'testing' netns, veth pair and connect main ns with testing ns - ip netns add testing - ip link add type veth - ip link set veth1 netns testing - ip link set veth0 up - $ns ip link set veth1 up - ip addr flush dev veth0 - $ns ip addr flush dev veth1 + # Create netns NS0 and NS1 and connect them with a veth pair + ip netns add "${NS0}" + ip netns add "${NS1}" + ip link add name veth0 netns "${NS0}" type veth \ + peer name veth1 netns "${NS1}" + ip -netns "${NS0}" link set dev veth0 up + ip -netns "${NS1}" link set dev veth1 up + ip -netns "${NS0}" address flush dev veth0 + ip -netns "${NS1}" address flush dev veth1 local local_addr1="" local local_addr2="" @@ -127,51 +132,59 @@ setup() { if [ "$type" = "gre" ]; then type="gretap" fi - ip addr add 198.18.0.1/24 dev veth0 - $ns ip addr add 198.18.0.2/24 dev veth1 - ip link add name tep0 type $type $local_addr1 remote \ - 198.18.0.2 tos $test_tos ttl $test_ttl $vxlan $geneve - $ns ip link add name tep1 type $type $local_addr2 remote \ - 198.18.0.1 tos $test_tos ttl $test_ttl $vxlan $geneve + ip -netns "${NS0}" address add 198.18.0.1/24 dev veth0 + ip -netns "${NS1}" address add 198.18.0.2/24 dev veth1 + ip -netns "${NS0}" link add name tep0 type $type $local_addr1 \ + remote 198.18.0.2 tos $test_tos ttl $test_ttl \ + $vxlan $geneve + ip -netns "${NS1}" link add name tep1 type $type $local_addr2 \ + remote 198.18.0.1 tos $test_tos ttl $test_ttl \ + $vxlan $geneve elif [ "$outer" = "6" ]; then if [ "$type" = "gre" ]; then type="ip6gretap" fi - ip addr add fdd1:ced0:5d88:3fce::1/64 dev veth0 nodad - $ns ip addr add fdd1:ced0:5d88:3fce::2/64 dev veth1 nodad - ip link add name tep0 type $type $local_addr1 \ - remote fdd1:ced0:5d88:3fce::2 tos $test_tos ttl $test_ttl \ - $vxlan $geneve - $ns ip link add name tep1 type $type $local_addr2 \ - remote fdd1:ced0:5d88:3fce::1 tos $test_tos ttl $test_ttl \ - $vxlan $geneve + ip -netns "${NS0}" address add fdd1:ced0:5d88:3fce::1/64 \ + dev veth0 nodad + ip -netns "${NS1}" address add fdd1:ced0:5d88:3fce::2/64 \ + dev veth1 nodad + ip -netns "${NS0}" link add name tep0 type $type $local_addr1 \ + remote fdd1:ced0:5d88:3fce::2 tos $test_tos \ + ttl $test_ttl $vxlan $geneve + ip -netns "${NS1}" link add name tep1 type $type $local_addr2 \ + remote fdd1:ced0:5d88:3fce::1 tos $test_tos \ + ttl $test_ttl $vxlan $geneve fi # Bring L2-tunnel link up and create VLAN on top - ip link set tep0 up - $ns ip link set tep1 up - ip addr flush dev tep0 - $ns ip addr flush dev tep1 + ip -netns "${NS0}" link set tep0 up + ip -netns "${NS1}" link set tep1 up + ip -netns "${NS0}" address flush dev tep0 + ip -netns "${NS1}" address flush dev tep1 local parent if $vlan; then parent="vlan99-" - ip link add link tep0 name ${parent}0 type vlan id 99 - $ns ip link add link tep1 name ${parent}1 type vlan id 99 - ip link set ${parent}0 up - $ns ip link set ${parent}1 up - ip addr flush dev ${parent}0 - $ns ip addr flush dev ${parent}1 + ip -netns "${NS0}" link add link tep0 name ${parent}0 \ + type vlan id 99 + ip -netns "${NS1}" link add link tep1 name ${parent}1 \ + type vlan id 99 + ip -netns "${NS0}" link set dev ${parent}0 up + ip -netns "${NS1}" link set dev ${parent}1 up + ip -netns "${NS0}" address flush dev ${parent}0 + ip -netns "${NS1}" address flush dev ${parent}1 else parent="tep" fi # Assign inner IPv4/IPv6 addresses if [ "$inner" = "4" ] || [ "$inner" = "other" ]; then - ip addr add 198.19.0.1/24 brd + dev ${parent}0 - $ns ip addr add 198.19.0.2/24 brd + dev ${parent}1 + ip -netns "${NS0}" address add 198.19.0.1/24 brd + dev ${parent}0 + ip -netns "${NS1}" address add 198.19.0.2/24 brd + dev ${parent}1 elif [ "$inner" = "6" ]; then - ip addr add fdd4:96cf:4eae:443b::1/64 dev ${parent}0 nodad - $ns ip addr add fdd4:96cf:4eae:443b::2/64 dev ${parent}1 nodad + ip -netns "${NS0}" address add fdd4:96cf:4eae:443b::1/64 \ + dev ${parent}0 nodad + ip -netns "${NS1}" address add fdd4:96cf:4eae:443b::2/64 \ + dev ${parent}1 nodad fi } @@ -192,10 +205,10 @@ verify() { ping_dst="198.19.0.3" # Generates ARPs which are not IPv4/IPv6 fi if [ "$tos_ttl" = "inherit" ]; then - ping -i 0.1 $ping_dst -Q "$expected_tos" -t "$expected_ttl" \ - 2>/dev/null 1>&2 & ping_pid="$!" + ${RUN_NS0} ping -i 0.1 $ping_dst -Q "$expected_tos" \ + -t "$expected_ttl" 2>/dev/null 1>&2 & ping_pid="$!" else - ping -i 0.1 $ping_dst 2>/dev/null 1>&2 & ping_pid="$!" + ${RUN_NS0} ping -i 0.1 $ping_dst 2>/dev/null 1>&2 & ping_pid="$!" fi local tunnel_type_offset tunnel_type_proto req_proto_offset req_offset if [ "$type" = "gre" ]; then @@ -216,10 +229,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip[$tunnel_type_offset] = $tunnel_type_proto and \ - ip[$req_proto_offset] = 0x01 and \ - ip[$req_offset] = 0x08 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x01 and \ + ip[$req_offset] = 0x08 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "6" ]; then req_proto_offset="44" req_offset="78" @@ -231,10 +246,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip[$tunnel_type_offset] = $tunnel_type_proto and \ - ip[$req_proto_offset] = 0x3a and \ - ip[$req_offset] = 0x80 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x3a and \ + ip[$req_offset] = 0x80 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "other" ]; then req_proto_offset="36" req_offset="45" @@ -250,11 +267,13 @@ verify() { expected_tos="0x00" expected_ttl="64" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip[$tunnel_type_offset] = $tunnel_type_proto and \ - ip[$req_proto_offset] = 0x08 and \ - ip[$((req_proto_offset + 1))] = 0x06 and \ - ip[$req_offset] = 0x01 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x08 and \ + ip[$((req_proto_offset + 1))] = 0x06 and \ + ip[$req_offset] = 0x01 2>/dev/null \ + | head -n 1)" fi elif [ "$outer" = "6" ]; then if [ "$type" = "gre" ]; then @@ -273,10 +292,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip6[$tunnel_type_offset] = $tunnel_type_proto and \ - ip6[$req_proto_offset] = 0x01 and \ - ip6[$req_offset] = 0x08 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x01 and \ + ip6[$req_offset] = 0x08 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "6" ]; then local req_proto_offset="72" local req_offset="106" @@ -288,10 +309,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip6[$tunnel_type_offset] = $tunnel_type_proto and \ - ip6[$req_proto_offset] = 0x3a and \ - ip6[$req_offset] = 0x80 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x3a and \ + ip6[$req_offset] = 0x80 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "other" ]; then local req_proto_offset="64" local req_offset="73" @@ -307,11 +330,13 @@ verify() { expected_tos="0x00" expected_ttl="64" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip6[$tunnel_type_offset] = $tunnel_type_proto and \ - ip6[$req_proto_offset] = 0x08 and \ - ip6[$((req_proto_offset + 1))] = 0x06 and \ - ip6[$req_offset] = 0x01 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x08 and \ + ip6[$((req_proto_offset + 1))] = 0x06 and \ + ip6[$req_offset] = 0x01 2>/dev/null \ + | head -n 1)" fi fi kill -9 $ping_pid @@ -351,9 +376,8 @@ verify() { } cleanup() { - ip link del veth0 2>/dev/null - ip netns del testing 2>/dev/null - ip link del tep0 2>/dev/null + ip netns del "${NS0}" 2>/dev/null + ip netns del "${NS1}" 2>/dev/null } printf "┌────────┬───────┬───────┬──────────────┬" From 0daf5f4a1eef482be8f59c6d0b39a413dbaf235e Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sun, 8 Jan 2023 16:45:50 +0100 Subject: [PATCH 333/543] selftests/net: l2_tos_ttl_inherit.sh: Ensure environment cleanup on failure. [ Upstream commit d68ff8ad3351b8fc8d6f14b9a4f5cc8ba3e8bd13 ] Use 'set -e' and an exit handler to stop the script if a command fails and ensure the test environment is cleaned up in any case. Also, handle the case where the script is interrupted by SIGINT. The only command that's expected to fail is 'wait $ping_pid', since it's killed by the script. Handle this case with '|| true' to make it play well with 'set -e'. Finally, return the Kselftest SKIP code (4) when the script breaks because of an environment problem or a command line failure. The 0 and 1 return codes should now reliably indicate that all tests have been run (0: all tests run and passed, 1: all tests run but at least one failed, 4: test script didn't run completely). Fixes: b690842d12fd ("selftests/net: test l2 tunnel TOS/TTL inheriting") Reported-by: Mirsad Goran Todorovac Tested-by: Mirsad Goran Todorovac Signed-off-by: Guillaume Nault Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../selftests/net/l2_tos_ttl_inherit.sh | 40 +++++++++++++++++-- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh index cf56680d598f..f11756e7df2f 100755 --- a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh +++ b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh @@ -12,13 +12,16 @@ # In addition this script also checks if forcing a specific field in the # outer header is working. +# Return 4 by default (Kselftest SKIP code) +ERR=4 + if [ "$(id -u)" != "0" ]; then echo "Please run as root." - exit 0 + exit $ERR fi if ! which tcpdump > /dev/null 2>&1; then echo "No tcpdump found. Required for this test." - exit 0 + exit $ERR fi expected_tos="0x00" @@ -340,7 +343,7 @@ verify() { fi fi kill -9 $ping_pid - wait $ping_pid 2>/dev/null + wait $ping_pid 2>/dev/null || true result="FAIL" if [ "$outer" = "4" ]; then captured_ttl="$(get_field "ttl" "$out")" @@ -380,6 +383,31 @@ cleanup() { ip netns del "${NS1}" 2>/dev/null } +exit_handler() { + # Don't exit immediately if one of the intermediate commands fails. + # We might be called at the end of the script, when the network + # namespaces have already been deleted. So cleanup() may fail, but we + # still need to run until 'exit $ERR' or the script won't return the + # correct error code. + set +e + + cleanup + + exit $ERR +} + +# Restore the default SIGINT handler (just in case) and exit. +# The exit handler will take care of cleaning everything up. +interrupted() { + trap - INT + + exit $ERR +} + +set -e +trap exit_handler EXIT +trap interrupted INT + printf "┌────────┬───────┬───────┬──────────────┬" printf "──────────────┬───────┬────────┐\n" for type in gre vxlan geneve; do @@ -409,6 +437,10 @@ done printf "└────────┴───────┴───────┴──────────────┴" printf "──────────────┴───────┴────────┘\n" +# All tests done. +# Set ERR appropriately: it will be returned by the exit handler. if $failed; then - exit 1 + ERR=1 +else + ERR=0 fi From c8ca0ad10df08ea36bcac1288062d567d22604c9 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Mon, 9 Jan 2023 11:43:25 +0530 Subject: [PATCH 334/543] octeontx2-pf: Fix resource leakage in VF driver unbind [ Upstream commit 53da7aec32982f5ee775b69dce06d63992ce4af3 ] resources allocated like mcam entries to support the Ntuple feature and hash tables for the tc feature are not getting freed in driver unbind. This patch fixes the issue. Fixes: 2da489432747 ("octeontx2-pf: devlink params support to set mcam entry count") Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Kovvuri Goutham Link: https://lore.kernel.org/r/20230109061325.21395-1-hkelam@marvell.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 86653bb8e403..7f8ffbf79cf7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -758,6 +758,8 @@ static void otx2vf_remove(struct pci_dev *pdev) if (vf->otx2_wq) destroy_workqueue(vf->otx2_wq); otx2_ptp_destroy(vf); + otx2_mcam_flow_del(vf); + otx2_shutdown_tc(vf); otx2vf_disable_mbox_intr(vf); otx2_detach_resources(&vf->mbox); if (test_bit(CN10K_LMTST, &vf->hw.cap_flag)) From d4bc9f017f3163348bfa7c2f76b2ab2058d12e72 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 6 Jan 2023 07:13:19 -0800 Subject: [PATCH 335/543] perf build: Properly guard libbpf includes [ Upstream commit d891f2b724b39a2a41e3ad7b57110193993242ff ] Including libbpf header files should be guarded by HAVE_LIBBPF_SUPPORT. In bpf_counter.h, move the skeleton utilities under HAVE_BPF_SKEL. Fixes: d6a735ef3277c45f ("perf bpf_counter: Move common functions to bpf_counter.h") Reported-by: Mike Leach Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Tested-by: Jiri Olsa Tested-by: Mike Leach Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20230105172243.7238-1-mike.leach@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-trace.c | 2 ++ tools/perf/util/bpf_counter.h | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 3dcf6aed1ef7..97b17f8941dc 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -17,7 +17,9 @@ #include "util/record.h" #include #include +#ifdef HAVE_LIBBPF_SUPPORT #include +#endif #include "util/bpf_map.h" #include "util/rlimit.h" #include "builtin.h" diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h index 4dbf26408b69..c6d21c07b14c 100644 --- a/tools/perf/util/bpf_counter.h +++ b/tools/perf/util/bpf_counter.h @@ -4,9 +4,12 @@ #include #include + +#ifdef HAVE_LIBBPF_SUPPORT #include #include #include +#endif struct evsel; struct target; @@ -87,6 +90,8 @@ static inline void set_max_rlimit(void) setrlimit(RLIMIT_MEMLOCK, &rinf); } +#ifdef HAVE_BPF_SKEL + static inline __u32 bpf_link_get_id(int fd) { struct bpf_link_info link_info = { .id = 0, }; @@ -127,5 +132,6 @@ static inline int bperf_trigger_reading(int prog_fd, int cpu) return bpf_prog_test_run_opts(prog_fd, &opts); } +#endif /* HAVE_BPF_SKEL */ #endif /* __PERF_BPF_COUNTER_H */ From 4e81a8a47dc667574afe2d1ee825b9a130b9fe0f Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Sun, 8 Jan 2023 14:23:59 +0800 Subject: [PATCH 336/543] perf kmem: Support legacy tracepoints [ Upstream commit b3719108ae60169eda5c941ca5e1be1faa371c57 ] Commit 11e9734bcb6a7361 ("mm/slab_common: unify NUMA and UMA version of tracepoints") removed tracepoints 'kmalloc_node' and 'kmem_cache_alloc_node', we need to consider the tool should be backward compatible. If it detect the tracepoint "kmem:kmalloc_node", this patch enables the legacy tracepoints, otherwise, it will ignore them. Fixes: 11e9734bcb6a7361 ("mm/slab_common: unify NUMA and UMA version of tracepoints") Reported-by: Ravi Bangoria Reviewed-by: James Clark Signed-off-by: Leo Yan Cc: Alexander Shishkin Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Vlastimil Babka Link: https://lore.kernel.org/r/20230108062400.250690-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-kmem.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index ebfab2ca1702..63c759edb8bc 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -1823,6 +1823,19 @@ static int parse_line_opt(const struct option *opt __maybe_unused, return 0; } +static bool slab_legacy_tp_is_exposed(void) +{ + /* + * The tracepoints "kmem:kmalloc_node" and + * "kmem:kmem_cache_alloc_node" have been removed on the latest + * kernel, if the tracepoint "kmem:kmalloc_node" is existed it + * means the tool is running on an old kernel, we need to + * rollback to support these legacy tracepoints. + */ + return IS_ERR(trace_event__tp_format("kmem", "kmalloc_node")) ? + false : true; +} + static int __cmd_record(int argc, const char **argv) { const char * const record_args[] = { @@ -1830,22 +1843,28 @@ static int __cmd_record(int argc, const char **argv) }; const char * const slab_events[] = { "-e", "kmem:kmalloc", - "-e", "kmem:kmalloc_node", "-e", "kmem:kfree", "-e", "kmem:kmem_cache_alloc", - "-e", "kmem:kmem_cache_alloc_node", "-e", "kmem:kmem_cache_free", }; + const char * const slab_legacy_events[] = { + "-e", "kmem:kmalloc_node", + "-e", "kmem:kmem_cache_alloc_node", + }; const char * const page_events[] = { "-e", "kmem:mm_page_alloc", "-e", "kmem:mm_page_free", }; unsigned int rec_argc, i, j; const char **rec_argv; + unsigned int slab_legacy_tp_exposed = slab_legacy_tp_is_exposed(); rec_argc = ARRAY_SIZE(record_args) + argc - 1; - if (kmem_slab) + if (kmem_slab) { rec_argc += ARRAY_SIZE(slab_events); + if (slab_legacy_tp_exposed) + rec_argc += ARRAY_SIZE(slab_legacy_events); + } if (kmem_page) rec_argc += ARRAY_SIZE(page_events) + 1; /* for -g */ @@ -1860,6 +1879,10 @@ static int __cmd_record(int argc, const char **argv) if (kmem_slab) { for (j = 0; j < ARRAY_SIZE(slab_events); j++, i++) rec_argv[i] = strdup(slab_events[j]); + if (slab_legacy_tp_exposed) { + for (j = 0; j < ARRAY_SIZE(slab_legacy_events); j++, i++) + rec_argv[i] = strdup(slab_legacy_events[j]); + } } if (kmem_page) { rec_argv[i++] = strdup("-g"); From 8c3dc300617cc7ceae0335c8363d4e03a0a92e54 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Sun, 8 Jan 2023 14:24:00 +0800 Subject: [PATCH 337/543] perf kmem: Support field "node" in evsel__process_alloc_event() coping with recent tracepoint restructuring [ Upstream commit dce088ab0d51ae3b14fb2bd608e9c649aadfe5dc ] Commit 11e9734bcb6a7361 ("mm/slab_common: unify NUMA and UMA version of tracepoints") adds the field "node" into the tracepoints 'kmalloc' and 'kmem_cache_alloc', so this patch modifies the event process function to support the field "node". If field "node" is detected by checking function evsel__field(), it stats the cross allocation. When the "node" value is NUMA_NO_NODE (-1), it means the memory can be allocated from any memory node, in this case, we don't account it as a cross allocation. Fixes: 11e9734bcb6a7361 ("mm/slab_common: unify NUMA and UMA version of tracepoints") Reported-by: Ravi Bangoria Reviewed-by: James Clark Signed-off-by: Leo Yan Cc: Alexander Shishkin Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Vlastimil Babka Link: https://lore.kernel.org/r/20230108062400.250690-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-kmem.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 63c759edb8bc..40dd52acc48a 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -26,6 +26,7 @@ #include "util/string2.h" #include +#include #include #include #include @@ -184,22 +185,33 @@ static int evsel__process_alloc_event(struct evsel *evsel, struct perf_sample *s total_allocated += bytes_alloc; nr_allocs++; - return 0; -} -static int evsel__process_alloc_node_event(struct evsel *evsel, struct perf_sample *sample) -{ - int ret = evsel__process_alloc_event(evsel, sample); + /* + * Commit 11e9734bcb6a ("mm/slab_common: unify NUMA and UMA + * version of tracepoints") adds the field "node" into the + * tracepoints 'kmalloc' and 'kmem_cache_alloc'. + * + * The legacy tracepoints 'kmalloc_node' and 'kmem_cache_alloc_node' + * also contain the field "node". + * + * If the tracepoint contains the field "node" the tool stats the + * cross allocation. + */ + if (evsel__field(evsel, "node")) { + int node1, node2; - if (!ret) { - int node1 = cpu__get_node((struct perf_cpu){.cpu = sample->cpu}), - node2 = evsel__intval(evsel, sample, "node"); + node1 = cpu__get_node((struct perf_cpu){.cpu = sample->cpu}); + node2 = evsel__intval(evsel, sample, "node"); - if (node1 != node2) + /* + * If the field "node" is NUMA_NO_NODE (-1), we don't take it + * as a cross allocation. + */ + if ((node2 != NUMA_NO_NODE) && (node1 != node2)) nr_cross_allocs++; } - return ret; + return 0; } static int ptr_cmp(void *, void *); @@ -1368,8 +1380,8 @@ static int __cmd_kmem(struct perf_session *session) /* slab allocator */ { "kmem:kmalloc", evsel__process_alloc_event, }, { "kmem:kmem_cache_alloc", evsel__process_alloc_event, }, - { "kmem:kmalloc_node", evsel__process_alloc_node_event, }, - { "kmem:kmem_cache_alloc_node", evsel__process_alloc_node_event, }, + { "kmem:kmalloc_node", evsel__process_alloc_event, }, + { "kmem:kmem_cache_alloc_node", evsel__process_alloc_event, }, { "kmem:kfree", evsel__process_free_event, }, { "kmem:kmem_cache_free", evsel__process_free_event, }, /* page allocator */ From bbc17e883cbad205d9d45b215b41d3dbad03168f Mon Sep 17 00:00:00 2001 From: Christopher S Hall Date: Wed, 14 Dec 2022 16:10:38 +0800 Subject: [PATCH 338/543] igc: Fix PPS delta between two synchronized end-points [ Upstream commit 5e91c72e560cc85f7163bbe3d14197268de31383 ] This patch fix the pulse per second output delta between two synchronized end-points. Based on Intel Discrete I225 Software User Manual Section 4.2.15 TimeSync Auxiliary Control Register, ST0[Bit 4] and ST1[Bit 7] must be set to ensure that clock output will be toggles based on frequency value defined. This is to ensure that output of the PPS is aligned with the clock. How to test: 1) Running time synchronization on both end points. Ex: ptp4l --step_threshold=1 -m -f gPTP.cfg -i 2) Configure PPS output using below command for both end-points Ex: SDP0 on I225 REV4 SKU variant ./testptp -d /dev/ptp0 -L 0,2 ./testptp -d /dev/ptp0 -p 1000000000 3) Measure the output using analyzer for both end-points Fixes: 87938851b6ef ("igc: enable auxiliary PHC functions for the i225") Signed-off-by: Christopher S Hall Signed-off-by: Muhammad Husaini Zulkifli Acked-by: Sasha Neftin Tested-by: Naama Meir Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igc/igc_defines.h | 2 ++ drivers/net/ethernet/intel/igc/igc_ptp.c | 10 ++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 4ad35fbdc02e..dbfa4b9dee06 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -466,7 +466,9 @@ #define IGC_TSAUXC_EN_TT0 BIT(0) /* Enable target time 0. */ #define IGC_TSAUXC_EN_TT1 BIT(1) /* Enable target time 1. */ #define IGC_TSAUXC_EN_CLK0 BIT(2) /* Enable Configurable Frequency Clock 0. */ +#define IGC_TSAUXC_ST0 BIT(4) /* Start Clock 0 Toggle on Target Time 0. */ #define IGC_TSAUXC_EN_CLK1 BIT(5) /* Enable Configurable Frequency Clock 1. */ +#define IGC_TSAUXC_ST1 BIT(7) /* Start Clock 1 Toggle on Target Time 1. */ #define IGC_TSAUXC_EN_TS0 BIT(8) /* Enable hardware timestamp 0. */ #define IGC_TSAUXC_AUTT0 BIT(9) /* Auxiliary Timestamp Taken. */ #define IGC_TSAUXC_EN_TS1 BIT(10) /* Enable hardware timestamp 0. */ diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 8dbb9f903ca7..c34734d432e0 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -322,7 +322,7 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, ts = ns_to_timespec64(ns); if (rq->perout.index == 1) { if (use_freq) { - tsauxc_mask = IGC_TSAUXC_EN_CLK1; + tsauxc_mask = IGC_TSAUXC_EN_CLK1 | IGC_TSAUXC_ST1; tsim_mask = 0; } else { tsauxc_mask = IGC_TSAUXC_EN_TT1; @@ -333,7 +333,7 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, freqout = IGC_FREQOUT1; } else { if (use_freq) { - tsauxc_mask = IGC_TSAUXC_EN_CLK0; + tsauxc_mask = IGC_TSAUXC_EN_CLK0 | IGC_TSAUXC_ST0; tsim_mask = 0; } else { tsauxc_mask = IGC_TSAUXC_EN_TT0; @@ -347,10 +347,12 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, tsauxc = rd32(IGC_TSAUXC); tsim = rd32(IGC_TSIM); if (rq->perout.index == 1) { - tsauxc &= ~(IGC_TSAUXC_EN_TT1 | IGC_TSAUXC_EN_CLK1); + tsauxc &= ~(IGC_TSAUXC_EN_TT1 | IGC_TSAUXC_EN_CLK1 | + IGC_TSAUXC_ST1); tsim &= ~IGC_TSICR_TT1; } else { - tsauxc &= ~(IGC_TSAUXC_EN_TT0 | IGC_TSAUXC_EN_CLK0); + tsauxc &= ~(IGC_TSAUXC_EN_TT0 | IGC_TSAUXC_EN_CLK0 | + IGC_TSAUXC_ST0); tsim &= ~IGC_TSICR_TT0; } if (on) { From c2258d55935c23095e545c7e36f08d8b3433969b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Mon, 9 Jan 2023 16:32:23 +0100 Subject: [PATCH 339/543] net: lan966x: check for ptp to be enabled in lan966x_ptp_deinit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b0e380b5d4275299adf43e249f18309331b6f54f ] If ptp was not enabled due to missing IRQ for instance, lan966x_ptp_deinit() will dereference NULL pointers. Fixes: d096459494a8 ("net: lan966x: Add support for ptp clocks") Signed-off-by: Clément Léger Reviewed-by: Horatiu Vultur Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c index e5a2bbe064f8..8e368318558a 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c @@ -853,6 +853,9 @@ void lan966x_ptp_deinit(struct lan966x *lan966x) struct lan966x_port *port; int i; + if (!lan966x->ptp) + return; + for (i = 0; i < lan966x->num_phys_ports; i++) { port = lan966x->ports[i]; if (!port) From 4fe577ad7b29413f7a64cd9992c51ee1239a27a3 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Tue, 10 Jan 2023 19:53:59 +0800 Subject: [PATCH 340/543] net: hns3: fix wrong use of rss size during VF rss config [ Upstream commit ae9f29fdfd827ad06c1ae8155c042245a9d00757 ] Currently, it used old rss size to get current tc mode. As a result, the rss size is updated, but the tc mode is still configured based on the old rss size. So this patch fixes it by using the new rss size in both process. Fixes: 93969dc14fcd ("net: hns3: refactor VF rss init APIs with new common rss init APIs") Signed-off-by: Jie Wang Signed-off-by: Hao Lan Reviewed-by: Alexander Duyck Link: https://lore.kernel.org/r/20230110115359.10163-1-lanhao@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 081bd2c3f289..e84e5be8e59e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -3130,7 +3130,7 @@ static int hclgevf_set_channels(struct hnae3_handle *handle, u32 new_tqps_num, hclgevf_update_rss_size(handle, new_tqps_num); - hclge_comm_get_rss_tc_info(cur_rss_size, hdev->hw_tc_map, + hclge_comm_get_rss_tc_info(kinfo->rss_size, hdev->hw_tc_map, tc_offset, tc_valid, tc_size); ret = hclge_comm_set_rss_tc_mode(&hdev->hw.hw, tc_offset, tc_valid, tc_size); From de76fc134f74fa1005b7de2bf3841b503ad1d971 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Jan 2023 20:25:47 -0800 Subject: [PATCH 341/543] bnxt: make sure we return pages to the pool [ Upstream commit 97f5e03a4a27d27ee4fed0cdb1658c81cf2784db ] Before the commit under Fixes the page would have been released from the pool before the napi_alloc_skb() call, so normal page freeing was fine (released page == no longer in the pool). After the change we just mark the page for recycling so it's still in the pool if the skb alloc fails, we need to recycle. Same commit added the same bug in the new bnxt_rx_multi_page_skb(). Fixes: 1dc4c557bfed ("bnxt: adding bnxt_xdp_build_skb to build skb from multibuffer xdp_buff") Reviewed-by: Andy Gospodarek Link: https://lore.kernel.org/r/20230111042547.987749-1-kuba@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f5a8bae8d79a..edca16b5f9e3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -990,7 +990,7 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp, DMA_ATTR_WEAK_ORDERING); skb = build_skb(page_address(page), PAGE_SIZE); if (!skb) { - __free_page(page); + page_pool_recycle_direct(rxr->page_pool, page); return NULL; } skb_mark_for_recycle(skb); @@ -1028,7 +1028,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp, skb = napi_alloc_skb(&rxr->bnapi->napi, payload); if (!skb) { - __free_page(page); + page_pool_recycle_direct(rxr->page_pool, page); return NULL; } From 50b3cdf8239b11545f311c4f7b89e0092e4feedb Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Tue, 20 Dec 2022 18:56:07 +0100 Subject: [PATCH 342/543] platform/surface: aggregator: Add missing call to ssam_request_sync_free() [ Upstream commit c965daac370f08a9b71d573a71d13cda76f2a884 ] Although rare, ssam_request_sync_init() can fail. In that case, the request should be freed via ssam_request_sync_free(). Currently it is leaked instead. Fix this. Fixes: c167b9c7e3d6 ("platform/surface: Add Surface Aggregator subsystem") Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20221220175608.1436273-1-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/surface/aggregator/controller.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/platform/surface/aggregator/controller.c b/drivers/platform/surface/aggregator/controller.c index 43e765199137..c6537a1b3a2e 100644 --- a/drivers/platform/surface/aggregator/controller.c +++ b/drivers/platform/surface/aggregator/controller.c @@ -1700,8 +1700,10 @@ int ssam_request_sync(struct ssam_controller *ctrl, return status; status = ssam_request_sync_init(rqst, spec->flags); - if (status) + if (status) { + ssam_request_sync_free(rqst); return status; + } ssam_request_sync_set_resp(rqst, rsp); From 3944162821295993ec89992dec98ab6be6306cc0 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 29 Dec 2022 11:25:33 +0400 Subject: [PATCH 343/543] platform/x86/amd: Fix refcount leak in amd_pmc_probe [ Upstream commit ccb32e2be14271a60e9ba89c6d5660cc9998773c ] pci_get_domain_bus_and_slot() takes reference, the caller should release the reference by calling pci_dev_put() after use. Call pci_dev_put() in the error path to fix this. Fixes: 3d7d407dfb05 ("platform/x86: amd-pmc: Add support for AMD Spill to DRAM STB feature") Signed-off-by: Miaoqian Lin Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20221229072534.1381432-1-linmq006@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/amd/pmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd/pmc.c b/drivers/platform/x86/amd/pmc.c index 439d282aafd1..8d924986381b 100644 --- a/drivers/platform/x86/amd/pmc.c +++ b/drivers/platform/x86/amd/pmc.c @@ -932,7 +932,7 @@ static int amd_pmc_probe(struct platform_device *pdev) if (enable_stb && (dev->cpu_id == AMD_CPU_ID_YC || dev->cpu_id == AMD_CPU_ID_CB)) { err = amd_pmc_s2d_init(dev); if (err) - return err; + goto err_pci_dev_put; } platform_set_drvdata(pdev, dev); From a474d4ad59cd4642d1b7e3a6c08cef9eca0992c8 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Fri, 13 Jan 2023 09:53:11 +0100 Subject: [PATCH 344/543] ALSA: usb-audio: Fix possible NULL pointer dereference in snd_usb_pcm_has_fixed_rate() [ Upstream commit 92a9c0ad86d47ff4cce899012e355c400f02cfb8 ] The subs function argument may be NULL, so do not use it before the NULL check. Fixes: 291e9da91403 ("ALSA: usb-audio: Always initialize fixed_rate in snd_usb_find_implicit_fb_sync_format()") Reported-by: coverity-bot Link: https://lore.kernel.org/alsa-devel/202301121424.4A79A485@keescook/ Signed-off-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20230113085311.623325-1-perex@perex.cz Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/pcm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 29838000eee0..2c5765cbed2d 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -160,11 +160,12 @@ find_substream_format(struct snd_usb_substream *subs, bool snd_usb_pcm_has_fixed_rate(struct snd_usb_substream *subs) { const struct audioformat *fp; - struct snd_usb_audio *chip = subs->stream->chip; + struct snd_usb_audio *chip; int rate = -1; if (!subs) return false; + chip = subs->stream->chip; if (!(chip->quirk_flags & QUIRK_FLAG_FIXED_RATE)) return false; list_for_each_entry(fp, &subs->fmt_list, list) { From adc96d30f6503d30dc68670c013716f1d9fcc747 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 19 Dec 2022 10:10:04 +0100 Subject: [PATCH 345/543] efi: fix NULL-deref in init error path [ Upstream commit 703c13fe3c9af557d312f5895ed6a5fda2711104 ] In cases where runtime services are not supported or have been disabled, the runtime services workqueue will never have been allocated. Do not try to destroy the workqueue unconditionally in the unlikely event that EFI initialisation fails to avoid dereferencing a NULL pointer. Fixes: 98086df8b70c ("efi: add missed destroy_workqueue when efisubsys_init fails") Cc: stable@vger.kernel.org Cc: Li Heng Signed-off-by: Johan Hovold Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- drivers/firmware/efi/efi.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index f12cc29bd4b8..033aac6be7da 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -374,8 +374,8 @@ static int __init efisubsys_init(void) efi_kobj = kobject_create_and_add("efi", firmware_kobj); if (!efi_kobj) { pr_err("efi: Firmware registration failed.\n"); - destroy_workqueue(efi_rts_wq); - return -ENOMEM; + error = -ENOMEM; + goto err_destroy_wq; } if (efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE | @@ -423,7 +423,10 @@ err_unregister: generic_ops_unregister(); err_put: kobject_put(efi_kobj); - destroy_workqueue(efi_rts_wq); +err_destroy_wq: + if (efi_rts_wq) + destroy_workqueue(efi_rts_wq); + return error; } From 7fc3990dad04a677606337ebc61964094d6cb41b Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 14 Jan 2023 08:41:33 -0700 Subject: [PATCH 346/543] io_uring: lock overflowing for IOPOLL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 544d163d659d45a206d8929370d5a2984e546cb7 upstream. syzbot reports an issue with overflow filling for IOPOLL: WARNING: CPU: 0 PID: 28 at io_uring/io_uring.c:734 io_cqring_event_overflow+0x1c0/0x230 io_uring/io_uring.c:734 CPU: 0 PID: 28 Comm: kworker/u4:1 Not tainted 6.2.0-rc3-syzkaller-16369-g358a161a6a9e #0 Workqueue: events_unbound io_ring_exit_work Call trace:  io_cqring_event_overflow+0x1c0/0x230 io_uring/io_uring.c:734  io_req_cqe_overflow+0x5c/0x70 io_uring/io_uring.c:773  io_fill_cqe_req io_uring/io_uring.h:168 [inline]  io_do_iopoll+0x474/0x62c io_uring/rw.c:1065  io_iopoll_try_reap_events+0x6c/0x108 io_uring/io_uring.c:1513  io_uring_try_cancel_requests+0x13c/0x258 io_uring/io_uring.c:3056  io_ring_exit_work+0xec/0x390 io_uring/io_uring.c:2869  process_one_work+0x2d8/0x504 kernel/workqueue.c:2289  worker_thread+0x340/0x610 kernel/workqueue.c:2436  kthread+0x12c/0x158 kernel/kthread.c:376  ret_from_fork+0x10/0x20 arch/arm64/kernel/entry.S:863 There is no real problem for normal IOPOLL as flush is also called with uring_lock taken, but it's getting more complicated for IOPOLL|SQPOLL, for which __io_cqring_overflow_flush() happens from the CQ waiting path. Reported-and-tested-by: syzbot+6805087452d72929404e@syzkaller.appspotmail.com Cc: stable@vger.kernel.org # 5.10+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- io_uring/rw.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/io_uring/rw.c b/io_uring/rw.c index bb47cc4da713..6223472095d2 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -1055,7 +1055,11 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin) continue; req->cqe.flags = io_put_kbuf(req, 0); - __io_fill_cqe_req(req->ctx, req); + if (unlikely(!__io_fill_cqe_req(ctx, req))) { + spin_lock(&ctx->completion_lock); + io_req_cqe_overflow(req); + spin_unlock(&ctx->completion_lock); + } } if (unlikely(!nr_events)) From c06015ebc4367be38904b88582e13cc079672075 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 14 Jan 2023 08:46:14 -0700 Subject: [PATCH 347/543] io_uring/poll: attempt request issue after racy poll wakeup commit 6e5aedb9324aab1c14a23fae3d8eeb64a679c20e upstream. If we have multiple requests waiting on the same target poll waitqueue, then it's quite possible to get a request triggered and get disappointed in not being able to make any progress with it. If we race in doing so, we'll potentially leave the poll request on the internal tables, but removed from the waitqueue. That means that any subsequent trigger of the poll waitqueue will not kick that request into action, causing an application to potentially wait for completion of a request that will never happen. Fix this by adding a new poll return state, IOU_POLL_REISSUE. Rather than have complicated logic for how to re-arm a given type of request, just punt it for a reissue. While in there, move the 'ret' variable to the only section where it gets used. This avoids confusion the scope of it. Cc: stable@vger.kernel.org Fixes: eb0089d629ba ("io_uring: single shot poll removal optimisation") Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- io_uring/poll.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/io_uring/poll.c b/io_uring/poll.c index df42fd8a6ab0..f2f9f174fc62 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -223,22 +223,23 @@ enum { IOU_POLL_DONE = 0, IOU_POLL_NO_ACTION = 1, IOU_POLL_REMOVE_POLL_USE_RES = 2, + IOU_POLL_REISSUE = 3, }; /* * All poll tw should go through this. Checks for poll events, manages * references, does rewait, etc. * - * Returns a negative error on failure. IOU_POLL_NO_ACTION when no action require, - * which is either spurious wakeup or multishot CQE is served. - * IOU_POLL_DONE when it's done with the request, then the mask is stored in req->cqe.res. - * IOU_POLL_REMOVE_POLL_USE_RES indicates to remove multishot poll and that the result - * is stored in req->cqe. + * Returns a negative error on failure. IOU_POLL_NO_ACTION when no action + * require, which is either spurious wakeup or multishot CQE is served. + * IOU_POLL_DONE when it's done with the request, then the mask is stored in + * req->cqe.res. IOU_POLL_REMOVE_POLL_USE_RES indicates to remove multishot + * poll and that the result is stored in req->cqe. */ static int io_poll_check_events(struct io_kiocb *req, bool *locked) { struct io_ring_ctx *ctx = req->ctx; - int v, ret; + int v; /* req->task == current here, checking PF_EXITING is safe */ if (unlikely(req->task->flags & PF_EXITING)) @@ -274,10 +275,15 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) if (!req->cqe.res) { struct poll_table_struct pt = { ._key = req->apoll_events }; req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events; + /* + * We got woken with a mask, but someone else got to + * it first. The above vfs_poll() doesn't add us back + * to the waitqueue, so if we get nothing back, we + * should be safe and attempt a reissue. + */ + if (unlikely(!req->cqe.res)) + return IOU_POLL_REISSUE; } - - if ((unlikely(!req->cqe.res))) - continue; if (req->apoll_events & EPOLLONESHOT) return IOU_POLL_DONE; if (io_is_uring_fops(req->file)) @@ -294,7 +300,7 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) return IOU_POLL_REMOVE_POLL_USE_RES; } } else { - ret = io_poll_issue(req, locked); + int ret = io_poll_issue(req, locked); if (ret == IOU_STOP_MULTISHOT) return IOU_POLL_REMOVE_POLL_USE_RES; if (ret < 0) @@ -325,6 +331,11 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked) if (ret == IOU_POLL_DONE) { struct io_poll *poll = io_kiocb_to_cmd(req, struct io_poll); req->cqe.res = mangle_poll(req->cqe.res & poll->events); + } else if (ret == IOU_POLL_REISSUE) { + io_poll_remove_entries(req); + io_poll_tw_hash_eject(req, locked); + io_req_task_submit(req, locked); + return; } else if (ret != IOU_POLL_REMOVE_POLL_USE_RES) { req->cqe.res = ret; req_set_fail(req); @@ -350,7 +361,7 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked) if (ret == IOU_POLL_REMOVE_POLL_USE_RES) io_req_complete_post(req); - else if (ret == IOU_POLL_DONE) + else if (ret == IOU_POLL_DONE || ret == IOU_POLL_REISSUE) io_req_task_submit(req, locked); else io_req_complete_failed(req, ret); From 3925336af00f79fb7f3a390b6ab30d455d32d863 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 25 Oct 2022 21:50:15 +0200 Subject: [PATCH 348/543] drm/i915: Fix CFI violations in gt_sysfs commit a8a4f0467d706fc22d286dfa973946e5944b793c upstream. When booting with CONFIG_CFI_CLANG, there are numerous violations when accessing the files under /sys/devices/pci0000:00/0000:00:02.0/drm/card0/gt/gt0: $ cd /sys/devices/pci0000:00/0000:00:02.0/drm/card0/gt/gt0 $ grep . * id:0 punit_req_freq_mhz:350 rc6_enable:1 rc6_residency_ms:214934 rps_act_freq_mhz:1300 rps_boost_freq_mhz:1300 rps_cur_freq_mhz:350 rps_max_freq_mhz:1300 rps_min_freq_mhz:350 rps_RP0_freq_mhz:1300 rps_RP1_freq_mhz:350 rps_RPn_freq_mhz:350 throttle_reason_pl1:0 throttle_reason_pl2:0 throttle_reason_pl4:0 throttle_reason_prochot:0 throttle_reason_ratl:0 throttle_reason_status:0 throttle_reason_thermal:0 throttle_reason_vr_tdc:0 throttle_reason_vr_thermalert:0 $ sudo dmesg &| grep "CFI failure at" [ 214.595903] CFI failure at kobj_attr_show+0x19/0x30 (target: id_show+0x0/0x70 [i915]; expected type: 0xc527b809) [ 214.596064] CFI failure at kobj_attr_show+0x19/0x30 (target: punit_req_freq_mhz_show+0x0/0x40 [i915]; expected type: 0xc527b809) [ 214.596407] CFI failure at kobj_attr_show+0x19/0x30 (target: rc6_enable_show+0x0/0x40 [i915]; expected type: 0xc527b809) [ 214.596528] CFI failure at kobj_attr_show+0x19/0x30 (target: rc6_residency_ms_show+0x0/0x270 [i915]; expected type: 0xc527b809) [ 214.596682] CFI failure at kobj_attr_show+0x19/0x30 (target: act_freq_mhz_show+0x0/0xe0 [i915]; expected type: 0xc527b809) [ 214.596792] CFI failure at kobj_attr_show+0x19/0x30 (target: boost_freq_mhz_show+0x0/0xe0 [i915]; expected type: 0xc527b809) [ 214.596893] CFI failure at kobj_attr_show+0x19/0x30 (target: cur_freq_mhz_show+0x0/0xe0 [i915]; expected type: 0xc527b809) [ 214.596996] CFI failure at kobj_attr_show+0x19/0x30 (target: max_freq_mhz_show+0x0/0xe0 [i915]; expected type: 0xc527b809) [ 214.597099] CFI failure at kobj_attr_show+0x19/0x30 (target: min_freq_mhz_show+0x0/0xe0 [i915]; expected type: 0xc527b809) [ 214.597198] CFI failure at kobj_attr_show+0x19/0x30 (target: RP0_freq_mhz_show+0x0/0xe0 [i915]; expected type: 0xc527b809) [ 214.597301] CFI failure at kobj_attr_show+0x19/0x30 (target: RP1_freq_mhz_show+0x0/0xe0 [i915]; expected type: 0xc527b809) [ 214.597405] CFI failure at kobj_attr_show+0x19/0x30 (target: RPn_freq_mhz_show+0x0/0xe0 [i915]; expected type: 0xc527b809) [ 214.597538] CFI failure at kobj_attr_show+0x19/0x30 (target: throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809) [ 214.597701] CFI failure at kobj_attr_show+0x19/0x30 (target: throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809) [ 214.597836] CFI failure at kobj_attr_show+0x19/0x30 (target: throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809) [ 214.597952] CFI failure at kobj_attr_show+0x19/0x30 (target: throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809) [ 214.598071] CFI failure at kobj_attr_show+0x19/0x30 (target: throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809) [ 214.598177] CFI failure at kobj_attr_show+0x19/0x30 (target: throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809) [ 214.598307] CFI failure at kobj_attr_show+0x19/0x30 (target: throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809) [ 214.598439] CFI failure at kobj_attr_show+0x19/0x30 (target: throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809) [ 214.598542] CFI failure at kobj_attr_show+0x19/0x30 (target: throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809) With kCFI, indirect calls are validated against their expected type versus actual type and failures occur when the two types do not match. The ultimate issue is that these sysfs functions are expecting to be called via dev_attr_show() but they may also be called via kobj_attr_show(), as certain files are created under two different kobjects that have two different sysfs_ops in intel_gt_sysfs_register(), hence the warnings above. When accessing the gt_ files under /sys/devices/pci0000:00/0000:00:02.0/drm/card0, which are using the same sysfs functions, there are no violations, meaning the functions are being called with the proper type. To make everything work properly, adjust certain functions to match the type of the ->show() and ->store() members in 'struct kobj_attribute'. Add a macro to generate functions for that can be called via both dev_attr_{show,store}() or kobj_attr_{show,store}() so that they can be called through both kobject locations without violating kCFI and adjust the attribute groups to account for this. Link: https://github.com/ClangBuiltLinux/linux/issues/1716 Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda Reviewed-by: Kees Cook Signed-off-by: Nathan Chancellor Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20221013205909.1282545-1-nathan@kernel.org Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gt/intel_gt_sysfs.c | 15 +- drivers/gpu/drm/i915/gt/intel_gt_sysfs.h | 2 +- drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c | 461 +++++++++----------- 3 files changed, 220 insertions(+), 258 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c index d651ccd0ab20..9486dd3bed99 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c @@ -22,11 +22,9 @@ bool is_object_gt(struct kobject *kobj) return !strncmp(kobj->name, "gt", 2); } -struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev, +struct intel_gt *intel_gt_sysfs_get_drvdata(struct kobject *kobj, const char *name) { - struct kobject *kobj = &dev->kobj; - /* * We are interested at knowing from where the interface * has been called, whether it's called from gt/ or from @@ -38,6 +36,7 @@ struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev, * "struct drm_i915_private *" type. */ if (!is_object_gt(kobj)) { + struct device *dev = kobj_to_dev(kobj); struct drm_i915_private *i915 = kdev_minor_to_i915(dev); return to_gt(i915); @@ -51,18 +50,18 @@ static struct kobject *gt_get_parent_obj(struct intel_gt *gt) return >->i915->drm.primary->kdev->kobj; } -static ssize_t id_show(struct device *dev, - struct device_attribute *attr, +static ssize_t id_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); return sysfs_emit(buf, "%u\n", gt->info.id); } -static DEVICE_ATTR_RO(id); +static struct kobj_attribute attr_id = __ATTR_RO(id); static struct attribute *id_attrs[] = { - &dev_attr_id.attr, + &attr_id.attr, NULL, }; ATTRIBUTE_GROUPS(id); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h index 6232923a420d..c3a123faee98 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h @@ -30,7 +30,7 @@ static inline struct intel_gt *kobj_to_gt(struct kobject *kobj) void intel_gt_sysfs_register(struct intel_gt *gt); void intel_gt_sysfs_unregister(struct intel_gt *gt); -struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev, +struct intel_gt *intel_gt_sysfs_get_drvdata(struct kobject *kobj, const char *name); #endif /* SYSFS_GT_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index 180dd6f3ef57..b108f0a8a044 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -24,14 +24,15 @@ enum intel_gt_sysfs_op { }; static int -sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr, +sysfs_gt_attribute_w_func(struct kobject *kobj, struct attribute *attr, int (func)(struct intel_gt *gt, u32 val), u32 val) { struct intel_gt *gt; int ret; - if (!is_object_gt(&dev->kobj)) { + if (!is_object_gt(kobj)) { int i; + struct device *dev = kobj_to_dev(kobj); struct drm_i915_private *i915 = kdev_minor_to_i915(dev); for_each_gt(gt, i915, i) { @@ -40,7 +41,7 @@ sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr, break; } } else { - gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + gt = intel_gt_sysfs_get_drvdata(kobj, attr->name); ret = func(gt, val); } @@ -48,7 +49,7 @@ sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr, } static u32 -sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr, +sysfs_gt_attribute_r_func(struct kobject *kobj, struct attribute *attr, u32 (func)(struct intel_gt *gt), enum intel_gt_sysfs_op op) { @@ -57,8 +58,9 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr, ret = (op == INTEL_GT_SYSFS_MAX) ? 0 : (u32) -1; - if (!is_object_gt(&dev->kobj)) { + if (!is_object_gt(kobj)) { int i; + struct device *dev = kobj_to_dev(kobj); struct drm_i915_private *i915 = kdev_minor_to_i915(dev); for_each_gt(gt, i915, i) { @@ -77,7 +79,7 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr, } } } else { - gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + gt = intel_gt_sysfs_get_drvdata(kobj, attr->name); ret = func(gt); } @@ -92,6 +94,76 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr, #define sysfs_gt_attribute_r_max_func(d, a, f) \ sysfs_gt_attribute_r_func(d, a, f, INTEL_GT_SYSFS_MAX) +#define INTEL_GT_SYSFS_SHOW(_name, _attr_type) \ + static ssize_t _name##_show_common(struct kobject *kobj, \ + struct attribute *attr, char *buff) \ + { \ + u32 val = sysfs_gt_attribute_r_##_attr_type##_func(kobj, attr, \ + __##_name##_show); \ + \ + return sysfs_emit(buff, "%u\n", val); \ + } \ + static ssize_t _name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buff) \ + { \ + return _name ##_show_common(kobj, &attr->attr, buff); \ + } \ + static ssize_t _name##_dev_show(struct device *dev, \ + struct device_attribute *attr, char *buff) \ + { \ + return _name##_show_common(&dev->kobj, &attr->attr, buff); \ + } + +#define INTEL_GT_SYSFS_STORE(_name, _func) \ + static ssize_t _name##_store_common(struct kobject *kobj, \ + struct attribute *attr, \ + const char *buff, size_t count) \ + { \ + int ret; \ + u32 val; \ + \ + ret = kstrtou32(buff, 0, &val); \ + if (ret) \ + return ret; \ + \ + ret = sysfs_gt_attribute_w_func(kobj, attr, _func, val); \ + \ + return ret ?: count; \ + } \ + static ssize_t _name##_store(struct kobject *kobj, \ + struct kobj_attribute *attr, const char *buff, \ + size_t count) \ + { \ + return _name##_store_common(kobj, &attr->attr, buff, count); \ + } \ + static ssize_t _name##_dev_store(struct device *dev, \ + struct device_attribute *attr, \ + const char *buff, size_t count) \ + { \ + return _name##_store_common(&dev->kobj, &attr->attr, buff, count); \ + } + +#define INTEL_GT_SYSFS_SHOW_MAX(_name) INTEL_GT_SYSFS_SHOW(_name, max) +#define INTEL_GT_SYSFS_SHOW_MIN(_name) INTEL_GT_SYSFS_SHOW(_name, min) + +#define INTEL_GT_ATTR_RW(_name) \ + static struct kobj_attribute attr_##_name = __ATTR_RW(_name) + +#define INTEL_GT_ATTR_RO(_name) \ + static struct kobj_attribute attr_##_name = __ATTR_RO(_name) + +#define INTEL_GT_DUAL_ATTR_RW(_name) \ + static struct device_attribute dev_attr_##_name = __ATTR(_name, 0644, \ + _name##_dev_show, \ + _name##_dev_store); \ + INTEL_GT_ATTR_RW(_name) + +#define INTEL_GT_DUAL_ATTR_RO(_name) \ + static struct device_attribute dev_attr_##_name = __ATTR(_name, 0444, \ + _name##_dev_show, \ + NULL); \ + INTEL_GT_ATTR_RO(_name) + #ifdef CONFIG_PM static u32 get_residency(struct intel_gt *gt, i915_reg_t reg) { @@ -104,11 +176,8 @@ static u32 get_residency(struct intel_gt *gt, i915_reg_t reg) return DIV_ROUND_CLOSEST_ULL(res, 1000); } -static ssize_t rc6_enable_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static u8 get_rc6_mask(struct intel_gt *gt) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); u8 mask = 0; if (HAS_RC6(gt->i915)) @@ -118,7 +187,25 @@ static ssize_t rc6_enable_show(struct device *dev, if (HAS_RC6pp(gt->i915)) mask |= BIT(2); - return sysfs_emit(buff, "%x\n", mask); + return mask; +} + +static ssize_t rc6_enable_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buff) +{ + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); + + return sysfs_emit(buff, "%x\n", get_rc6_mask(gt)); +} + +static ssize_t rc6_enable_dev_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(&dev->kobj, attr->attr.name); + + return sysfs_emit(buff, "%x\n", get_rc6_mask(gt)); } static u32 __rc6_residency_ms_show(struct intel_gt *gt) @@ -126,97 +213,79 @@ static u32 __rc6_residency_ms_show(struct intel_gt *gt) return get_residency(gt, GEN6_GT_GFX_RC6); } -static ssize_t rc6_residency_ms_show(struct device *dev, - struct device_attribute *attr, - char *buff) -{ - u32 rc6_residency = sysfs_gt_attribute_r_min_func(dev, attr, - __rc6_residency_ms_show); - - return sysfs_emit(buff, "%u\n", rc6_residency); -} - static u32 __rc6p_residency_ms_show(struct intel_gt *gt) { return get_residency(gt, GEN6_GT_GFX_RC6p); } -static ssize_t rc6p_residency_ms_show(struct device *dev, - struct device_attribute *attr, - char *buff) -{ - u32 rc6p_residency = sysfs_gt_attribute_r_min_func(dev, attr, - __rc6p_residency_ms_show); - - return sysfs_emit(buff, "%u\n", rc6p_residency); -} - static u32 __rc6pp_residency_ms_show(struct intel_gt *gt) { return get_residency(gt, GEN6_GT_GFX_RC6pp); } -static ssize_t rc6pp_residency_ms_show(struct device *dev, - struct device_attribute *attr, - char *buff) -{ - u32 rc6pp_residency = sysfs_gt_attribute_r_min_func(dev, attr, - __rc6pp_residency_ms_show); - - return sysfs_emit(buff, "%u\n", rc6pp_residency); -} - static u32 __media_rc6_residency_ms_show(struct intel_gt *gt) { return get_residency(gt, VLV_GT_MEDIA_RC6); } -static ssize_t media_rc6_residency_ms_show(struct device *dev, - struct device_attribute *attr, - char *buff) -{ - u32 rc6_residency = sysfs_gt_attribute_r_min_func(dev, attr, - __media_rc6_residency_ms_show); +INTEL_GT_SYSFS_SHOW_MIN(rc6_residency_ms); +INTEL_GT_SYSFS_SHOW_MIN(rc6p_residency_ms); +INTEL_GT_SYSFS_SHOW_MIN(rc6pp_residency_ms); +INTEL_GT_SYSFS_SHOW_MIN(media_rc6_residency_ms); - return sysfs_emit(buff, "%u\n", rc6_residency); -} - -static DEVICE_ATTR_RO(rc6_enable); -static DEVICE_ATTR_RO(rc6_residency_ms); -static DEVICE_ATTR_RO(rc6p_residency_ms); -static DEVICE_ATTR_RO(rc6pp_residency_ms); -static DEVICE_ATTR_RO(media_rc6_residency_ms); +INTEL_GT_DUAL_ATTR_RO(rc6_enable); +INTEL_GT_DUAL_ATTR_RO(rc6_residency_ms); +INTEL_GT_DUAL_ATTR_RO(rc6p_residency_ms); +INTEL_GT_DUAL_ATTR_RO(rc6pp_residency_ms); +INTEL_GT_DUAL_ATTR_RO(media_rc6_residency_ms); static struct attribute *rc6_attrs[] = { + &attr_rc6_enable.attr, + &attr_rc6_residency_ms.attr, + NULL +}; + +static struct attribute *rc6p_attrs[] = { + &attr_rc6p_residency_ms.attr, + &attr_rc6pp_residency_ms.attr, + NULL +}; + +static struct attribute *media_rc6_attrs[] = { + &attr_media_rc6_residency_ms.attr, + NULL +}; + +static struct attribute *rc6_dev_attrs[] = { &dev_attr_rc6_enable.attr, &dev_attr_rc6_residency_ms.attr, NULL }; -static struct attribute *rc6p_attrs[] = { +static struct attribute *rc6p_dev_attrs[] = { &dev_attr_rc6p_residency_ms.attr, &dev_attr_rc6pp_residency_ms.attr, NULL }; -static struct attribute *media_rc6_attrs[] = { +static struct attribute *media_rc6_dev_attrs[] = { &dev_attr_media_rc6_residency_ms.attr, NULL }; static const struct attribute_group rc6_attr_group[] = { { .attrs = rc6_attrs, }, - { .name = power_group_name, .attrs = rc6_attrs, }, + { .name = power_group_name, .attrs = rc6_dev_attrs, }, }; static const struct attribute_group rc6p_attr_group[] = { { .attrs = rc6p_attrs, }, - { .name = power_group_name, .attrs = rc6p_attrs, }, + { .name = power_group_name, .attrs = rc6p_dev_attrs, }, }; static const struct attribute_group media_rc6_attr_group[] = { { .attrs = media_rc6_attrs, }, - { .name = power_group_name, .attrs = media_rc6_attrs, }, + { .name = power_group_name, .attrs = media_rc6_dev_attrs, }, }; static int __intel_gt_sysfs_create_group(struct kobject *kobj, @@ -271,104 +340,34 @@ static u32 __act_freq_mhz_show(struct intel_gt *gt) return intel_rps_read_actual_frequency(>->rps); } -static ssize_t act_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 actual_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __act_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", actual_freq); -} - static u32 __cur_freq_mhz_show(struct intel_gt *gt) { return intel_rps_get_requested_frequency(>->rps); } -static ssize_t cur_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 cur_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __cur_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", cur_freq); -} - static u32 __boost_freq_mhz_show(struct intel_gt *gt) { return intel_rps_get_boost_frequency(>->rps); } -static ssize_t boost_freq_mhz_show(struct device *dev, - struct device_attribute *attr, - char *buff) -{ - u32 boost_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __boost_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", boost_freq); -} - static int __boost_freq_mhz_store(struct intel_gt *gt, u32 val) { return intel_rps_set_boost_frequency(>->rps, val); } -static ssize_t boost_freq_mhz_store(struct device *dev, - struct device_attribute *attr, - const char *buff, size_t count) -{ - ssize_t ret; - u32 val; - - ret = kstrtou32(buff, 0, &val); - if (ret) - return ret; - - return sysfs_gt_attribute_w_func(dev, attr, - __boost_freq_mhz_store, val) ?: count; -} - -static u32 __rp0_freq_mhz_show(struct intel_gt *gt) +static u32 __RP0_freq_mhz_show(struct intel_gt *gt) { return intel_rps_get_rp0_frequency(>->rps); } -static ssize_t RP0_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 rp0_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __rp0_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", rp0_freq); -} - -static u32 __rp1_freq_mhz_show(struct intel_gt *gt) -{ - return intel_rps_get_rp1_frequency(>->rps); -} - -static ssize_t RP1_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 rp1_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __rp1_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", rp1_freq); -} - -static u32 __rpn_freq_mhz_show(struct intel_gt *gt) +static u32 __RPn_freq_mhz_show(struct intel_gt *gt) { return intel_rps_get_rpn_frequency(>->rps); } -static ssize_t RPn_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) +static u32 __RP1_freq_mhz_show(struct intel_gt *gt) { - u32 rpn_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __rpn_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", rpn_freq); + return intel_rps_get_rp1_frequency(>->rps); } static u32 __max_freq_mhz_show(struct intel_gt *gt) @@ -376,71 +375,21 @@ static u32 __max_freq_mhz_show(struct intel_gt *gt) return intel_rps_get_max_frequency(>->rps); } -static ssize_t max_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 max_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __max_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", max_freq); -} - static int __set_max_freq(struct intel_gt *gt, u32 val) { return intel_rps_set_max_frequency(>->rps, val); } -static ssize_t max_freq_mhz_store(struct device *dev, - struct device_attribute *attr, - const char *buff, size_t count) -{ - int ret; - u32 val; - - ret = kstrtou32(buff, 0, &val); - if (ret) - return ret; - - ret = sysfs_gt_attribute_w_func(dev, attr, __set_max_freq, val); - - return ret ?: count; -} - static u32 __min_freq_mhz_show(struct intel_gt *gt) { return intel_rps_get_min_frequency(>->rps); } -static ssize_t min_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 min_freq = sysfs_gt_attribute_r_min_func(dev, attr, - __min_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", min_freq); -} - static int __set_min_freq(struct intel_gt *gt, u32 val) { return intel_rps_set_min_frequency(>->rps, val); } -static ssize_t min_freq_mhz_store(struct device *dev, - struct device_attribute *attr, - const char *buff, size_t count) -{ - int ret; - u32 val; - - ret = kstrtou32(buff, 0, &val); - if (ret) - return ret; - - ret = sysfs_gt_attribute_w_func(dev, attr, __set_min_freq, val); - - return ret ?: count; -} - static u32 __vlv_rpe_freq_mhz_show(struct intel_gt *gt) { struct intel_rps *rps = >->rps; @@ -448,23 +397,31 @@ static u32 __vlv_rpe_freq_mhz_show(struct intel_gt *gt) return intel_gpu_freq(rps, rps->efficient_freq); } -static ssize_t vlv_rpe_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 rpe_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __vlv_rpe_freq_mhz_show); +INTEL_GT_SYSFS_SHOW_MAX(act_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(boost_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(cur_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(RP0_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(RP1_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(RPn_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(max_freq_mhz); +INTEL_GT_SYSFS_SHOW_MIN(min_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(vlv_rpe_freq_mhz); +INTEL_GT_SYSFS_STORE(boost_freq_mhz, __boost_freq_mhz_store); +INTEL_GT_SYSFS_STORE(max_freq_mhz, __set_max_freq); +INTEL_GT_SYSFS_STORE(min_freq_mhz, __set_min_freq); - return sysfs_emit(buff, "%u\n", rpe_freq); -} +#define INTEL_GT_RPS_SYSFS_ATTR(_name, _mode, _show, _store, _show_dev, _store_dev) \ + static struct device_attribute dev_attr_gt_##_name = __ATTR(gt_##_name, _mode, \ + _show_dev, _store_dev); \ + static struct kobj_attribute attr_rps_##_name = __ATTR(rps_##_name, _mode, \ + _show, _store) -#define INTEL_GT_RPS_SYSFS_ATTR(_name, _mode, _show, _store) \ - static struct device_attribute dev_attr_gt_##_name = __ATTR(gt_##_name, _mode, _show, _store); \ - static struct device_attribute dev_attr_rps_##_name = __ATTR(rps_##_name, _mode, _show, _store) - -#define INTEL_GT_RPS_SYSFS_ATTR_RO(_name) \ - INTEL_GT_RPS_SYSFS_ATTR(_name, 0444, _name##_show, NULL) -#define INTEL_GT_RPS_SYSFS_ATTR_RW(_name) \ - INTEL_GT_RPS_SYSFS_ATTR(_name, 0644, _name##_show, _name##_store) +#define INTEL_GT_RPS_SYSFS_ATTR_RO(_name) \ + INTEL_GT_RPS_SYSFS_ATTR(_name, 0444, _name##_show, NULL, \ + _name##_dev_show, NULL) +#define INTEL_GT_RPS_SYSFS_ATTR_RW(_name) \ + INTEL_GT_RPS_SYSFS_ATTR(_name, 0644, _name##_show, _name##_store, \ + _name##_dev_show, _name##_dev_store) /* The below macros generate static structures */ INTEL_GT_RPS_SYSFS_ATTR_RO(act_freq_mhz); @@ -475,32 +432,31 @@ INTEL_GT_RPS_SYSFS_ATTR_RO(RP1_freq_mhz); INTEL_GT_RPS_SYSFS_ATTR_RO(RPn_freq_mhz); INTEL_GT_RPS_SYSFS_ATTR_RW(max_freq_mhz); INTEL_GT_RPS_SYSFS_ATTR_RW(min_freq_mhz); +INTEL_GT_RPS_SYSFS_ATTR_RO(vlv_rpe_freq_mhz); -static DEVICE_ATTR_RO(vlv_rpe_freq_mhz); - -#define GEN6_ATTR(s) { \ - &dev_attr_##s##_act_freq_mhz.attr, \ - &dev_attr_##s##_cur_freq_mhz.attr, \ - &dev_attr_##s##_boost_freq_mhz.attr, \ - &dev_attr_##s##_max_freq_mhz.attr, \ - &dev_attr_##s##_min_freq_mhz.attr, \ - &dev_attr_##s##_RP0_freq_mhz.attr, \ - &dev_attr_##s##_RP1_freq_mhz.attr, \ - &dev_attr_##s##_RPn_freq_mhz.attr, \ +#define GEN6_ATTR(p, s) { \ + &p##attr_##s##_act_freq_mhz.attr, \ + &p##attr_##s##_cur_freq_mhz.attr, \ + &p##attr_##s##_boost_freq_mhz.attr, \ + &p##attr_##s##_max_freq_mhz.attr, \ + &p##attr_##s##_min_freq_mhz.attr, \ + &p##attr_##s##_RP0_freq_mhz.attr, \ + &p##attr_##s##_RP1_freq_mhz.attr, \ + &p##attr_##s##_RPn_freq_mhz.attr, \ NULL, \ } -#define GEN6_RPS_ATTR GEN6_ATTR(rps) -#define GEN6_GT_ATTR GEN6_ATTR(gt) +#define GEN6_RPS_ATTR GEN6_ATTR(, rps) +#define GEN6_GT_ATTR GEN6_ATTR(dev_, gt) static const struct attribute * const gen6_rps_attrs[] = GEN6_RPS_ATTR; static const struct attribute * const gen6_gt_attrs[] = GEN6_GT_ATTR; -static ssize_t punit_req_freq_mhz_show(struct device *dev, - struct device_attribute *attr, +static ssize_t punit_req_freq_mhz_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); u32 preq = intel_rps_read_punit_req_frequency(>->rps); return sysfs_emit(buff, "%u\n", preq); @@ -508,17 +464,17 @@ static ssize_t punit_req_freq_mhz_show(struct device *dev, struct intel_gt_bool_throttle_attr { struct attribute attr; - ssize_t (*show)(struct device *dev, struct device_attribute *attr, + ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr, char *buf); i915_reg_t reg32; u32 mask; }; -static ssize_t throttle_reason_bool_show(struct device *dev, - struct device_attribute *attr, +static ssize_t throttle_reason_bool_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); struct intel_gt_bool_throttle_attr *t_attr = (struct intel_gt_bool_throttle_attr *) attr; bool val = rps_read_mask_mmio(>->rps, t_attr->reg32, t_attr->mask); @@ -534,7 +490,7 @@ struct intel_gt_bool_throttle_attr attr_##sysfs_func__ = { \ .mask = mask__, \ } -static DEVICE_ATTR_RO(punit_req_freq_mhz); +INTEL_GT_ATTR_RO(punit_req_freq_mhz); static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_status, GT0_PERF_LIMIT_REASONS_MASK); static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_pl1, POWER_LIMIT_1_MASK); static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_pl2, POWER_LIMIT_2_MASK); @@ -597,8 +553,8 @@ static const struct attribute *throttle_reason_attrs[] = { #define U8_8_VAL_MASK 0xffff #define U8_8_SCALE_TO_VALUE "0.00390625" -static ssize_t freq_factor_scale_show(struct device *dev, - struct device_attribute *attr, +static ssize_t freq_factor_scale_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { return sysfs_emit(buff, "%s\n", U8_8_SCALE_TO_VALUE); @@ -610,11 +566,11 @@ static u32 media_ratio_mode_to_factor(u32 mode) return !mode ? mode : 256 / mode; } -static ssize_t media_freq_factor_show(struct device *dev, - struct device_attribute *attr, +static ssize_t media_freq_factor_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); struct intel_guc_slpc *slpc = >->uc.guc.slpc; intel_wakeref_t wakeref; u32 mode; @@ -641,11 +597,11 @@ static ssize_t media_freq_factor_show(struct device *dev, return sysfs_emit(buff, "%u\n", media_ratio_mode_to_factor(mode)); } -static ssize_t media_freq_factor_store(struct device *dev, - struct device_attribute *attr, +static ssize_t media_freq_factor_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buff, size_t count) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); struct intel_guc_slpc *slpc = >->uc.guc.slpc; u32 factor, mode; int err; @@ -670,11 +626,11 @@ static ssize_t media_freq_factor_store(struct device *dev, return err ?: count; } -static ssize_t media_RP0_freq_mhz_show(struct device *dev, - struct device_attribute *attr, +static ssize_t media_RP0_freq_mhz_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); u32 val; int err; @@ -691,11 +647,11 @@ static ssize_t media_RP0_freq_mhz_show(struct device *dev, return sysfs_emit(buff, "%u\n", val); } -static ssize_t media_RPn_freq_mhz_show(struct device *dev, - struct device_attribute *attr, +static ssize_t media_RPn_freq_mhz_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); u32 val; int err; @@ -712,17 +668,17 @@ static ssize_t media_RPn_freq_mhz_show(struct device *dev, return sysfs_emit(buff, "%u\n", val); } -static DEVICE_ATTR_RW(media_freq_factor); -static struct device_attribute dev_attr_media_freq_factor_scale = +INTEL_GT_ATTR_RW(media_freq_factor); +static struct kobj_attribute attr_media_freq_factor_scale = __ATTR(media_freq_factor.scale, 0444, freq_factor_scale_show, NULL); -static DEVICE_ATTR_RO(media_RP0_freq_mhz); -static DEVICE_ATTR_RO(media_RPn_freq_mhz); +INTEL_GT_ATTR_RO(media_RP0_freq_mhz); +INTEL_GT_ATTR_RO(media_RPn_freq_mhz); static const struct attribute *media_perf_power_attrs[] = { - &dev_attr_media_freq_factor.attr, - &dev_attr_media_freq_factor_scale.attr, - &dev_attr_media_RP0_freq_mhz.attr, - &dev_attr_media_RPn_freq_mhz.attr, + &attr_media_freq_factor.attr, + &attr_media_freq_factor_scale.attr, + &attr_media_RP0_freq_mhz.attr, + &attr_media_RPn_freq_mhz.attr, NULL }; @@ -754,20 +710,29 @@ static const struct attribute * const rps_defaults_attrs[] = { NULL }; -static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj, - const struct attribute * const *attrs) +static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj) { + const struct attribute * const *attrs; + struct attribute *vlv_attr; int ret; if (GRAPHICS_VER(gt->i915) < 6) return 0; + if (is_object_gt(kobj)) { + attrs = gen6_rps_attrs; + vlv_attr = &attr_rps_vlv_rpe_freq_mhz.attr; + } else { + attrs = gen6_gt_attrs; + vlv_attr = &dev_attr_gt_vlv_rpe_freq_mhz.attr; + } + ret = sysfs_create_files(kobj, attrs); if (ret) return ret; if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915)) - ret = sysfs_create_file(kobj, &dev_attr_vlv_rpe_freq_mhz.attr); + ret = sysfs_create_file(kobj, vlv_attr); return ret; } @@ -778,9 +743,7 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj) intel_sysfs_rc6_init(gt, kobj); - ret = is_object_gt(kobj) ? - intel_sysfs_rps_init(gt, kobj, gen6_rps_attrs) : - intel_sysfs_rps_init(gt, kobj, gen6_gt_attrs); + ret = intel_sysfs_rps_init(gt, kobj); if (ret) drm_warn(>->i915->drm, "failed to create gt%u RPS sysfs files (%pe)", @@ -790,7 +753,7 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj) if (!is_object_gt(kobj)) return; - ret = sysfs_create_file(kobj, &dev_attr_punit_req_freq_mhz.attr); + ret = sysfs_create_file(kobj, &attr_punit_req_freq_mhz.attr); if (ret) drm_warn(>->i915->drm, "failed to create gt%u punit_req_freq_mhz sysfs (%pe)", From 0a14c3ded80c7dd399e68f361ee3338b5271c98e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 2 Jan 2023 16:49:46 -0700 Subject: [PATCH 349/543] io_uring/io-wq: free worker if task_work creation is canceled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit af82425c6a2d2f347c79b63ce74fca6dc6be157f upstream. If we cancel the task_work, the worker will never come into existance. As this is the last reference to it, ensure that we get it freed appropriately. Cc: stable@vger.kernel.org Reported-by: 진호 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/io-wq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index 6f1d0e5df23a..992dcd9f8c4c 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -1230,6 +1230,7 @@ static void io_wq_cancel_tw_create(struct io_wq *wq) worker = container_of(cb, struct io_worker, create_work); io_worker_cancel_cb(worker); + kfree(worker); } } From e62e6258ab140b55bbef1f0d288263b75e2f0c20 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 8 Jan 2023 10:39:17 -0700 Subject: [PATCH 350/543] io_uring/io-wq: only free worker if it was allocated for creation commit e6db6f9398dadcbc06318a133d4c44a2d3844e61 upstream. We have two types of task_work based creation, one is using an existing worker to setup a new one (eg when going to sleep and we have no free workers), and the other is allocating a new worker. Only the latter should be freed when we cancel task_work creation for a new worker. Fixes: af82425c6a2d ("io_uring/io-wq: free worker if task_work creation is canceled") Reported-by: syzbot+d56ec896af3637bdb7e4@syzkaller.appspotmail.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/io-wq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index 992dcd9f8c4c..411bb2d1acd4 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -1230,7 +1230,12 @@ static void io_wq_cancel_tw_create(struct io_wq *wq) worker = container_of(cb, struct io_worker, create_work); io_worker_cancel_cb(worker); - kfree(worker); + /* + * Only the worker continuation helper has worker allocated and + * hence needs freeing. + */ + if (cb->func == create_worker_cont) + kfree(worker); } } From 73a630b359c852d6edc113588c513ddbcd3be47a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 4 Jan 2023 08:51:19 -0700 Subject: [PATCH 351/543] block: handle bio_split_to_limits() NULL return commit 613b14884b8595e20b9fac4126bf627313827fbe upstream. This can't happen right now, but in preparation for allowing bio_split_to_limits() returning NULL if it ended the bio, check for it in all the callers. Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-merge.c | 4 +++- block/blk-mq.c | 5 ++++- drivers/block/drbd/drbd_req.c | 2 ++ drivers/block/ps3vram.c | 2 ++ drivers/md/dm.c | 2 ++ drivers/md/md.c | 2 ++ drivers/nvme/host/multipath.c | 2 ++ drivers/s390/block/dcssblk.c | 2 ++ 8 files changed, 19 insertions(+), 2 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index f46c87ef951d..84f03d066cb3 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -358,11 +358,13 @@ struct bio *__bio_split_to_limits(struct bio *bio, struct queue_limits *lim, default: split = bio_split_rw(bio, lim, nr_segs, bs, get_max_io_size(bio, lim) << SECTOR_SHIFT); + if (IS_ERR(split)) + return NULL; break; } if (split) { - /* there isn't chance to merge the splitted bio */ + /* there isn't chance to merge the split bio */ split->bi_opf |= REQ_NOMERGE; blkcg_bio_issue_init(split); diff --git a/block/blk-mq.c b/block/blk-mq.c index 0b855e033a83..63abbe342b28 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2919,8 +2919,11 @@ void blk_mq_submit_bio(struct bio *bio) blk_status_t ret; bio = blk_queue_bounce(bio, q); - if (bio_may_exceed_limits(bio, &q->limits)) + if (bio_may_exceed_limits(bio, &q->limits)) { bio = __bio_split_to_limits(bio, &q->limits, &nr_segs); + if (!bio) + return; + } if (!bio_integrity_prep(bio)) return; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 7f9bcc82fc9c..d700bf06b534 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1607,6 +1607,8 @@ void drbd_submit_bio(struct bio *bio) struct drbd_device *device = bio->bi_bdev->bd_disk->private_data; bio = bio_split_to_limits(bio); + if (!bio) + return; /* * what we "blindly" assume: diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index c76e0148eada..574e470b220b 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -587,6 +587,8 @@ static void ps3vram_submit_bio(struct bio *bio) dev_dbg(&dev->core, "%s\n", __func__); bio = bio_split_to_limits(bio); + if (!bio) + return; spin_lock_irq(&priv->lock); busy = !bio_list_empty(&priv->list); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e30c2d2bc9c7..d49809e9db96 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1755,6 +1755,8 @@ static void dm_split_and_process_bio(struct mapped_device *md, * otherwise associated queue_limits won't be imposed. */ bio = bio_split_to_limits(bio); + if (!bio) + return; } init_clone_info(&ci, md, map, bio, is_abnormal); diff --git a/drivers/md/md.c b/drivers/md/md.c index fd82881761d3..b911085060dc 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -443,6 +443,8 @@ static void md_submit_bio(struct bio *bio) } bio = bio_split_to_limits(bio); + if (!bio) + return; if (mddev->ro == 1 && unlikely(rw == WRITE)) { if (bio_sectors(bio) != 0) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 7e025b8948cb..d09ed0070174 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -351,6 +351,8 @@ static void nvme_ns_head_submit_bio(struct bio *bio) * pool from the original queue to allocate the bvecs from. */ bio = bio_split_to_limits(bio); + if (!bio) + return; srcu_idx = srcu_read_lock(&head->srcu); ns = nvme_find_path(head); diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index b392b9f5482e..c0f85ffb2b62 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -865,6 +865,8 @@ dcssblk_submit_bio(struct bio *bio) unsigned long bytes_done; bio = bio_split_to_limits(bio); + if (!bio) + return; bytes_done = 0; dev_info = bio->bi_bdev->bd_disk->private_data; From 86cd9d9ebda6460a2ec2c9e9e5970da12c6b8fce Mon Sep 17 00:00:00 2001 From: Ferry Toth Date: Thu, 22 Dec 2022 21:53:02 +0100 Subject: [PATCH 352/543] Revert "usb: ulpi: defer ulpi_register on ulpi_read_id timeout" commit b659b613cea2ae39746ca8bd2b69d1985dd9d770 upstream. This reverts commit 8a7b31d545d3a15f0e6f5984ae16f0ca4fd76aac. This patch results in some qemu test failures, specifically xilinx-zynq-a9 machine and zynq-zc702 as well as zynq-zed devicetree files, when trying to boot from USB drive. Link: https://lore.kernel.org/lkml/20221220194334.GA942039@roeck-us.net/ Fixes: 8a7b31d545d3 ("usb: ulpi: defer ulpi_register on ulpi_read_id timeout") Cc: stable@vger.kernel.org Reported-by: Guenter Roeck Signed-off-by: Ferry Toth Link: https://lore.kernel.org/r/20221222205302.45761-1-ftoth@exalondelft.nl Signed-off-by: Greg Kroah-Hartman --- drivers/usb/common/ulpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c index 60e8174686a1..d7c8461976ce 100644 --- a/drivers/usb/common/ulpi.c +++ b/drivers/usb/common/ulpi.c @@ -207,7 +207,7 @@ static int ulpi_read_id(struct ulpi *ulpi) /* Test the interface */ ret = ulpi_write(ulpi, ULPI_SCRATCH, 0xaa); if (ret < 0) - return ret; + goto err; ret = ulpi_read(ulpi, ULPI_SCRATCH); if (ret < 0) From 046c9972dd40775cff4d12294ca14b0b624f2c35 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 13 Oct 2022 08:47:29 -0500 Subject: [PATCH 353/543] pinctrl: amd: Add dynamic debugging for active GPIOs commit 1d66e379731f79ae5039a869c0fde22a4f6a6a91 upstream. Some laptops have been reported to wake up from s2idle when plugging in the AC adapter or by closing the lid. This is a surprising behavior that is further clarified by commit cb3e7d624c3ff ("PM: wakeup: Add extra debugging statement for multiple active IRQs"). With that commit in place the following interaction can be seen when the lid is closed: [ 28.946038] PM: suspend-to-idle [ 28.946083] ACPI: EC: ACPI EC GPE status set [ 28.946101] ACPI: PM: Rearming ACPI SCI for wakeup [ 28.950152] Timekeeping suspended for 3.320 seconds [ 28.950152] PM: Triggering wakeup from IRQ 9 [ 28.950152] ACPI: EC: ACPI EC GPE status set [ 28.950152] ACPI: EC: ACPI EC GPE dispatched [ 28.995057] ACPI: EC: ACPI EC work flushed [ 28.995075] ACPI: PM: Rearming ACPI SCI for wakeup [ 28.995131] PM: Triggering wakeup from IRQ 9 [ 28.995271] ACPI: EC: ACPI EC GPE status set [ 28.995291] ACPI: EC: ACPI EC GPE dispatched [ 29.098556] ACPI: EC: ACPI EC work flushed [ 29.207020] ACPI: EC: ACPI EC work flushed [ 29.207037] ACPI: PM: Rearming ACPI SCI for wakeup [ 29.211095] Timekeeping suspended for 0.739 seconds [ 29.211095] PM: Triggering wakeup from IRQ 9 [ 29.211079] PM: Triggering wakeup from IRQ 7 [ 29.211095] ACPI: PM: ACPI non-EC GPE wakeup [ 29.211095] PM: resume from suspend-to-idle * IRQ9 on this laptop is used for the ACPI SCI. * IRQ7 on this laptop is used for the GPIO controller. What has occurred is when the lid was closed the EC woke up the SoC from it's deepest sleep state and the kernel's s2idle loop processed all EC events. When it was finished processing EC events, it checked for any other reasons to wake (break the s2idle loop). The IRQ for the GPIO controller was active so the loop broke, and then this IRQ was processed. This is not a kernel bug but it is certainly a surprising behavior, and to better debug it we should have a dynamic debugging message that we can enact to catch it. Acked-by: Basavaraj Natikar Acked-by: Kai-Heng Feng Acked-by: Mark Pearson Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20221013134729.5592-2-mario.limonciello@amd.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-amd.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 6be896871718..9bc6e3922e78 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -628,13 +628,15 @@ static bool do_amd_gpio_irq_handler(int irq, void *dev_id) /* Each status bit covers four pins */ for (i = 0; i < 4; i++) { regval = readl(regs + i); - /* caused wake on resume context for shared IRQ */ - if (irq < 0 && (regval & BIT(WAKE_STS_OFF))) { + + if (regval & PIN_IRQ_PENDING) dev_dbg(&gpio_dev->pdev->dev, - "Waking due to GPIO %d: 0x%x", + "GPIO %d is active: 0x%x", irqnr + i, regval); + + /* caused wake on resume context for shared IRQ */ + if (irq < 0 && (regval & BIT(WAKE_STS_OFF))) return true; - } if (!(regval & PIN_IRQ_PENDING) || !(regval & BIT(INTERRUPT_MASK_OFF))) From 21e996306a6afaae88295858de0ffb8955173a15 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 18 Jan 2023 11:58:34 +0100 Subject: [PATCH 354/543] Linux 6.1.7 Link: https://lore.kernel.org/r/20230116154803.321528435@linuxfoundation.org Tested-by: Salvatore Bonaccorso Tested-by: Ronald Warsow Tested-by: Conor Dooley Tested-by: Shuah Khan Tested-by: Justin M. Forbes Tested-by: Takeshi Ogasawara Tested-by: Rudi Heitbaum Tested-by: Bagas Sanjaya Tested-by: Ron Economos Tested-by: Linux Kernel Functional Testing Tested-by: Jon Hunter Tested-by: Sudip Mukherjee Tested-by: Allen Pais Link: https://lore.kernel.org/r/20230117124546.116438951@linuxfoundation.org Tested-by: Allen Pais Tested-by: Ronald Warsow Tested-by: Ron Economos Tested-by: Takeshi Ogasawara Tested-by: Guenter Roeck Tested-by: Kelsey Steele Tested-by: Bagas Sanjaya Tested-by: Fenil Jain Tested-by: Linux Kernel Functional Testing Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 19e8c6dec6e5..7eb6793ecfbf 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 6 +SUBLEVEL = 7 EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth From 34762a3d6a85ac0c8044ff346a0dff260515cdf4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 6 Dec 2022 14:07:49 +0100 Subject: [PATCH 355/543] dma-buf: fix dma_buf_export init order v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit f728a5ea27c92133893590e731ce10f6561ced87 ] The init order and resulting error handling in dma_buf_export was pretty messy. Subordinate objects like the file and the sysfs kernel objects were initializing and wiring itself up with the object in the wrong order resulting not only in complicating and partially incorrect error handling, but also in publishing only halve initialized DMA-buf objects. Clean this up thoughtfully by allocating the file independent of the DMA-buf object. Then allocate and initialize the DMA-buf object itself, before publishing it through sysfs. If everything works as expected the file is then connected with the DMA-buf object and publish it through debugfs. Also adds the missing dma_resv_fini() into the error handling. v2: add some missing changes to dma_bug_getfile() and a missing NULL check in dma_buf_file_release() Signed-off-by: Christian König Reviewed-by: Michael J. Ruhl Reviewed-by: T.J. Mercier Acked-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20221209071535.933698-1-christian.koenig@amd.com Signed-off-by: Sasha Levin --- drivers/dma-buf/dma-buf-sysfs-stats.c | 7 +- drivers/dma-buf/dma-buf-sysfs-stats.h | 4 +- drivers/dma-buf/dma-buf.c | 94 +++++++++++++-------------- 3 files changed, 48 insertions(+), 57 deletions(-) diff --git a/drivers/dma-buf/dma-buf-sysfs-stats.c b/drivers/dma-buf/dma-buf-sysfs-stats.c index 2bba0babcb62..4b680e10c15a 100644 --- a/drivers/dma-buf/dma-buf-sysfs-stats.c +++ b/drivers/dma-buf/dma-buf-sysfs-stats.c @@ -168,14 +168,11 @@ void dma_buf_uninit_sysfs_statistics(void) kset_unregister(dma_buf_stats_kset); } -int dma_buf_stats_setup(struct dma_buf *dmabuf) +int dma_buf_stats_setup(struct dma_buf *dmabuf, struct file *file) { struct dma_buf_sysfs_entry *sysfs_entry; int ret; - if (!dmabuf || !dmabuf->file) - return -EINVAL; - if (!dmabuf->exp_name) { pr_err("exporter name must not be empty if stats needed\n"); return -EINVAL; @@ -192,7 +189,7 @@ int dma_buf_stats_setup(struct dma_buf *dmabuf) /* create the directory for buffer stats */ ret = kobject_init_and_add(&sysfs_entry->kobj, &dma_buf_ktype, NULL, - "%lu", file_inode(dmabuf->file)->i_ino); + "%lu", file_inode(file)->i_ino); if (ret) goto err_sysfs_dmabuf; diff --git a/drivers/dma-buf/dma-buf-sysfs-stats.h b/drivers/dma-buf/dma-buf-sysfs-stats.h index a49c6e2650cc..7a8a995b75ba 100644 --- a/drivers/dma-buf/dma-buf-sysfs-stats.h +++ b/drivers/dma-buf/dma-buf-sysfs-stats.h @@ -13,7 +13,7 @@ int dma_buf_init_sysfs_statistics(void); void dma_buf_uninit_sysfs_statistics(void); -int dma_buf_stats_setup(struct dma_buf *dmabuf); +int dma_buf_stats_setup(struct dma_buf *dmabuf, struct file *file); void dma_buf_stats_teardown(struct dma_buf *dmabuf); #else @@ -25,7 +25,7 @@ static inline int dma_buf_init_sysfs_statistics(void) static inline void dma_buf_uninit_sysfs_statistics(void) {} -static inline int dma_buf_stats_setup(struct dma_buf *dmabuf) +static inline int dma_buf_stats_setup(struct dma_buf *dmabuf, struct file *file) { return 0; } diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index e6f36c014c4c..eb6b59363c4f 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -95,10 +95,11 @@ static int dma_buf_file_release(struct inode *inode, struct file *file) return -EINVAL; dmabuf = file->private_data; - - mutex_lock(&db_list.lock); - list_del(&dmabuf->list_node); - mutex_unlock(&db_list.lock); + if (dmabuf) { + mutex_lock(&db_list.lock); + list_del(&dmabuf->list_node); + mutex_unlock(&db_list.lock); + } return 0; } @@ -523,17 +524,17 @@ static inline int is_dma_buf_file(struct file *file) return file->f_op == &dma_buf_fops; } -static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags) +static struct file *dma_buf_getfile(size_t size, int flags) { static atomic64_t dmabuf_inode = ATOMIC64_INIT(0); - struct file *file; struct inode *inode = alloc_anon_inode(dma_buf_mnt->mnt_sb); + struct file *file; if (IS_ERR(inode)) return ERR_CAST(inode); - inode->i_size = dmabuf->size; - inode_set_bytes(inode, dmabuf->size); + inode->i_size = size; + inode_set_bytes(inode, size); /* * The ->i_ino acquired from get_next_ino() is not unique thus @@ -547,8 +548,6 @@ static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags) flags, &dma_buf_fops); if (IS_ERR(file)) goto err_alloc_file; - file->private_data = dmabuf; - file->f_path.dentry->d_fsdata = dmabuf; return file; @@ -614,19 +613,11 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) size_t alloc_size = sizeof(struct dma_buf); int ret; - if (!exp_info->resv) - alloc_size += sizeof(struct dma_resv); - else - /* prevent &dma_buf[1] == dma_buf->resv */ - alloc_size += 1; - - if (WARN_ON(!exp_info->priv - || !exp_info->ops - || !exp_info->ops->map_dma_buf - || !exp_info->ops->unmap_dma_buf - || !exp_info->ops->release)) { + if (WARN_ON(!exp_info->priv || !exp_info->ops + || !exp_info->ops->map_dma_buf + || !exp_info->ops->unmap_dma_buf + || !exp_info->ops->release)) return ERR_PTR(-EINVAL); - } if (WARN_ON(exp_info->ops->cache_sgt_mapping && (exp_info->ops->pin || exp_info->ops->unpin))) @@ -638,10 +629,21 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) if (!try_module_get(exp_info->owner)) return ERR_PTR(-ENOENT); + file = dma_buf_getfile(exp_info->size, exp_info->flags); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + goto err_module; + } + + if (!exp_info->resv) + alloc_size += sizeof(struct dma_resv); + else + /* prevent &dma_buf[1] == dma_buf->resv */ + alloc_size += 1; dmabuf = kzalloc(alloc_size, GFP_KERNEL); if (!dmabuf) { ret = -ENOMEM; - goto err_module; + goto err_file; } dmabuf->priv = exp_info->priv; @@ -653,44 +655,36 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info) init_waitqueue_head(&dmabuf->poll); dmabuf->cb_in.poll = dmabuf->cb_out.poll = &dmabuf->poll; dmabuf->cb_in.active = dmabuf->cb_out.active = 0; - - if (!resv) { - resv = (struct dma_resv *)&dmabuf[1]; - dma_resv_init(resv); - } - dmabuf->resv = resv; - - file = dma_buf_getfile(dmabuf, exp_info->flags); - if (IS_ERR(file)) { - ret = PTR_ERR(file); - goto err_dmabuf; - } - - dmabuf->file = file; - mutex_init(&dmabuf->lock); INIT_LIST_HEAD(&dmabuf->attachments); + if (!resv) { + dmabuf->resv = (struct dma_resv *)&dmabuf[1]; + dma_resv_init(dmabuf->resv); + } else { + dmabuf->resv = resv; + } + + ret = dma_buf_stats_setup(dmabuf, file); + if (ret) + goto err_dmabuf; + + file->private_data = dmabuf; + file->f_path.dentry->d_fsdata = dmabuf; + dmabuf->file = file; + mutex_lock(&db_list.lock); list_add(&dmabuf->list_node, &db_list.head); mutex_unlock(&db_list.lock); - ret = dma_buf_stats_setup(dmabuf); - if (ret) - goto err_sysfs; - return dmabuf; -err_sysfs: - /* - * Set file->f_path.dentry->d_fsdata to NULL so that when - * dma_buf_release() gets invoked by dentry_ops, it exits - * early before calling the release() dma_buf op. - */ - file->f_path.dentry->d_fsdata = NULL; - fput(file); err_dmabuf: + if (!resv) + dma_resv_fini(dmabuf->resv); kfree(dmabuf); +err_file: + fput(file); err_module: module_put(exp_info->owner); return ERR_PTR(ret); From d1607059375d5a3926ba73d00010ea1d96bbd0cc Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Wed, 14 Dec 2022 11:06:07 +0900 Subject: [PATCH 356/543] btrfs: fix trace event name typo for FLUSH_DELAYED_REFS [ Upstream commit 0a3212de8ab3e2ce5808c6265855e528d4a6767b ] Fix a typo of printing FLUSH_DELAYED_REFS event in flush_space() as FLUSH_ELAYED_REFS. Reviewed-by: Johannes Thumshirn Signed-off-by: Naohiro Aota Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- include/trace/events/btrfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index ed50e81174bf..5e10b5b1d16c 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -98,7 +98,7 @@ struct raid56_bio_trace_info; EM( FLUSH_DELALLOC_WAIT, "FLUSH_DELALLOC_WAIT") \ EM( FLUSH_DELALLOC_FULL, "FLUSH_DELALLOC_FULL") \ EM( FLUSH_DELAYED_REFS_NR, "FLUSH_DELAYED_REFS_NR") \ - EM( FLUSH_DELAYED_REFS, "FLUSH_ELAYED_REFS") \ + EM( FLUSH_DELAYED_REFS, "FLUSH_DELAYED_REFS") \ EM( ALLOC_CHUNK, "ALLOC_CHUNK") \ EM( ALLOC_CHUNK_FORCE, "ALLOC_CHUNK_FORCE") \ EM( RUN_DELAYED_IPUTS, "RUN_DELAYED_IPUTS") \ From 77043645aa6c5f8b388c2d99b5f3164307acf448 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 13 Dec 2022 23:15:04 +0200 Subject: [PATCH 357/543] wifi: iwlwifi: fw: skip PPAG for JF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 1c4c0b28b517d778d37900deedfe91088839f07a ] For JF RFs we don't support PPAG, but many firmware images lie about it. Always skip support for JF to avoid firmware errors when sending the command. Reported-and-tested-by: Íñigo Huguet Link: https://lore.kernel.org/linux-wireless/CACT4oufQsqHGp6bah2c4+jPn2wG1oZqY=UKa_TmPx=F6Lxng8Q@mail.gmail.com Signed-off-by: Johannes Berg Signed-off-by: Gregory Greenman Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20221213225723.2a43415d8990.I9ac210740a45b41f1b2e15274e1daf4284f2808a@changeid Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index e6d64152c81a..a02e5a67b706 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -1106,6 +1106,11 @@ int iwl_read_ppag_table(struct iwl_fw_runtime *fwrt, union iwl_ppag_table_cmd *c int i, j, num_sub_bands; s8 *gain; + /* many firmware images for JF lie about this */ + if (CSR_HW_RFID_TYPE(fwrt->trans->hw_rf_id) == + CSR_HW_RFID_TYPE(CSR_HW_RF_ID_TYPE_JF)) + return -EOPNOTSUPP; + if (!fw_has_capa(&fwrt->fw->ucode_capa, IWL_UCODE_TLV_CAPA_SET_PPAG)) { IWL_DEBUG_RADIO(fwrt, "PPAG capability not supported by FW, command not sent.\n"); From 16e7fb3cc305272a26d2af19a72767f67fca5d7f Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 20 Dec 2022 12:31:29 -0500 Subject: [PATCH 358/543] pNFS/filelayout: Fix coalescing test for single DS [ Upstream commit a6b9d2fa0024e7e399c26facd0fb466b7396e2b9 ] When there is a single DS no striping constraints need to be placed on the IO. When such constraint is applied then buffered reads don't coalesce to the DS's rsize. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust Signed-off-by: Sasha Levin --- fs/nfs/filelayout/filelayout.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index ad34a33b0737..4974cd18ca46 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -783,6 +783,12 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, return &fl->generic_hdr; } +static bool +filelayout_lseg_is_striped(const struct nfs4_filelayout_segment *flseg) +{ + return flseg->num_fh > 1; +} + /* * filelayout_pg_test(). Called by nfs_can_coalesce_requests() * @@ -803,6 +809,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, size = pnfs_generic_pg_test(pgio, prev, req); if (!size) return 0; + else if (!filelayout_lseg_is_striped(FILELAYOUT_LSEG(pgio->pg_lseg))) + return size; /* see if req and prev are in the same stripe */ if (prev) { From 2e5d5c4ae77dedc7eba0e496474ab93f05b25adf Mon Sep 17 00:00:00 2001 From: Hao Sun Date: Thu, 22 Dec 2022 10:44:14 +0800 Subject: [PATCH 359/543] selftests/bpf: check null propagation only neither reg is PTR_TO_BTF_ID [ Upstream commit cedebd74cf3883f0384af9ec26b4e6f8f1964dd4 ] Verify that nullness information is not porpagated in the branches of register to register JEQ and JNE operations if one of them is PTR_TO_BTF_ID. Implement this in C level so we can use CO-RE. Signed-off-by: Hao Sun Suggested-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20221222024414.29539-2-sunhao.th@gmail.com Signed-off-by: Martin KaFai Lau Signed-off-by: Sasha Levin --- .../bpf/prog_tests/jeq_infer_not_null.c | 9 ++++ .../bpf/progs/jeq_infer_not_null_fail.c | 42 +++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c create mode 100644 tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c diff --git a/tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c b/tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c new file mode 100644 index 000000000000..3add34df5767 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include "jeq_infer_not_null_fail.skel.h" + +void test_jeq_infer_not_null(void) +{ + RUN_TESTS(jeq_infer_not_null_fail); +} diff --git a/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c b/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c new file mode 100644 index 000000000000..f46965053acb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, u64); + __type(value, u64); +} m_hash SEC(".maps"); + +SEC("?raw_tp") +__failure __msg("R8 invalid mem access 'map_value_or_null") +int jeq_infer_not_null_ptr_to_btfid(void *ctx) +{ + struct bpf_map *map = (struct bpf_map *)&m_hash; + struct bpf_map *inner_map = map->inner_map_meta; + u64 key = 0, ret = 0, *val; + + val = bpf_map_lookup_elem(map, &key); + /* Do not mark ptr as non-null if one of them is + * PTR_TO_BTF_ID (R9), reject because of invalid + * access to map value (R8). + * + * Here, we need to inline those insns to access + * R8 directly, since compiler may use other reg + * once it figures out val==inner_map. + */ + asm volatile("r8 = %[val];\n" + "r9 = %[inner_map];\n" + "if r8 != r9 goto +1;\n" + "%[ret] = *(u64 *)(r8 +0);\n" + : [ret] "+r"(ret) + : [inner_map] "r"(inner_map), [val] "r"(val) + : "r8", "r9"); + + return ret; +} From a65644cef5dbba3b21be958e742702fb30524014 Mon Sep 17 00:00:00 2001 From: Anuradha Weeraman Date: Sun, 25 Dec 2022 23:12:22 +0530 Subject: [PATCH 360/543] net: ethernet: marvell: octeontx2: Fix uninitialized variable warning [ Upstream commit d3805695fe1e7383517903715cefc9bbdcffdc90 ] Fix for uninitialized variable warning. Addresses-Coverity: ("Uninitialized scalar variable") Signed-off-by: Anuradha Weeraman Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c b/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c index fa8029a94068..eb25e458266c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c @@ -589,7 +589,7 @@ int rvu_mbox_handler_mcs_free_resources(struct rvu *rvu, u16 pcifunc = req->hdr.pcifunc; struct mcs_rsrc_map *map; struct mcs *mcs; - int rc; + int rc = 0; if (req->mcs_id >= rvu->mcs_blk_cnt) return MCS_AF_ERR_INVALID_MCSID; From dc6466fcdefe0ec08f5b8192d40d5df89b96b5b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ricardo=20Ca=C3=B1uelo?= Date: Wed, 12 Oct 2022 08:29:49 +0200 Subject: [PATCH 361/543] tools/virtio: initialize spinlocks in vring_test.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c262f75cb6bb5a63828e72ce3b8fe808e5029479 ] The virtio_device vqs_list spinlocks must be initialized before use to prevent functions that manipulate the device virtualqueues, such as vring_new_virtqueue(), from blocking indefinitely. Signed-off-by: Ricardo Cañuelo Message-Id: <20221012062949.1526176-1-ricardo.canuelo@collabora.com> Signed-off-by: Michael S. Tsirkin Reviewed-by: Xuan Zhuo Signed-off-by: Sasha Levin --- tools/virtio/vringh_test.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c index fa87b58bd5fa..98ff808d6f0c 100644 --- a/tools/virtio/vringh_test.c +++ b/tools/virtio/vringh_test.c @@ -308,6 +308,7 @@ static int parallel_test(u64 features, gvdev.vdev.features = features; INIT_LIST_HEAD(&gvdev.vdev.vqs); + spin_lock_init(&gvdev.vdev.vqs_list_lock); gvdev.to_host_fd = to_host[1]; gvdev.notifies = 0; @@ -455,6 +456,7 @@ int main(int argc, char *argv[]) getrange = getrange_iov; vdev.features = 0; INIT_LIST_HEAD(&vdev.vqs); + spin_lock_init(&vdev.vqs_list_lock); while (argv[1]) { if (strcmp(argv[1], "--indirect") == 0) From 6dbffca77f120cb70e0d5326571a81038282b309 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Nov 2022 15:17:53 +0200 Subject: [PATCH 362/543] vdpa/mlx5: Return error on vlan ctrl commands if not supported MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5aec804936bbff182081f1cdc271fcb76af1a4ff ] Check if VIRTIO_NET_F_CTRL_VLAN is negotiated and return error if control VQ command is received. Signed-off-by: Eli Cohen Message-Id: <20221114131759.57883-3-elic@nvidia.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Acked-by: Eugenio Pérez Signed-off-by: Sasha Levin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 444d6572b2d0..b06260a37680 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1823,6 +1823,9 @@ static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd) size_t read; u16 id; + if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN))) + return status; + switch (cmd) { case VIRTIO_NET_CTRL_VLAN_ADD: read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan)); From 909b96557b38ea4bf99f5ab7dc8d6d339a96ab60 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Nov 2022 15:17:55 +0200 Subject: [PATCH 363/543] vdpa/mlx5: Avoid using reslock in event_handler [ Upstream commit 0dbc1b4ae07d003b2e88ba9d4142846320f8e349 ] event_handler runs under atomic context and may not acquire reslock. We can still guarantee that the handler won't be called after suspend by clearing nb_registered, unregistering the handler and flushing the workqueue. Signed-off-by: Eli Cohen Message-Id: <20221114131759.57883-5-elic@nvidia.com> Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index b06260a37680..98dd8ce8af26 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2845,8 +2845,8 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev) int i; down_write(&ndev->reslock); - mlx5_notifier_unregister(mvdev->mdev, &ndev->nb); ndev->nb_registered = false; + mlx5_notifier_unregister(mvdev->mdev, &ndev->nb); flush_workqueue(ndev->mvdev.wq); for (i = 0; i < ndev->cur_num_vqs; i++) { mvq = &ndev->vqs[i]; @@ -3024,7 +3024,7 @@ static void update_carrier(struct work_struct *work) else ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP); - if (ndev->config_cb.callback) + if (ndev->nb_registered && ndev->config_cb.callback) ndev->config_cb.callback(ndev->config_cb.private); kfree(wqent); @@ -3041,21 +3041,13 @@ static int event_handler(struct notifier_block *nb, unsigned long event, void *p switch (eqe->sub_type) { case MLX5_PORT_CHANGE_SUBTYPE_DOWN: case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: - down_read(&ndev->reslock); - if (!ndev->nb_registered) { - up_read(&ndev->reslock); - return NOTIFY_DONE; - } wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC); - if (!wqent) { - up_read(&ndev->reslock); + if (!wqent) return NOTIFY_DONE; - } wqent->mvdev = &ndev->mvdev; INIT_WORK(&wqent->work, update_carrier); queue_work(ndev->mvdev.wq, &wqent->work); - up_read(&ndev->reslock); ret = NOTIFY_OK; break; default: @@ -3242,8 +3234,8 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device * struct workqueue_struct *wq; if (ndev->nb_registered) { - mlx5_notifier_unregister(mvdev->mdev, &ndev->nb); ndev->nb_registered = false; + mlx5_notifier_unregister(mvdev->mdev, &ndev->nb); } wq = mvdev->wq; mvdev->wq = NULL; From af3fca732e9ab84134547c0e36c5a453b2848ddb Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 14 Nov 2022 15:17:56 +0200 Subject: [PATCH 364/543] vdpa/mlx5: Avoid overwriting CVQ iotlb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 38fc462f57ef4e5dc722bab6824854b105de8aa2 ] When qemu uses different address spaces for data and control virtqueues, the current code would overwrite the control virtqueue iotlb through the dup_iotlb call. Fix this by referring to the address space identifier and the group to asid mapping to determine which mapping needs to be updated. We also move the address space logic from mlx5 net to core directory. Reported-by: Eugenio Pérez Signed-off-by: Eli Cohen Message-Id: <20221114131759.57883-6-elic@nvidia.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Acked-by: Eugenio Pérez Signed-off-by: Sasha Levin --- drivers/vdpa/mlx5/core/mlx5_vdpa.h | 5 +-- drivers/vdpa/mlx5/core/mr.c | 44 ++++++++++++++++----------- drivers/vdpa/mlx5/net/mlx5_vnet.c | 49 ++++++------------------------ 3 files changed, 39 insertions(+), 59 deletions(-) diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h index 6af9fdbb86b7..058fbe28107e 100644 --- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h +++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h @@ -116,8 +116,9 @@ int mlx5_vdpa_create_mkey(struct mlx5_vdpa_dev *mvdev, u32 *mkey, u32 *in, int inlen); int mlx5_vdpa_destroy_mkey(struct mlx5_vdpa_dev *mvdev, u32 mkey); int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, - bool *change_map); -int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb); + bool *change_map, unsigned int asid); +int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, + unsigned int asid); void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev); #define mlx5_vdpa_warn(__dev, format, ...) \ diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c index a639b9208d41..a4d7ee2339fa 100644 --- a/drivers/vdpa/mlx5/core/mr.c +++ b/drivers/vdpa/mlx5/core/mr.c @@ -511,7 +511,8 @@ out: mutex_unlock(&mr->mkey_mtx); } -static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) +static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, + struct vhost_iotlb *iotlb, unsigned int asid) { struct mlx5_vdpa_mr *mr = &mvdev->mr; int err; @@ -519,42 +520,49 @@ static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb if (mr->initialized) return 0; - if (iotlb) - err = create_user_mr(mvdev, iotlb); - else - err = create_dma_mr(mvdev, mr); + if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) { + if (iotlb) + err = create_user_mr(mvdev, iotlb); + else + err = create_dma_mr(mvdev, mr); - if (err) - return err; + if (err) + return err; + } - err = dup_iotlb(mvdev, iotlb); - if (err) - goto out_err; + if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) { + err = dup_iotlb(mvdev, iotlb); + if (err) + goto out_err; + } mr->initialized = true; return 0; out_err: - if (iotlb) - destroy_user_mr(mvdev, mr); - else - destroy_dma_mr(mvdev, mr); + if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) { + if (iotlb) + destroy_user_mr(mvdev, mr); + else + destroy_dma_mr(mvdev, mr); + } return err; } -int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) +int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, + unsigned int asid) { int err; mutex_lock(&mvdev->mr.mkey_mtx); - err = _mlx5_vdpa_create_mr(mvdev, iotlb); + err = _mlx5_vdpa_create_mr(mvdev, iotlb, asid); mutex_unlock(&mvdev->mr.mkey_mtx); return err; } int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, - bool *change_map) + bool *change_map, unsigned int asid) { struct mlx5_vdpa_mr *mr = &mvdev->mr; int err = 0; @@ -566,7 +574,7 @@ int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *io *change_map = true; } if (!*change_map) - err = _mlx5_vdpa_create_mr(mvdev, iotlb); + err = _mlx5_vdpa_create_mr(mvdev, iotlb, asid); mutex_unlock(&mr->mkey_mtx); return err; diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 98dd8ce8af26..3a6dbbc6440d 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2394,7 +2394,8 @@ static void restore_channels_info(struct mlx5_vdpa_net *ndev) } } -static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) +static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, + struct vhost_iotlb *iotlb, unsigned int asid) { struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); int err; @@ -2406,7 +2407,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb teardown_driver(ndev); mlx5_vdpa_destroy_mr(mvdev); - err = mlx5_vdpa_create_mr(mvdev, iotlb); + err = mlx5_vdpa_create_mr(mvdev, iotlb, asid); if (err) goto err_mr; @@ -2587,7 +2588,7 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev) ++mvdev->generation; if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { - if (mlx5_vdpa_create_mr(mvdev, NULL)) + if (mlx5_vdpa_create_mr(mvdev, NULL, 0)) mlx5_vdpa_warn(mvdev, "create MR failed\n"); } up_write(&ndev->reslock); @@ -2623,41 +2624,20 @@ static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev) return mvdev->generation; } -static int set_map_control(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) -{ - u64 start = 0ULL, last = 0ULL - 1; - struct vhost_iotlb_map *map; - int err = 0; - - spin_lock(&mvdev->cvq.iommu_lock); - vhost_iotlb_reset(mvdev->cvq.iotlb); - - for (map = vhost_iotlb_itree_first(iotlb, start, last); map; - map = vhost_iotlb_itree_next(map, start, last)) { - err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start, - map->last, map->addr, map->perm); - if (err) - goto out; - } - -out: - spin_unlock(&mvdev->cvq.iommu_lock); - return err; -} - -static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb) +static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb, + unsigned int asid) { bool change_map; int err; - err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map); + err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map, asid); if (err) { mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err); return err; } if (change_map) - err = mlx5_vdpa_change_map(mvdev, iotlb); + err = mlx5_vdpa_change_map(mvdev, iotlb, asid); return err; } @@ -2670,16 +2650,7 @@ static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid, int err = -EINVAL; down_write(&ndev->reslock); - if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) { - err = set_map_data(mvdev, iotlb); - if (err) - goto out; - } - - if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) - err = set_map_control(mvdev, iotlb); - -out: + err = set_map_data(mvdev, iotlb, asid); up_write(&ndev->reslock); return err; } @@ -3182,7 +3153,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, goto err_mpfs; if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) { - err = mlx5_vdpa_create_mr(mvdev, NULL); + err = mlx5_vdpa_create_mr(mvdev, NULL, 0); if (err) goto err_res; } From ab23a6dbfc7232a6d5b6baff781ad1f3d32dbdb0 Mon Sep 17 00:00:00 2001 From: Angus Chen Date: Tue, 1 Nov 2022 19:16:54 +0800 Subject: [PATCH 365/543] virtio_pci: modify ENOENT to EINVAL [ Upstream commit b66ead2d0ecac00c3a06a6218af5411cb5fcb5d5 ] Virtio_crypto use max_data_queues+1 to setup vqs, we use vp_modern_get_num_queues to protect the vq range in setup_vq. We could enter index >= vp_modern_get_num_queues(mdev) in setup_vq if common->num_queues is not set well,and it return -ENOENT. It is better to use -EINVAL instead. Signed-off-by: Angus Chen Message-Id: <20221101111655.1947-1-angus.chen@jaguarmicro.com> Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/virtio/virtio_pci_modern.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index c3b9f2761849..edf2e18014cd 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -303,7 +303,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, int err; if (index >= vp_modern_get_num_queues(mdev)) - return ERR_PTR(-ENOENT); + return ERR_PTR(-EINVAL); /* Check if queue is either not available or already active. */ num = vp_modern_get_queue_size(mdev, index); From c6c6af90101f9f7dd2dc0330f48497cc55b37941 Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Mon, 28 Nov 2022 07:57:15 -0800 Subject: [PATCH 366/543] vduse: Validate vq_num in vduse_validate_config() [ Upstream commit 937c783aa3d8d77963ec91918d3298edb45b9161 ] Add a limit to 'config->vq_num' which is user controlled data which comes from an vduse_ioctl to prevent large memory allocations. Micheal says - This limit is somewhat arbitrary. However, currently virtio pci and ccw are limited to a 16 bit vq number. While MMIO isn't it is also isn't used with lots of VQs due to current lack of support for per-vq interrupts. Thus, the 0xffff limit on number of VQs corresponding to a 16-bit VQ number seems sufficient for now. This is found using static analysis with smatch. Suggested-by: Michael S. Tsirkin Signed-off-by: Harshit Mogalapalli Message-Id: <20221128155717.2579992-1-harshit.m.mogalapalli@oracle.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: Sasha Levin --- drivers/vdpa/vdpa_user/vduse_dev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 35dceee3ed56..31017ebc4d7c 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -1440,6 +1440,9 @@ static bool vduse_validate_config(struct vduse_dev_config *config) if (config->config_size > PAGE_SIZE) return false; + if (config->vq_num > 0xffff) + return false; + if (!device_is_allowed(config->device_id)) return false; From 11891c80f1a9ea51ab15c0ce019977bcd3e229d3 Mon Sep 17 00:00:00 2001 From: Cindy Lu Date: Wed, 14 Dec 2022 13:43:06 +0800 Subject: [PATCH 367/543] vdpa_sim_net: should not drop the multicast/broadcast packet [ Upstream commit 72455a1142527e607e1d69439f3ffa2ef6d09e26 ] In the receive_filter(), should not drop the packet with the broadcast/multicast address. Add the check for this Signed-off-by: Cindy Lu Message-Id: <20221214054306.24145-1-lulu@redhat.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: Sasha Levin --- drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c index 11f5a121df24..584b975a98a7 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c @@ -62,6 +62,9 @@ static bool receive_filter(struct vdpasim *vdpasim, size_t len) if (len < ETH_ALEN + hdr_len) return false; + if (is_broadcast_ether_addr(vdpasim->buffer + hdr_len) || + is_multicast_ether_addr(vdpasim->buffer + hdr_len)) + return true; if (!strncmp(vdpasim->buffer + hdr_len, vio_config->mac, ETH_ALEN)) return true; From 27b5107850688d27a5621ad954de14eee7279c76 Mon Sep 17 00:00:00 2001 From: Daniil Tatianin Date: Mon, 26 Dec 2022 14:48:23 +0300 Subject: [PATCH 368/543] net/ethtool/ioctl: return -EOPNOTSUPP if we have no phy stats [ Upstream commit 9deb1e9fb88b1120a908676fa33bdf9e2eeaefce ] It's not very useful to copy back an empty ethtool_stats struct and return 0 if we didn't actually have any stats. This also allows for further simplification of this function in the future commits. Signed-off-by: Daniil Tatianin Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ethtool/ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 81fe2422fe58..038398d41a93 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -2094,7 +2094,8 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr) return n_stats; if (n_stats > S32_MAX / sizeof(u64)) return -ENOMEM; - WARN_ON_ONCE(!n_stats); + if (WARN_ON_ONCE(!n_stats)) + return -EOPNOTSUPP; if (copy_from_user(&stats, useraddr, sizeof(stats))) return -EFAULT; From 315a779c838d3dbc1a645069d6f028bc1e953f73 Mon Sep 17 00:00:00 2001 From: Chunhao Lin Date: Mon, 26 Dec 2022 20:31:52 +0800 Subject: [PATCH 369/543] r8169: move rtl_wol_enable_rx() and rtl_prepare_power_down() [ Upstream commit ad425666a1f05d9b215a84cf010c3789b2ea8206 ] There is no functional change. Moving these two functions for following patch "r8169: fix dmar pte write access is not set error". Signed-off-by: Chunhao Lin Reviewed-by: Heiner Kallweit Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/realtek/r8169_main.c | 44 +++++++++++------------ 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index fe8dc8e0522b..316c925c956f 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2207,28 +2207,6 @@ static int rtl_set_mac_address(struct net_device *dev, void *p) return 0; } -static void rtl_wol_enable_rx(struct rtl8169_private *tp) -{ - if (tp->mac_version >= RTL_GIGA_MAC_VER_25) - RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) | - AcceptBroadcast | AcceptMulticast | AcceptMyPhys); -} - -static void rtl_prepare_power_down(struct rtl8169_private *tp) -{ - if (tp->dash_type != RTL_DASH_NONE) - return; - - if (tp->mac_version == RTL_GIGA_MAC_VER_32 || - tp->mac_version == RTL_GIGA_MAC_VER_33) - rtl_ephy_write(tp, 0x19, 0xff64); - - if (device_may_wakeup(tp_to_dev(tp))) { - phy_speed_down(tp->phydev, false); - rtl_wol_enable_rx(tp); - } -} - static void rtl_init_rxcfg(struct rtl8169_private *tp) { switch (tp->mac_version) { @@ -2452,6 +2430,28 @@ static void rtl_enable_rxdvgate(struct rtl8169_private *tp) rtl_wait_txrx_fifo_empty(tp); } +static void rtl_wol_enable_rx(struct rtl8169_private *tp) +{ + if (tp->mac_version >= RTL_GIGA_MAC_VER_25) + RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) | + AcceptBroadcast | AcceptMulticast | AcceptMyPhys); +} + +static void rtl_prepare_power_down(struct rtl8169_private *tp) +{ + if (tp->dash_type != RTL_DASH_NONE) + return; + + if (tp->mac_version == RTL_GIGA_MAC_VER_32 || + tp->mac_version == RTL_GIGA_MAC_VER_33) + rtl_ephy_write(tp, 0x19, 0xff64); + + if (device_may_wakeup(tp_to_dev(tp))) { + phy_speed_down(tp->phydev, false); + rtl_wol_enable_rx(tp); + } +} + static void rtl_set_tx_config_registers(struct rtl8169_private *tp) { u32 val = TX_DMA_BURST << TxDMAShift | From af0bedfbcb81412bb95738937267744f45358dee Mon Sep 17 00:00:00 2001 From: Chunhao Lin Date: Mon, 26 Dec 2022 20:31:53 +0800 Subject: [PATCH 370/543] r8169: fix dmar pte write access is not set error [ Upstream commit bb41c13c05c23d9bc46b4e37d8914078c6a40e3a ] When close device, if wol is enabled, rx will be enabled. When open device it will cause rx packet to be dma to the wrong memory address after pci_set_master() and system log will show blow messages. DMAR: DRHD: handling fault status reg 3 DMAR: [DMA Write] Request device [02:00.0] PASID ffffffff fault addr ffdd4000 [fault reason 05] PTE Write access is not set In this patch, driver disable tx/rx when close device. If wol is enabled, only enable rx filter and disable rxdv_gate(if support) to let hardware only receive packet to fifo but not to dma it. Signed-off-by: Chunhao Lin Reviewed-by: Heiner Kallweit Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/realtek/r8169_main.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 316c925c956f..cabed1b7b45e 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2435,6 +2435,9 @@ static void rtl_wol_enable_rx(struct rtl8169_private *tp) if (tp->mac_version >= RTL_GIGA_MAC_VER_25) RTL_W32(tp, RxConfig, RTL_R32(tp, RxConfig) | AcceptBroadcast | AcceptMulticast | AcceptMyPhys); + + if (tp->mac_version >= RTL_GIGA_MAC_VER_40) + rtl_disable_rxdvgate(tp); } static void rtl_prepare_power_down(struct rtl8169_private *tp) @@ -3869,7 +3872,7 @@ static void rtl8169_tx_clear(struct rtl8169_private *tp) netdev_reset_queue(tp->dev); } -static void rtl8169_cleanup(struct rtl8169_private *tp, bool going_down) +static void rtl8169_cleanup(struct rtl8169_private *tp) { napi_disable(&tp->napi); @@ -3881,9 +3884,6 @@ static void rtl8169_cleanup(struct rtl8169_private *tp, bool going_down) rtl_rx_close(tp); - if (going_down && tp->dev->wol_enabled) - goto no_reset; - switch (tp->mac_version) { case RTL_GIGA_MAC_VER_28: case RTL_GIGA_MAC_VER_31: @@ -3904,7 +3904,7 @@ static void rtl8169_cleanup(struct rtl8169_private *tp, bool going_down) } rtl_hw_reset(tp); -no_reset: + rtl8169_tx_clear(tp); rtl8169_init_ring_indexes(tp); } @@ -3915,7 +3915,7 @@ static void rtl_reset_work(struct rtl8169_private *tp) netif_stop_queue(tp->dev); - rtl8169_cleanup(tp, false); + rtl8169_cleanup(tp); for (i = 0; i < NUM_RX_DESC; i++) rtl8169_mark_to_asic(tp->RxDescArray + i); @@ -4601,7 +4601,7 @@ static void rtl8169_down(struct rtl8169_private *tp) pci_clear_master(tp->pci_dev); rtl_pci_commit(tp); - rtl8169_cleanup(tp, true); + rtl8169_cleanup(tp); rtl_disable_exit_l1(tp); rtl_prepare_power_down(tp); } From 6c27fc15747967e350fa9f253a034ff03e943ccf Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Fri, 16 Dec 2022 14:18:54 -0800 Subject: [PATCH 371/543] bpf: keep a reference to the mm, in case the task is dead. [ Upstream commit 7ff94f276f8ea05df82eb115225e9b26f47a3347 ] Fix the system crash that happens when a task iterator travel through vma of tasks. In task iterators, we used to access mm by following the pointer on the task_struct; however, the death of a task will clear the pointer, even though we still hold the task_struct. That can cause an unexpected crash for a null pointer when an iterator is visiting a task that dies during the visit. Keeping a reference of mm on the iterator ensures we always have a valid pointer to mm. Co-developed-by: Song Liu Signed-off-by: Song Liu Signed-off-by: Kui-Feng Lee Reported-by: Nathan Slingerland Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20221216221855.4122288-2-kuifeng@meta.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/task_iter.c | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index c2a2182ce570..c4ab9d6cdbe9 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -438,6 +438,7 @@ struct bpf_iter_seq_task_vma_info { */ struct bpf_iter_seq_task_common common; struct task_struct *task; + struct mm_struct *mm; struct vm_area_struct *vma; u32 tid; unsigned long prev_vm_start; @@ -456,16 +457,19 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info) enum bpf_task_vma_iter_find_op op; struct vm_area_struct *curr_vma; struct task_struct *curr_task; + struct mm_struct *curr_mm; u32 saved_tid = info->tid; /* If this function returns a non-NULL vma, it holds a reference to - * the task_struct, and holds read lock on vma->mm->mmap_lock. + * the task_struct, holds a refcount on mm->mm_users, and holds + * read lock on vma->mm->mmap_lock. * If this function returns NULL, it does not hold any reference or * lock. */ if (info->task) { curr_task = info->task; curr_vma = info->vma; + curr_mm = info->mm; /* In case of lock contention, drop mmap_lock to unblock * the writer. * @@ -504,13 +508,15 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info) * 4.2) VMA2 and VMA2' covers different ranges, process * VMA2'. */ - if (mmap_lock_is_contended(curr_task->mm)) { + if (mmap_lock_is_contended(curr_mm)) { info->prev_vm_start = curr_vma->vm_start; info->prev_vm_end = curr_vma->vm_end; op = task_vma_iter_find_vma; - mmap_read_unlock(curr_task->mm); - if (mmap_read_lock_killable(curr_task->mm)) + mmap_read_unlock(curr_mm); + if (mmap_read_lock_killable(curr_mm)) { + mmput(curr_mm); goto finish; + } } else { op = task_vma_iter_next_vma; } @@ -535,42 +541,47 @@ again: op = task_vma_iter_find_vma; } - if (!curr_task->mm) + curr_mm = get_task_mm(curr_task); + if (!curr_mm) goto next_task; - if (mmap_read_lock_killable(curr_task->mm)) + if (mmap_read_lock_killable(curr_mm)) { + mmput(curr_mm); goto finish; + } } switch (op) { case task_vma_iter_first_vma: - curr_vma = find_vma(curr_task->mm, 0); + curr_vma = find_vma(curr_mm, 0); break; case task_vma_iter_next_vma: - curr_vma = find_vma(curr_task->mm, curr_vma->vm_end); + curr_vma = find_vma(curr_mm, curr_vma->vm_end); break; case task_vma_iter_find_vma: /* We dropped mmap_lock so it is necessary to use find_vma * to find the next vma. This is similar to the mechanism * in show_smaps_rollup(). */ - curr_vma = find_vma(curr_task->mm, info->prev_vm_end - 1); + curr_vma = find_vma(curr_mm, info->prev_vm_end - 1); /* case 1) and 4.2) above just use curr_vma */ /* check for case 2) or case 4.1) above */ if (curr_vma && curr_vma->vm_start == info->prev_vm_start && curr_vma->vm_end == info->prev_vm_end) - curr_vma = find_vma(curr_task->mm, curr_vma->vm_end); + curr_vma = find_vma(curr_mm, curr_vma->vm_end); break; } if (!curr_vma) { /* case 3) above, or case 2) 4.1) with vma->next == NULL */ - mmap_read_unlock(curr_task->mm); + mmap_read_unlock(curr_mm); + mmput(curr_mm); goto next_task; } info->task = curr_task; info->vma = curr_vma; + info->mm = curr_mm; return curr_vma; next_task: @@ -579,6 +590,7 @@ next_task: put_task_struct(curr_task); info->task = NULL; + info->mm = NULL; info->tid++; goto again; @@ -587,6 +599,7 @@ finish: put_task_struct(curr_task); info->task = NULL; info->vma = NULL; + info->mm = NULL; return NULL; } @@ -658,7 +671,9 @@ static void task_vma_seq_stop(struct seq_file *seq, void *v) */ info->prev_vm_start = ~0UL; info->prev_vm_end = info->vma->vm_end; - mmap_read_unlock(info->task->mm); + mmap_read_unlock(info->mm); + mmput(info->mm); + info->mm = NULL; put_task_struct(info->task); info->task = NULL; } From 307e8d9eef653b0157073b65819415b778c220ad Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 12 Dec 2022 13:04:11 +0100 Subject: [PATCH 372/543] RDMA/srp: Move large values to a new enum for gcc13 [ Upstream commit 56c5dab20a6391604df9521f812c01d1e3fe1bd0 ] Since gcc13, each member of an enum has the same type as the enum [1]. And that is inherited from its members. Provided these two: SRP_TAG_NO_REQ = ~0U, SRP_TAG_TSK_MGMT = 1U << 31 all other members are unsigned ints. Esp. with SRP_MAX_SGE and SRP_TSK_MGMT_SQ_SIZE and their use in min(), this results in the following warnings: include/linux/minmax.h:20:35: error: comparison of distinct pointer types lacks a cast drivers/infiniband/ulp/srp/ib_srp.c:563:42: note: in expansion of macro 'min' include/linux/minmax.h:20:35: error: comparison of distinct pointer types lacks a cast drivers/infiniband/ulp/srp/ib_srp.c:2369:27: note: in expansion of macro 'min' So move the large values away to a separate enum, so that they don't affect other members. [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=36113 Link: https://lore.kernel.org/r/20221212120411.13750-1-jirislaby@kernel.org Signed-off-by: Jiri Slaby (SUSE) Reviewed-by: Bart Van Assche Signed-off-by: Leon Romanovsky Signed-off-by: Sasha Levin --- drivers/infiniband/ulp/srp/ib_srp.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 00b0068fda20..5d94db453df3 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -62,9 +62,6 @@ enum { SRP_DEFAULT_CMD_SQ_SIZE = SRP_DEFAULT_QUEUE_SIZE - SRP_RSP_SQ_SIZE - SRP_TSK_MGMT_SQ_SIZE, - SRP_TAG_NO_REQ = ~0U, - SRP_TAG_TSK_MGMT = 1U << 31, - SRP_MAX_PAGES_PER_MR = 512, SRP_MAX_ADD_CDB_LEN = 16, @@ -79,6 +76,11 @@ enum { sizeof(struct srp_imm_buf), }; +enum { + SRP_TAG_NO_REQ = ~0U, + SRP_TAG_TSK_MGMT = BIT(31), +}; + enum srp_target_state { SRP_TARGET_SCANNING, SRP_TARGET_LIVE, From 5744ba445c9a129514b0d29da1a81de9d1edaa9e Mon Sep 17 00:00:00 2001 From: Po-Hsu Lin Date: Thu, 29 Dec 2022 13:41:06 +0800 Subject: [PATCH 373/543] selftests: net: fix cmsg_so_mark.sh test hang [ Upstream commit 1573c6882018f69991aead951d09423ce978adac ] This cmsg_so_mark.sh test will hang on non-amd64 systems because of the infinity loop for argument parsing in cmsg_sender. Variable "o" in cs_parse_args() for taking getopt() should be an int, otherwise it will be 255 when getopt() returns -1 on non-amd64 system and thus causing infinity loop. Link: https://lore.kernel.org/lkml/CA+G9fYsM2k7mrF7W4V_TrZ-qDauWM394=8yEJ=-t1oUg8_40YA@mail.gmail.com/t/ Signed-off-by: Po-Hsu Lin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- tools/testing/selftests/net/cmsg_sender.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index 75dd83e39207..24b21b15ed3f 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -110,7 +110,7 @@ static void __attribute__((noreturn)) cs_usage(const char *bin) static void cs_parse_args(int argc, char *argv[]) { - char o; + int o; while ((o = getopt(argc, argv, "46sS:p:m:M:d:tf:F:c:C:l:L:H:")) != -1) { switch (o) { From 853ffa1511b058c79a4c9bb1407b3b20ce311792 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 26 Dec 2022 09:00:40 +0800 Subject: [PATCH 374/543] btrfs: always report error in run_one_delayed_ref() [ Upstream commit 39f501d68ec1ed5cd5c66ac6ec2a7131c517bb92 ] Currently we have a btrfs_debug() for run_one_delayed_ref() failure, but if end users hit such problem, there will be no chance that btrfs_debug() is enabled. This can lead to very little useful info for debugging. This patch will: - Add extra info for error reporting Including: * logical bytenr * num_bytes * type * action * ref_mod - Replace the btrfs_debug() with btrfs_err() - Move the error reporting into run_one_delayed_ref() This is to avoid use-after-free, the @node can be freed in the caller. This error should only be triggered at most once. As if run_one_delayed_ref() failed, we trigger the error message, then causing the call chain to error out: btrfs_run_delayed_refs() `- btrfs_run_delayed_refs() `- btrfs_run_delayed_refs_for_head() `- run_one_delayed_ref() And we will abort the current transaction in btrfs_run_delayed_refs(). If we have to run delayed refs for the abort transaction, run_one_delayed_ref() will just cleanup the refs and do nothing, thus no new error messages would be output. Reviewed-by: Anand Jain Signed-off-by: Qu Wenruo Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/extent-tree.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2801c991814f..571fcc5ae4dc 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1706,6 +1706,11 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, BUG(); if (ret && insert_reserved) btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1); + if (ret < 0) + btrfs_err(trans->fs_info, +"failed to run delayed ref for logical %llu num_bytes %llu type %u action %u ref_mod %d: %d", + node->bytenr, node->num_bytes, node->type, + node->action, node->ref_mod, ret); return ret; } @@ -1947,8 +1952,6 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans, if (ret) { unselect_delayed_ref_head(delayed_refs, locked_ref); btrfs_put_delayed_ref(ref); - btrfs_debug(fs_info, "run_one_delayed_ref returned %d", - ret); return ret; } From 8ca718778b77d5cc207890e03df67f8f630f6eaf Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 3 Jan 2023 10:24:11 -0500 Subject: [PATCH 375/543] x86/asm: Fix an assembler warning with current binutils [ Upstream commit 55d235361fccef573990dfa5724ab453866e7816 ] Fix a warning: "found `movsd'; assuming `movsl' was meant" Signed-off-by: Mikulas Patocka Signed-off-by: Ingo Molnar Cc: linux-kernel@vger.kernel.org Signed-off-by: Sasha Levin --- arch/x86/lib/iomap_copy_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S index a1f9416bf67a..6ff2f56cb0f7 100644 --- a/arch/x86/lib/iomap_copy_64.S +++ b/arch/x86/lib/iomap_copy_64.S @@ -10,6 +10,6 @@ */ SYM_FUNC_START(__iowrite32_copy) movl %edx,%ecx - rep movsd + rep movsl RET SYM_FUNC_END(__iowrite32_copy) From 1c38cdc747f00daf7394535eae5afc4c503c59bb Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 21 Dec 2022 16:14:10 -0800 Subject: [PATCH 376/543] f2fs: let's avoid panic if extent_tree is not created [ Upstream commit df9d44b645b83fffccfb4e28c1f93376585fdec8 ] This patch avoids the below panic. pc : __lookup_extent_tree+0xd8/0x760 lr : f2fs_do_write_data_page+0x104/0x87c sp : ffffffc010cbb3c0 x29: ffffffc010cbb3e0 x28: 0000000000000000 x27: ffffff8803e7f020 x26: ffffff8803e7ed40 x25: ffffff8803e7f020 x24: ffffffc010cbb460 x23: ffffffc010cbb480 x22: 0000000000000000 x21: 0000000000000000 x20: ffffffff22e90900 x19: 0000000000000000 x18: ffffffc010c5d080 x17: 0000000000000000 x16: 0000000000000020 x15: ffffffdb1acdbb88 x14: ffffff888759e2b0 x13: 0000000000000000 x12: ffffff802da49000 x11: 000000000a001200 x10: ffffff8803e7ed40 x9 : ffffff8023195800 x8 : ffffff802da49078 x7 : 0000000000000001 x6 : 0000000000000000 x5 : 0000000000000006 x4 : ffffffc010cbba28 x3 : 0000000000000000 x2 : ffffffc010cbb480 x1 : 0000000000000000 x0 : ffffff8803e7ed40 Call trace: __lookup_extent_tree+0xd8/0x760 f2fs_do_write_data_page+0x104/0x87c f2fs_write_single_data_page+0x420/0xb60 f2fs_write_cache_pages+0x418/0xb1c __f2fs_write_data_pages+0x428/0x58c f2fs_write_data_pages+0x30/0x40 do_writepages+0x88/0x190 __writeback_single_inode+0x48/0x448 writeback_sb_inodes+0x468/0x9e8 __writeback_inodes_wb+0xb8/0x2a4 wb_writeback+0x33c/0x740 wb_do_writeback+0x2b4/0x400 wb_workfn+0xe4/0x34c process_one_work+0x24c/0x5bc worker_thread+0x3e8/0xa50 kthread+0x150/0x1b4 Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/extent_cache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 932c070173b9..6c9e6f78a3e3 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -415,7 +415,8 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, struct extent_node *en; bool ret = false; - f2fs_bug_on(sbi, !et); + if (!et) + return false; trace_f2fs_lookup_extent_tree_start(inode, pgofs); From 66c8b7b6e4bd159009231f6148476894163bec30 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 28 Dec 2022 06:34:54 -0500 Subject: [PATCH 377/543] perf/x86/rapl: Treat Tigerlake like Icelake [ Upstream commit c07311b5509f6035f1dd828db3e90ff4859cf3b9 ] Since Tigerlake seems to have inherited its cstates and other RAPL power caps from Icelake, assume it also follows Icelake for its RAPL events. Signed-off-by: Chris Wilson Signed-off-by: Rodrigo Vivi Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Zhang Rui Link: https://lore.kernel.org/r/20221228113454.1199118-1-rodrigo.vivi@intel.com Signed-off-by: Sasha Levin --- arch/x86/events/rapl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index a829492bca4c..ae5779ea4417 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -800,6 +800,8 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &model_hsx), X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &model_skl), From 060266625604b34080fc92c538cc9087f16d8b5f Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 29 Dec 2022 12:33:56 -0300 Subject: [PATCH 378/543] cifs: fix race in assemble_neg_contexts() [ Upstream commit 775e44d6d86dca400d614cbda5dab4def4951fe7 ] Serialise access of TCP_Server_Info::hostname in assemble_neg_contexts() by holding the server's mutex otherwise it might end up accessing an already-freed hostname pointer from cifs_reconnect() or cifs_resolve_server(). Signed-off-by: Paulo Alcantara (SUSE) Reviewed-by: Enzo Matsumiya Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/smb2pdu.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 4ac5b1bfaf78..727f16b426be 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -541,9 +541,10 @@ static void assemble_neg_contexts(struct smb2_negotiate_req *req, struct TCP_Server_Info *server, unsigned int *total_len) { - char *pneg_ctxt; - char *hostname = NULL; unsigned int ctxt_len, neg_context_count; + struct TCP_Server_Info *pserver; + char *pneg_ctxt; + char *hostname; if (*total_len > 200) { /* In case length corrupted don't want to overrun smb buffer */ @@ -574,8 +575,9 @@ assemble_neg_contexts(struct smb2_negotiate_req *req, * secondary channels don't have the hostname field populated * use the hostname field in the primary channel instead */ - hostname = CIFS_SERVER_IS_CHAN(server) ? - server->primary_server->hostname : server->hostname; + pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server; + cifs_server_lock(pserver); + hostname = pserver->hostname; if (hostname && (hostname[0] != 0)) { ctxt_len = build_netname_ctxt((struct smb2_netname_neg_context *)pneg_ctxt, hostname); @@ -584,6 +586,7 @@ assemble_neg_contexts(struct smb2_negotiate_req *req, neg_context_count = 3; } else neg_context_count = 2; + cifs_server_unlock(pserver); build_posix_ctxt((struct smb2_posix_neg_context *)pneg_ctxt); *total_len += sizeof(struct smb2_posix_neg_context); From 6fb614c68406fc4dba35a85676ae4ca5360077ce Mon Sep 17 00:00:00 2001 From: Aaron Thompson Date: Wed, 4 Jan 2023 10:07:37 +0000 Subject: [PATCH 379/543] memblock tests: Fix compilation error. [ Upstream commit 340726747336716350eb5a928b860a29db955f05 ] Commit cf4694be2b2cf ("tools: Add atomic_test_and_set_bit()") changed tools/arch/x86/include/asm/atomic.h to include , which causes 'make -C tools/testing/memblock' to fail with: In file included from ../../include/asm/atomic.h:6, from ../../include/linux/atomic.h:5, from ./linux/mmzone.h:5, from ../../include/linux/mm.h:5, from ../../include/linux/pfn.h:5, from ./linux/memory_hotplug.h:6, from ./linux/init.h:7, from ./linux/memblock.h:11, from tests/common.h:8, from tests/basic_api.h:5, from main.c:2: ../../include/asm/../../arch/x86/include/asm/atomic.h:11:10: fatal error: asm/asm.h: No such file or directory 11 | #include | ^~~~~~~~~~~ Create a symlink to asm/asm.h in the same manner as the existing one to asm/cmpxchg.h. Signed-off-by: Aaron Thompson Link: https://lore.kernel.org/r/010101857c402765-96e2dbc6-b82b-47e2-a437-4834dbe0b96b-000000@us-west-2.amazonses.com Signed-off-by: Mike Rapoport (IBM) Signed-off-by: Sasha Levin --- tools/testing/memblock/.gitignore | 1 + tools/testing/memblock/Makefile | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/memblock/.gitignore b/tools/testing/memblock/.gitignore index 654338e0be52..4cc7cd5aac2b 100644 --- a/tools/testing/memblock/.gitignore +++ b/tools/testing/memblock/.gitignore @@ -1,4 +1,5 @@ main memblock.c linux/memblock.h +asm/asm.h asm/cmpxchg.h diff --git a/tools/testing/memblock/Makefile b/tools/testing/memblock/Makefile index 246f7ac8489b..575e98fddc21 100644 --- a/tools/testing/memblock/Makefile +++ b/tools/testing/memblock/Makefile @@ -29,13 +29,14 @@ include: ../../../include/linux/memblock.h ../../include/linux/*.h \ @mkdir -p linux test -L linux/memblock.h || ln -s ../../../../include/linux/memblock.h linux/memblock.h + test -L asm/asm.h || ln -s ../../../arch/x86/include/asm/asm.h asm/asm.h test -L asm/cmpxchg.h || ln -s ../../../arch/x86/include/asm/cmpxchg.h asm/cmpxchg.h memblock.c: $(EXTR_SRC) test -L memblock.c || ln -s $(EXTR_SRC) memblock.c clean: - $(RM) $(TARGETS) $(OFILES) linux/memblock.h memblock.c asm/cmpxchg.h + $(RM) $(TARGETS) $(OFILES) linux/memblock.h memblock.c asm/asm.h asm/cmpxchg.h help: @echo 'Memblock simulator' From f5f3c924f4fdc87ac4a9ae5a0cb8e013a98dd8a5 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 4 Jan 2023 22:58:30 +0800 Subject: [PATCH 380/543] perf/x86/rapl: Add support for Intel Meteor Lake [ Upstream commit f52853a668bfeddd79f319d536a506f68cc2b478 ] Meteor Lake RAPL support is the same as previous Sky Lake. Add Meteor Lake model for RAPL. Signed-off-by: Zhang Rui Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230104145831.25498-1-rui.zhang@intel.com Signed-off-by: Sasha Levin --- arch/x86/events/rapl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index ae5779ea4417..589c6885560d 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -809,6 +809,8 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &model_skl), {}, }; MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); From 3848b6b2a696b16934857b98af21b219dc74cb5d Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 4 Jan 2023 22:58:31 +0800 Subject: [PATCH 381/543] perf/x86/rapl: Add support for Intel Emerald Rapids [ Upstream commit 57512b57dcfaf63c52d8ad2fb35321328cde31b0 ] Emerald Rapids RAPL support is the same as previous Sapphire Rapids. Add Emerald Rapids model for RAPL. Signed-off-by: Zhang Rui Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230104145831.25498-2-rui.zhang@intel.com Signed-off-by: Sasha Levin --- arch/x86/events/rapl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index 589c6885560d..52e6e7ed4f78 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -806,6 +806,7 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr), + X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &model_spr), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &model_skl), From b019db2fcf09bc087bd46cf4178256aac5538ab4 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 3 Jan 2023 12:00:32 -0600 Subject: [PATCH 382/543] of: fdt: Honor CONFIG_CMDLINE* even without /chosen node, take 2 [ Upstream commit 064e32dc5b03114d0767893fecdaf7b5dfd8c286 ] I do not read a strict requirement on /chosen node in either ePAPR or in Documentation/devicetree. Help text for CONFIG_CMDLINE and CONFIG_CMDLINE_EXTEND doesn't make their behavior explicitly dependent on the presence of /chosen or the presense of /chosen/bootargs. However the early check for /chosen and bailing out in early_init_dt_scan_chosen() skips CONFIG_CMDLINE handling which is not really related to /chosen node or the particular method of passing cmdline from bootloader. This leads to counterintuitive combinations (assuming CONFIG_CMDLINE_EXTEND=y): a) bootargs="foo", CONFIG_CMDLINE="bar" => cmdline=="foo bar" b) /chosen missing, CONFIG_CMDLINE="bar" => cmdline=="" c) bootargs="", CONFIG_CMDLINE="bar" => cmdline==" bar" Rework early_init_dt_scan_chosen() so that the cmdline config options are always handled. [commit msg written by Alexander Sverdlin] Cc: Alexander Sverdlin Cc: Linus Walleij Cc: Arnd Bergmann Tested-by: Geoff Levand Reviewed-by: Alexander Sverdlin Link: https://lore.kernel.org/r/20230103-dt-cmdline-fix-v1-2-7038e88b18b6@kernel.org Signed-off-by: Rob Herring Signed-off-by: Sasha Levin --- drivers/of/fdt.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 4f88e8bbdd27..f08b25195ae7 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -1163,18 +1163,32 @@ int __init early_init_dt_scan_chosen(char *cmdline) if (node < 0) node = fdt_path_offset(fdt, "/chosen@0"); if (node < 0) - return -ENOENT; + /* Handle the cmdline config options even if no /chosen node */ + goto handle_cmdline; chosen_node_offset = node; early_init_dt_check_for_initrd(node); early_init_dt_check_for_elfcorehdr(node); + rng_seed = of_get_flat_dt_prop(node, "rng-seed", &l); + if (rng_seed && l > 0) { + add_bootloader_randomness(rng_seed, l); + + /* try to clear seed so it won't be found. */ + fdt_nop_property(initial_boot_params, node, "rng-seed"); + + /* update CRC check value */ + of_fdt_crc32 = crc32_be(~0, initial_boot_params, + fdt_totalsize(initial_boot_params)); + } + /* Retrieve command line */ p = of_get_flat_dt_prop(node, "bootargs", &l); if (p != NULL && l > 0) strscpy(cmdline, p, min(l, COMMAND_LINE_SIZE)); +handle_cmdline: /* * CONFIG_CMDLINE is meant to be a default in case nothing else * managed to set the command line, unless CONFIG_CMDLINE_FORCE @@ -1195,18 +1209,6 @@ int __init early_init_dt_scan_chosen(char *cmdline) pr_debug("Command line is: %s\n", (char *)cmdline); - rng_seed = of_get_flat_dt_prop(node, "rng-seed", &l); - if (rng_seed && l > 0) { - add_bootloader_randomness(rng_seed, l); - - /* try to clear seed so it won't be found. */ - fdt_nop_property(initial_boot_params, node, "rng-seed"); - - /* update CRC check value */ - of_fdt_crc32 = crc32_be(~0, initial_boot_params, - fdt_totalsize(initial_boot_params)); - } - return 0; } From 4e18618ced3b3f79432f2428ac88bbedc6e6c955 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 15 Dec 2022 18:02:28 +0100 Subject: [PATCH 383/543] fbdev: omapfb: avoid stack overflow warning [ Upstream commit 634cf6ead93988b0da9ac054521ab63a3ba189db ] The dsi_irq_stats structure is a little too big to fit on the stack of a 32-bit task, depending on the specific gcc options: fbdev/omap2/omapfb/dss/dsi.c: In function 'dsi_dump_dsidev_irqs': fbdev/omap2/omapfb/dss/dsi.c:1621:1: error: the frame size of 1064 bytes is larger than 1024 bytes [-Werror=frame-larger-than=] Since this is only a debugfs file, performance is not critical, so just dynamically allocate it, and print an error message in there in place of a failure code when the allocation fails. Signed-off-by: Arnd Bergmann Signed-off-by: Helge Deller Signed-off-by: Sasha Levin --- drivers/video/fbdev/omap2/omapfb/dss/dsi.c | 28 ++++++++++++++-------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/video/fbdev/omap2/omapfb/dss/dsi.c b/drivers/video/fbdev/omap2/omapfb/dss/dsi.c index 54b0f034c2ed..7cddb7b8ae34 100644 --- a/drivers/video/fbdev/omap2/omapfb/dss/dsi.c +++ b/drivers/video/fbdev/omap2/omapfb/dss/dsi.c @@ -1536,22 +1536,28 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev, { struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev); unsigned long flags; - struct dsi_irq_stats stats; + struct dsi_irq_stats *stats; + + stats = kzalloc(sizeof(*stats), GFP_KERNEL); + if (!stats) { + seq_printf(s, "out of memory\n"); + return; + } spin_lock_irqsave(&dsi->irq_stats_lock, flags); - stats = dsi->irq_stats; + *stats = dsi->irq_stats; memset(&dsi->irq_stats, 0, sizeof(dsi->irq_stats)); dsi->irq_stats.last_reset = jiffies; spin_unlock_irqrestore(&dsi->irq_stats_lock, flags); seq_printf(s, "period %u ms\n", - jiffies_to_msecs(jiffies - stats.last_reset)); + jiffies_to_msecs(jiffies - stats->last_reset)); - seq_printf(s, "irqs %d\n", stats.irq_count); + seq_printf(s, "irqs %d\n", stats->irq_count); #define PIS(x) \ - seq_printf(s, "%-20s %10d\n", #x, stats.dsi_irqs[ffs(DSI_IRQ_##x)-1]) + seq_printf(s, "%-20s %10d\n", #x, stats->dsi_irqs[ffs(DSI_IRQ_##x)-1]) seq_printf(s, "-- DSI%d interrupts --\n", dsi->module_id + 1); PIS(VC0); @@ -1575,10 +1581,10 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev, #define PIS(x) \ seq_printf(s, "%-20s %10d %10d %10d %10d\n", #x, \ - stats.vc_irqs[0][ffs(DSI_VC_IRQ_##x)-1], \ - stats.vc_irqs[1][ffs(DSI_VC_IRQ_##x)-1], \ - stats.vc_irqs[2][ffs(DSI_VC_IRQ_##x)-1], \ - stats.vc_irqs[3][ffs(DSI_VC_IRQ_##x)-1]); + stats->vc_irqs[0][ffs(DSI_VC_IRQ_##x)-1], \ + stats->vc_irqs[1][ffs(DSI_VC_IRQ_##x)-1], \ + stats->vc_irqs[2][ffs(DSI_VC_IRQ_##x)-1], \ + stats->vc_irqs[3][ffs(DSI_VC_IRQ_##x)-1]); seq_printf(s, "-- VC interrupts --\n"); PIS(CS); @@ -1594,7 +1600,7 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev, #define PIS(x) \ seq_printf(s, "%-20s %10d\n", #x, \ - stats.cio_irqs[ffs(DSI_CIO_IRQ_##x)-1]); + stats->cio_irqs[ffs(DSI_CIO_IRQ_##x)-1]); seq_printf(s, "-- CIO interrupts --\n"); PIS(ERRSYNCESC1); @@ -1618,6 +1624,8 @@ static void dsi_dump_dsidev_irqs(struct platform_device *dsidev, PIS(ULPSACTIVENOT_ALL0); PIS(ULPSACTIVENOT_ALL1); #undef PIS + + kfree(stats); } static void dsi1_dump_irqs(struct seq_file *s) From 3b062a48b0de25f3233c2189d54aee9ec9bde163 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 19 Dec 2022 13:32:51 -0800 Subject: [PATCH 384/543] Bluetooth: hci_sync: Fix use HCI_OP_LE_READ_BUFFER_SIZE_V2 commit 3a4d29b6d631bb00236a98887e1039bbfc1b6ab5 upstream. Don't try to use HCI_OP_LE_READ_BUFFER_SIZE_V2 if controller don't support ISO channels, but in order to check if ISO channels are supported HCI_OP_LE_READ_LOCAL_FEATURES needs to be done earlier so the features bits can be checked on hci_le_read_buffer_size_sync. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216817 Fixes: c1631dbc00c1 ("Bluetooth: hci_sync: Fix hci_read_buffer_size_sync") Cc: stable@vger.kernel.org # 6.1 Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_sync.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 3a68d9bc43b8..8d6c8cbfe1de 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -3554,7 +3554,7 @@ static const struct hci_init_stage hci_init2[] = { static int hci_le_read_buffer_size_sync(struct hci_dev *hdev) { /* Use Read LE Buffer Size V2 if supported */ - if (hdev->commands[41] & 0x20) + if (iso_capable(hdev) && hdev->commands[41] & 0x20) return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_BUFFER_SIZE_V2, 0, NULL, HCI_CMD_TIMEOUT); @@ -3579,10 +3579,10 @@ static int hci_le_read_supported_states_sync(struct hci_dev *hdev) /* LE Controller init stage 2 command sequence */ static const struct hci_init_stage le_init2[] = { - /* HCI_OP_LE_READ_BUFFER_SIZE */ - HCI_INIT(hci_le_read_buffer_size_sync), /* HCI_OP_LE_READ_LOCAL_FEATURES */ HCI_INIT(hci_le_read_local_features_sync), + /* HCI_OP_LE_READ_BUFFER_SIZE */ + HCI_INIT(hci_le_read_buffer_size_sync), /* HCI_OP_LE_READ_SUPPORTED_STATES */ HCI_INIT(hci_le_read_supported_states_sync), {} From ea3ebda47dd56f6e1c62f2e0e1b6e1b0a973e447 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 29 Dec 2022 11:28:29 +0100 Subject: [PATCH 385/543] Bluetooth: hci_qca: Fix driver shutdown on closed serdev commit 272970be3dabd24cbe50e393ffee8f04aec3b9a8 upstream. The driver shutdown callback (which sends EDL_SOC_RESET to the device over serdev) should not be invoked when HCI device is not open (e.g. if hci_dev_open_sync() failed), because the serdev and its TTY are not open either. Also skip this step if device is powered off (qca_power_shutdown()). The shutdown callback causes use-after-free during system reboot with Qualcomm Atheros Bluetooth: Unable to handle kernel paging request at virtual address 0072662f67726fd7 ... CPU: 6 PID: 1 Comm: systemd-shutdow Tainted: G W 6.1.0-rt5-00325-g8a5f56bcfcca #8 Hardware name: Qualcomm Technologies, Inc. Robotics RB5 (DT) Call trace: tty_driver_flush_buffer+0x4/0x30 serdev_device_write_flush+0x24/0x34 qca_serdev_shutdown+0x80/0x130 [hci_uart] device_shutdown+0x15c/0x260 kernel_restart+0x48/0xac KASAN report: BUG: KASAN: use-after-free in tty_driver_flush_buffer+0x1c/0x50 Read of size 8 at addr ffff16270c2e0018 by task systemd-shutdow/1 CPU: 7 PID: 1 Comm: systemd-shutdow Not tainted 6.1.0-next-20221220-00014-gb85aaf97fb01-dirty #28 Hardware name: Qualcomm Technologies, Inc. Robotics RB5 (DT) Call trace: dump_backtrace.part.0+0xdc/0xf0 show_stack+0x18/0x30 dump_stack_lvl+0x68/0x84 print_report+0x188/0x488 kasan_report+0xa4/0xf0 __asan_load8+0x80/0xac tty_driver_flush_buffer+0x1c/0x50 ttyport_write_flush+0x34/0x44 serdev_device_write_flush+0x48/0x60 qca_serdev_shutdown+0x124/0x274 device_shutdown+0x1e8/0x350 kernel_restart+0x48/0xb0 __do_sys_reboot+0x244/0x2d0 __arm64_sys_reboot+0x54/0x70 invoke_syscall+0x60/0x190 el0_svc_common.constprop.0+0x7c/0x160 do_el0_svc+0x44/0xf0 el0_svc+0x2c/0x6c el0t_64_sync_handler+0xbc/0x140 el0t_64_sync+0x190/0x194 Fixes: 7e7bbddd029b ("Bluetooth: hci_qca: Fix qca6390 enable failure after warm reboot") Cc: Signed-off-by: Krzysztof Kozlowski Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/hci_qca.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index bae9b2a408d9..e4398590b0ed 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2157,10 +2157,17 @@ static void qca_serdev_shutdown(struct device *dev) int timeout = msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS); struct serdev_device *serdev = to_serdev_device(dev); struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev); + struct hci_uart *hu = &qcadev->serdev_hu; + struct hci_dev *hdev = hu->hdev; + struct qca_data *qca = hu->priv; const u8 ibs_wake_cmd[] = { 0xFD }; const u8 edl_reset_soc_cmd[] = { 0x01, 0x00, 0xFC, 0x01, 0x05 }; if (qcadev->btsoc_type == QCA_QCA6390) { + if (test_bit(QCA_BT_OFF, &qca->flags) || + !test_bit(HCI_RUNNING, &hdev->flags)) + return; + serdev_device_write_flush(serdev); ret = serdev_device_write_buf(serdev, ibs_wake_cmd, sizeof(ibs_wake_cmd)); From 1590cee5342ebdb31c48448736081201daad9828 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Wed, 11 Jan 2023 12:24:19 +0100 Subject: [PATCH 386/543] wifi: brcmfmac: fix regression for Broadcom PCIe wifi devices commit ed05cb177ae5cd7f02f1d6e7706ba627d30f1696 upstream. A sanity check was introduced considering maximum flowrings above 256 as insane and effectively aborting the device probe. This resulted in regression for number of users as the value turns out to be sane after all. Fixes: 2aca4f3734bd ("brcmfmac: return error when getting invalid max_flowrings from dongle") Reported-by: chainofflowers Link: https://lore.kernel.org/all/4781984.GXAFRqVoOG@luna/ Reported-by: Christian Marillat Link: https://bugzilla.kernel.org/show_bug.cgi?id=216894 Cc: stable@vger.kernel.org Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230111112419.24185-1-arend.vanspriel@broadcom.com Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c index 5630f6e718e1..067ea019b110 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/pcie.c @@ -1218,7 +1218,7 @@ static int brcmf_pcie_init_ringbuffers(struct brcmf_pciedev_info *devinfo) BRCMF_NROF_H2D_COMMON_MSGRINGS; max_completionrings = BRCMF_NROF_D2H_COMMON_MSGRINGS; } - if (max_flowrings > 256) { + if (max_flowrings > 512) { brcmf_err(bus, "invalid max_flowrings(%d)\n", max_flowrings); return -EIO; } From 10644c9828c8e1024e4a41dd2efd707e748f7fbc Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 14 Dec 2022 14:03:26 +0100 Subject: [PATCH 387/543] wifi: mac80211: fix MLO + AP_VLAN check commit f216033d770f7ca0eda491fe01a9f02e7af59576 upstream. Instead of preventing adding AP_VLAN to MLO enabled APs, this check was preventing adding more than one 4-addr AP_VLAN regardless of the MLO status. Fix this by adding missing extra checks. Fixes: ae960ee90bb1 ("wifi: mac80211: prevent VLANs on MLDs") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20221214130326.37756-1-nbd@nbd.name Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/iface.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 46f08ec5ed76..8375900f210e 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -364,7 +364,9 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata, /* No support for VLAN with MLO yet */ if (iftype == NL80211_IFTYPE_AP_VLAN && - nsdata->wdev.use_4addr) + sdata->wdev.use_4addr && + nsdata->vif.type == NL80211_IFTYPE_AP && + nsdata->vif.valid_links) return -EOPNOTSUPP; /* From b8d1d53bef5e536fe92369886264ad75d569bdb7 Mon Sep 17 00:00:00 2001 From: Aloka Dixit Date: Wed, 21 Dec 2022 10:56:16 -0800 Subject: [PATCH 388/543] wifi: mac80211: reset multiple BSSID options in stop_ap() commit 0eb38842ada035d71bb06fb9116f26f24ee0f998 upstream. Reset multiple BSSID options when all AP related configurations are reset in ieee80211_stop_ap(). Stale values result in HWSIM test failures (e.g. p2p_group_cli_invalid), if run after 'he_ap_ema'. Reported-by: Jouni Malinen Signed-off-by: Aloka Dixit Link: https://lore.kernel.org/r/20221221185616.11514-1-quic_alokad@quicinc.com Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/cfg.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 8c8ef87997a8..3c66e03774fb 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -147,6 +147,7 @@ static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata, link_conf->bssid_index = 0; link_conf->nontransmitted = false; link_conf->ema_ap = false; + link_conf->bssid_indicator = 0; if (sdata->vif.type != NL80211_IFTYPE_AP || !params.tx_wdev) return -EINVAL; @@ -1511,6 +1512,12 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, kfree(link_conf->ftmr_params); link_conf->ftmr_params = NULL; + sdata->vif.mbssid_tx_vif = NULL; + link_conf->bssid_index = 0; + link_conf->nontransmitted = false; + link_conf->ema_ap = false; + link_conf->bssid_indicator = 0; + __sta_info_flush(sdata, true); ieee80211_free_keys(sdata, true); From c838df8461a601b20dc1b9fb1834d2aad8e2f949 Mon Sep 17 00:00:00 2001 From: Alexander Wetzel Date: Fri, 30 Dec 2022 13:18:50 +0100 Subject: [PATCH 389/543] wifi: mac80211: sdata can be NULL during AMPDU start commit 69403bad97aa0162e3d7911b27e25abe774093df upstream. ieee80211_tx_ba_session_handle_start() may get NULL for sdata when a deauthentication is ongoing. Here a trace triggering the race with the hostapd test multi_ap_fronthaul_on_ap: (gdb) list *drv_ampdu_action+0x46 0x8b16 is in drv_ampdu_action (net/mac80211/driver-ops.c:396). 391 int ret = -EOPNOTSUPP; 392 393 might_sleep(); 394 395 sdata = get_bss_sdata(sdata); 396 if (!check_sdata_in_driver(sdata)) 397 return -EIO; 398 399 trace_drv_ampdu_action(local, sdata, params); 400 wlan0: moving STA 02:00:00:00:03:00 to state 3 wlan0: associated wlan0: deauthenticating from 02:00:00:00:03:00 by local choice (Reason: 3=DEAUTH_LEAVING) wlan3.sta1: Open BA session requested for 02:00:00:00:00:00 tid 0 wlan3.sta1: dropped frame to 02:00:00:00:00:00 (unauthorized port) wlan0: moving STA 02:00:00:00:03:00 to state 2 wlan0: moving STA 02:00:00:00:03:00 to state 1 wlan0: Removed STA 02:00:00:00:03:00 wlan0: Destroyed STA 02:00:00:00:03:00 BUG: unable to handle page fault for address: fffffffffffffb48 PGD 11814067 P4D 11814067 PUD 11816067 PMD 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 2 PID: 133397 Comm: kworker/u16:1 Tainted: G W 6.1.0-rc8-wt+ #59 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.0-20220807_005459-localhost 04/01/2014 Workqueue: phy3 ieee80211_ba_session_work [mac80211] RIP: 0010:drv_ampdu_action+0x46/0x280 [mac80211] Code: 53 48 89 f3 be 89 01 00 00 e8 d6 43 bf ef e8 21 46 81 f0 83 bb a0 1b 00 00 04 75 0e 48 8b 9b 28 0d 00 00 48 81 eb 10 0e 00 00 <8b> 93 58 09 00 00 f6 c2 20 0f 84 3b 01 00 00 8b 05 dd 1c 0f 00 85 RSP: 0018:ffffc900025ebd20 EFLAGS: 00010287 RAX: 0000000000000000 RBX: fffffffffffff1f0 RCX: ffff888102228240 RDX: 0000000080000000 RSI: ffffffff918c5de0 RDI: ffff888102228b40 RBP: ffffc900025ebd40 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000000000001 R11: 0000000000000000 R12: ffff888118c18ec0 R13: 0000000000000000 R14: ffffc900025ebd60 R15: ffff888018b7efb8 FS: 0000000000000000(0000) GS:ffff88817a600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: fffffffffffffb48 CR3: 0000000105228006 CR4: 0000000000170ee0 Call Trace: ieee80211_tx_ba_session_handle_start+0xd0/0x190 [mac80211] ieee80211_ba_session_work+0xff/0x2e0 [mac80211] process_one_work+0x29f/0x620 worker_thread+0x4d/0x3d0 ? process_one_work+0x620/0x620 kthread+0xfb/0x120 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x22/0x30 Signed-off-by: Alexander Wetzel Link: https://lore.kernel.org/r/20221230121850.218810-2-alexander@wetzel-home.de Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/agg-tx.c | 6 +++++- net/mac80211/driver-ops.c | 3 +++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 07c892aa8c73..b2e40465289d 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -491,7 +491,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) { struct tid_ampdu_tx *tid_tx; struct ieee80211_local *local = sta->local; - struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_sub_if_data *sdata; struct ieee80211_ampdu_params params = { .sta = &sta->sta, .action = IEEE80211_AMPDU_TX_START, @@ -521,6 +521,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) */ synchronize_net(); + sdata = sta->sdata; params.ssn = sta->tid_seq[tid] >> 4; ret = drv_ampdu_action(local, sdata, ¶ms); tid_tx->ssn = params.ssn; @@ -534,6 +535,9 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) */ set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state); } else if (ret) { + if (!sdata) + return; + ht_dbg(sdata, "BA request denied - HW unavailable for %pM tid %d\n", sta->sta.addr, tid); diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index 5392ffa18270..c08d3c9a4a17 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -391,6 +391,9 @@ int drv_ampdu_action(struct ieee80211_local *local, might_sleep(); + if (!sdata) + return -EIO; + sdata = get_bss_sdata(sdata); if (!check_sdata_in_driver(sdata)) return -EIO; From a57c981d9f24d2bd89eaa76dc477e8ca252e22e8 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 30 Dec 2022 21:07:47 +0100 Subject: [PATCH 390/543] wifi: mac80211: fix initialization of rx->link and rx->link_sta commit e66b7920aa5ac5b1a1997a454004ba9246a3c005 upstream. There are some codepaths that do not initialize rx->link_sta properly. This causes a crash in places which assume that rx->link_sta is valid if rx->sta is valid. One known instance is triggered by __ieee80211_rx_h_amsdu being called from fast-rx. It results in a crash like this one: BUG: kernel NULL pointer dereference, address: 00000000000000a8 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 0 P4D 0 Oops: 0002 [#1] PREEMPT SMP PTI CPU: 1 PID: 506 Comm: mt76-usb-rx phy Tainted: G E 6.1.0-debian64x+1.7 #3 Hardware name: ZOTAC ZBOX-ID92/ZBOX-IQ01/ZBOX-ID92/ZBOX-IQ01, BIOS B220P007 05/21/2014 RIP: 0010:ieee80211_deliver_skb+0x62/0x1f0 [mac80211] Code: 00 48 89 04 24 e8 9e a7 c3 df 89 c0 48 03 1c c5 a0 ea 39 a1 4c 01 6b 08 48 ff 03 48 83 7d 28 00 74 11 48 8b 45 30 48 63 55 44 <48> 83 84 d0 a8 00 00 00 01 41 8b 86 c0 11 00 00 8d 50 fd 83 fa 01 RSP: 0018:ffff999040803b10 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffffb9903f496480 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffff999040803ce0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8d21828ac900 R13: 000000000000004a R14: ffff8d2198ed89c0 R15: ffff8d2198ed8000 FS: 0000000000000000(0000) GS:ffff8d24afe80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000a8 CR3: 0000000429810002 CR4: 00000000001706e0 Call Trace: __ieee80211_rx_h_amsdu+0x1b5/0x240 [mac80211] ? ieee80211_prepare_and_rx_handle+0xcdd/0x1320 [mac80211] ? __local_bh_enable_ip+0x3b/0xa0 ieee80211_prepare_and_rx_handle+0xcdd/0x1320 [mac80211] ? prepare_transfer+0x109/0x1a0 [xhci_hcd] ieee80211_rx_list+0xa80/0xda0 [mac80211] mt76_rx_complete+0x207/0x2e0 [mt76] mt76_rx_poll_complete+0x357/0x5a0 [mt76] mt76u_rx_worker+0x4f5/0x600 [mt76_usb] ? mt76_get_min_avg_rssi+0x140/0x140 [mt76] __mt76_worker_fn+0x50/0x80 [mt76] kthread+0xed/0x120 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x22/0x30 Since the initialization of rx->link and rx->link_sta is rather convoluted and duplicated in many places, clean it up by using a helper function to set it. Fixes: ccdde7c74ffd ("wifi: mac80211: properly implement MLO key handling") Fixes: b320d6c456ff ("wifi: mac80211: use correct rx link_sta instead of default") Signed-off-by: Felix Fietkau Link: https://lore.kernel.org/r/20221230200747.19040-1-nbd@nbd.name [remove unnecessary rx->sta->sta.mlo check] Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/rx.c | 222 +++++++++++++++++++++------------------------- 1 file changed, 99 insertions(+), 123 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index f99416d2e144..3262ebb24092 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4070,6 +4070,58 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx) #undef CALL_RXH } +static bool +ieee80211_rx_is_valid_sta_link_id(struct ieee80211_sta *sta, u8 link_id) +{ + if (!sta->mlo) + return false; + + return !!(sta->valid_links & BIT(link_id)); +} + +static bool ieee80211_rx_data_set_link(struct ieee80211_rx_data *rx, + u8 link_id) +{ + rx->link_id = link_id; + rx->link = rcu_dereference(rx->sdata->link[link_id]); + + if (!rx->sta) + return rx->link; + + if (!ieee80211_rx_is_valid_sta_link_id(&rx->sta->sta, link_id)) + return false; + + rx->link_sta = rcu_dereference(rx->sta->link[link_id]); + + return rx->link && rx->link_sta; +} + +static bool ieee80211_rx_data_set_sta(struct ieee80211_rx_data *rx, + struct ieee80211_sta *pubsta, + int link_id) +{ + struct sta_info *sta; + + sta = container_of(pubsta, struct sta_info, sta); + + rx->link_id = link_id; + rx->sta = sta; + + if (sta) { + rx->local = sta->sdata->local; + if (!rx->sdata) + rx->sdata = sta->sdata; + rx->link_sta = &sta->deflink; + } + + if (link_id < 0) + rx->link = &rx->sdata->deflink; + else if (!ieee80211_rx_data_set_link(rx, link_id)) + return false; + + return true; +} + /* * This function makes calls into the RX path, therefore * it has to be invoked under RCU read lock. @@ -4078,16 +4130,19 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) { struct sk_buff_head frames; struct ieee80211_rx_data rx = { - .sta = sta, - .sdata = sta->sdata, - .local = sta->local, /* This is OK -- must be QoS data frame */ .security_idx = tid, .seqno_idx = tid, - .link_id = -1, }; struct tid_ampdu_rx *tid_agg_rx; - u8 link_id; + int link_id = -1; + + /* FIXME: statistics won't be right with this */ + if (sta->sta.valid_links) + link_id = ffs(sta->sta.valid_links) - 1; + + if (!ieee80211_rx_data_set_sta(&rx, &sta->sta, link_id)) + return; tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]); if (!tid_agg_rx) @@ -4107,10 +4162,6 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) }; drv_event_callback(rx.local, rx.sdata, &event); } - /* FIXME: statistics won't be right with this */ - link_id = sta->sta.valid_links ? ffs(sta->sta.valid_links) - 1 : 0; - rx.link = rcu_dereference(sta->sdata->link[link_id]); - rx.link_sta = rcu_dereference(sta->link[link_id]); ieee80211_rx_handlers(&rx, &frames); } @@ -4126,7 +4177,6 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid, /* This is OK -- must be QoS data frame */ .security_idx = tid, .seqno_idx = tid, - .link_id = -1, }; int i, diff; @@ -4137,10 +4187,8 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid, sta = container_of(pubsta, struct sta_info, sta); - rx.sta = sta; - rx.sdata = sta->sdata; - rx.link = &rx.sdata->deflink; - rx.local = sta->local; + if (!ieee80211_rx_data_set_sta(&rx, pubsta, -1)) + return; rcu_read_lock(); tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]); @@ -4527,15 +4575,6 @@ void ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata) mutex_unlock(&local->sta_mtx); } -static bool -ieee80211_rx_is_valid_sta_link_id(struct ieee80211_sta *sta, u8 link_id) -{ - if (!sta->mlo) - return false; - - return !!(sta->valid_links & BIT(link_id)); -} - static void ieee80211_rx_8023(struct ieee80211_rx_data *rx, struct ieee80211_fast_rx *fast_rx, int orig_len) @@ -4646,7 +4685,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, struct sk_buff *skb = rx->skb; struct ieee80211_hdr *hdr = (void *)skb->data; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - struct sta_info *sta = rx->sta; int orig_len = skb->len; int hdrlen = ieee80211_hdrlen(hdr->frame_control); int snap_offs = hdrlen; @@ -4658,7 +4696,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, u8 da[ETH_ALEN]; u8 sa[ETH_ALEN]; } addrs __aligned(2); - struct link_sta_info *link_sta; struct ieee80211_sta_rx_stats *stats; /* for parallel-rx, we need to have DUP_VALIDATED, otherwise we write @@ -4761,18 +4798,10 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx, drop: dev_kfree_skb(skb); - if (rx->link_id >= 0) { - link_sta = rcu_dereference(sta->link[rx->link_id]); - if (!link_sta) - return true; - } else { - link_sta = &sta->deflink; - } - if (fast_rx->uses_rss) - stats = this_cpu_ptr(link_sta->pcpu_rx_stats); + stats = this_cpu_ptr(rx->link_sta->pcpu_rx_stats); else - stats = &link_sta->rx_stats; + stats = &rx->link_sta->rx_stats; stats->dropped++; return true; @@ -4790,8 +4819,8 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, struct ieee80211_local *local = rx->local; struct ieee80211_sub_if_data *sdata = rx->sdata; struct ieee80211_hdr *hdr = (void *)skb->data; - struct link_sta_info *link_sta = NULL; - struct ieee80211_link_data *link; + struct link_sta_info *link_sta = rx->link_sta; + struct ieee80211_link_data *link = rx->link; rx->skb = skb; @@ -4813,35 +4842,6 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, if (!ieee80211_accept_frame(rx)) return false; - if (rx->link_id >= 0) { - link = rcu_dereference(rx->sdata->link[rx->link_id]); - - /* we might race link removal */ - if (!link) - return true; - rx->link = link; - - if (rx->sta) { - rx->link_sta = - rcu_dereference(rx->sta->link[rx->link_id]); - if (!rx->link_sta) - return true; - } - } else { - if (rx->sta) - rx->link_sta = &rx->sta->deflink; - - rx->link = &sdata->deflink; - } - - if (unlikely(!is_multicast_ether_addr(hdr->addr1) && - rx->link_id >= 0 && rx->sta && rx->sta->sta.mlo)) { - link_sta = rcu_dereference(rx->sta->link[rx->link_id]); - - if (WARN_ON_ONCE(!link_sta)) - return true; - } - if (!consume) { struct skb_shared_hwtstamps *shwt; @@ -4861,7 +4861,7 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, shwt->hwtstamp = skb_hwtstamps(skb)->hwtstamp; } - if (unlikely(link_sta)) { + if (unlikely(rx->sta && rx->sta->sta.mlo)) { /* translate to MLD addresses */ if (ether_addr_equal(link->conf->addr, hdr->addr1)) ether_addr_copy(hdr->addr1, rx->sdata->vif.addr); @@ -4891,6 +4891,7 @@ static void __ieee80211_rx_handle_8023(struct ieee80211_hw *hw, struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_fast_rx *fast_rx; struct ieee80211_rx_data rx; + int link_id = -1; memset(&rx, 0, sizeof(rx)); rx.skb = skb; @@ -4907,12 +4908,8 @@ static void __ieee80211_rx_handle_8023(struct ieee80211_hw *hw, if (!pubsta) goto drop; - rx.sta = container_of(pubsta, struct sta_info, sta); - rx.sdata = rx.sta->sdata; - - if (status->link_valid && - !ieee80211_rx_is_valid_sta_link_id(pubsta, status->link_id)) - goto drop; + if (status->link_valid) + link_id = status->link_id; /* * TODO: Should the frame be dropped if the right link_id is not @@ -4921,19 +4918,8 @@ static void __ieee80211_rx_handle_8023(struct ieee80211_hw *hw, * link_id is used only for stats purpose and updating the stats on * the deflink is fine? */ - if (status->link_valid) - rx.link_id = status->link_id; - - if (rx.link_id >= 0) { - struct ieee80211_link_data *link; - - link = rcu_dereference(rx.sdata->link[rx.link_id]); - if (!link) - goto drop; - rx.link = link; - } else { - rx.link = &rx.sdata->deflink; - } + if (!ieee80211_rx_data_set_sta(&rx, pubsta, link_id)) + goto drop; fast_rx = rcu_dereference(rx.sta->fast_rx); if (!fast_rx) @@ -4951,6 +4937,8 @@ static bool ieee80211_rx_for_interface(struct ieee80211_rx_data *rx, { struct link_sta_info *link_sta; struct ieee80211_hdr *hdr = (void *)skb->data; + struct sta_info *sta; + int link_id = -1; /* * Look up link station first, in case there's a @@ -4960,24 +4948,19 @@ static bool ieee80211_rx_for_interface(struct ieee80211_rx_data *rx, */ link_sta = link_sta_info_get_bss(rx->sdata, hdr->addr2); if (link_sta) { - rx->sta = link_sta->sta; - rx->link_id = link_sta->link_id; + sta = link_sta->sta; + link_id = link_sta->link_id; } else { struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - rx->sta = sta_info_get_bss(rx->sdata, hdr->addr2); - if (rx->sta) { - if (status->link_valid && - !ieee80211_rx_is_valid_sta_link_id(&rx->sta->sta, - status->link_id)) - return false; - - rx->link_id = status->link_valid ? status->link_id : -1; - } else { - rx->link_id = -1; - } + sta = sta_info_get_bss(rx->sdata, hdr->addr2); + if (status->link_valid) + link_id = status->link_id; } + if (!ieee80211_rx_data_set_sta(rx, &sta->sta, link_id)) + return false; + return ieee80211_prepare_and_rx_handle(rx, skb, consume); } @@ -5036,19 +5019,15 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, if (ieee80211_is_data(fc)) { struct sta_info *sta, *prev_sta; - u8 link_id = status->link_id; + int link_id = -1; + + if (status->link_valid) + link_id = status->link_id; if (pubsta) { - rx.sta = container_of(pubsta, struct sta_info, sta); - rx.sdata = rx.sta->sdata; - - if (status->link_valid && - !ieee80211_rx_is_valid_sta_link_id(pubsta, link_id)) + if (!ieee80211_rx_data_set_sta(&rx, pubsta, link_id)) goto out; - if (status->link_valid) - rx.link_id = status->link_id; - /* * In MLO connection, fetch the link_id using addr2 * when the driver does not pass link_id in status. @@ -5066,7 +5045,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, if (!link_sta) goto out; - rx.link_id = link_sta->link_id; + ieee80211_rx_data_set_link(&rx, link_sta->link_id); } if (ieee80211_prepare_and_rx_handle(&rx, skb, true)) @@ -5082,30 +5061,27 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, continue; } - if ((status->link_valid && - !ieee80211_rx_is_valid_sta_link_id(&prev_sta->sta, - link_id)) || - (!status->link_valid && prev_sta->sta.mlo)) + rx.sdata = prev_sta->sdata; + if (!ieee80211_rx_data_set_sta(&rx, &prev_sta->sta, + link_id)) + goto out; + + if (!status->link_valid && prev_sta->sta.mlo) continue; - rx.link_id = status->link_valid ? link_id : -1; - rx.sta = prev_sta; - rx.sdata = prev_sta->sdata; ieee80211_prepare_and_rx_handle(&rx, skb, false); prev_sta = sta; } if (prev_sta) { - if ((status->link_valid && - !ieee80211_rx_is_valid_sta_link_id(&prev_sta->sta, - link_id)) || - (!status->link_valid && prev_sta->sta.mlo)) + rx.sdata = prev_sta->sdata; + if (!ieee80211_rx_data_set_sta(&rx, &prev_sta->sta, + link_id)) goto out; - rx.link_id = status->link_valid ? link_id : -1; - rx.sta = prev_sta; - rx.sdata = prev_sta->sdata; + if (!status->link_valid && prev_sta->sta.mlo) + goto out; if (ieee80211_prepare_and_rx_handle(&rx, skb, true)) return; From 1442d51026c58e7c11dd5f9b19650632a48676d4 Mon Sep 17 00:00:00 2001 From: Liam Howlett Date: Mon, 9 Jan 2023 20:55:21 +0000 Subject: [PATCH 391/543] nommu: fix memory leak in do_mmap() error path commit 7f31cced5724e6d414fe750aa1cd7e7b578ec22f upstream. The preallocation of the maple tree nodes may leak if the error path to "error_just_free" is taken. Fix this by moving the freeing of the maple tree nodes to a shared location for all error paths. Link: https://lkml.kernel.org/r/20230109205507.955577-1-Liam.Howlett@oracle.com Fixes: 8220543df148 ("nommu: remove uses of VMA linked list") Signed-off-by: Liam R. Howlett Cc: Matthew Wilcox (Oracle) Cc: Vlastimil Babka Cc: Yu Zhao Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/nommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/nommu.c b/mm/nommu.c index 214c70e1d059..c8252f01d5db 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1240,6 +1240,7 @@ share: error_just_free: up_write(&nommu_region_sem); error: + mas_destroy(&mas); if (region->vm_file) fput(region->vm_file); kmem_cache_free(vm_region_jar, region); @@ -1250,7 +1251,6 @@ error: sharing_violation: up_write(&nommu_region_sem); - mas_destroy(&mas); pr_warn("Attempt to share mismatched mappings\n"); ret = -EINVAL; goto error; From 6447569f4fce2c759e701bd45ea72ff802e817a0 Mon Sep 17 00:00:00 2001 From: Liam Howlett Date: Mon, 9 Jan 2023 20:57:21 +0000 Subject: [PATCH 392/543] nommu: fix do_munmap() error path commit 80be727ec87225797771a39f3e6801baf291faaf upstream. When removing a VMA from the tree fails due to no memory, do not free the VMA since a reference still exists. Link: https://lkml.kernel.org/r/20230109205708.956103-1-Liam.Howlett@oracle.com Fixes: 8220543df148 ("nommu: remove uses of VMA linked list") Signed-off-by: Liam R. Howlett Cc: Matthew Wilcox (Oracle) Cc: Vlastimil Babka Cc: Yu Zhao Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/nommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/nommu.c b/mm/nommu.c index c8252f01d5db..844af5be7640 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1509,7 +1509,8 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list erase_whole_vma: if (delete_vma_from_mm(vma)) ret = -ENOMEM; - delete_vma(mm, vma); + else + delete_vma(mm, vma); return ret; } From e733121383c059cfa0db8f6bb7f7bc80f56ce7ef Mon Sep 17 00:00:00 2001 From: Liam Howlett Date: Mon, 9 Jan 2023 20:58:20 +0000 Subject: [PATCH 393/543] nommu: fix split_vma() map_count error commit fd9edbdbdcde6b489ce59f326755ef16a2ffadd7 upstream. During the maple tree conversion of nommu, an error in counting the VMAs was introduced by counting the existing VMA again. The counting used to be decremented by one and incremented by two, but now it only increments by two. Fix the counting error by moving the increment outside the setup_vma_to_mm() function to the callers. Link: https://lkml.kernel.org/r/20230109205809.956325-1-Liam.Howlett@oracle.com Fixes: 8220543df148 ("nommu: remove uses of VMA linked list") Signed-off-by: Liam R. Howlett Cc: Matthew Wilcox (Oracle) Cc: Vlastimil Babka Cc: Yu Zhao Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/nommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mm/nommu.c b/mm/nommu.c index 844af5be7640..5b83938ecb67 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -559,7 +559,6 @@ void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas) static void setup_vma_to_mm(struct vm_area_struct *vma, struct mm_struct *mm) { - mm->map_count++; vma->vm_mm = mm; /* add the VMA to the mapping */ @@ -587,6 +586,7 @@ static void mas_add_vma_to_mm(struct ma_state *mas, struct mm_struct *mm, BUG_ON(!vma->vm_region); setup_vma_to_mm(vma, mm); + mm->map_count++; /* add the VMA to the tree */ vma_mas_store(vma, mas); @@ -1347,6 +1347,7 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, if (vma->vm_file) return -ENOMEM; + mm = vma->vm_mm; if (mm->map_count >= sysctl_max_map_count) return -ENOMEM; @@ -1398,6 +1399,7 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, mas_set_range(&mas, vma->vm_start, vma->vm_end - 1); mas_store(&mas, vma); vma_mas_store(new, &mas); + mm->map_count++; return 0; err_mas_preallocate: From 6d114116da1be5995207becafa4be4d64f20591d Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 6 Jan 2023 22:30:14 +0300 Subject: [PATCH 394/543] proc: fix PIE proc-empty-vm, proc-pid-vm tests commit 5316a017d093f644675a56523bcf5787ba8f4fef upstream. vsyscall detection code uses direct call to the beginning of the vsyscall page: asm ("call %P0" :: "i" (0xffffffffff600000)) It generates "call rel32" instruction but it is not relocated if binary is PIE, so binary segfaults into random userspace address and vsyscall page status is detected incorrectly. Do more direct: asm ("call *%rax") which doesn't do need any relocaltions. Mark g_vsyscall as volatile for a good measure, I didn't find instruction setting it to 0. Now the code is obviously correct: xor eax, eax mov rdi, rbp mov rsi, rbp mov DWORD PTR [rip+0x2d15], eax # g_vsyscall = 0 mov rax, 0xffffffffff600000 call rax mov DWORD PTR [rip+0x2d02], 1 # g_vsyscall = 1 mov eax, DWORD PTR ds:0xffffffffff600000 mov DWORD PTR [rip+0x2cf1], 2 # g_vsyscall = 2 mov edi, [rip+0x2ceb] # exit(g_vsyscall) call exit Note: fixed proc-empty-vm test oopses 5.19.0-28-generic kernel but this is separate story. Link: https://lkml.kernel.org/r/Y7h2xvzKLg36DSq8@p183 Fixes: 5bc73bb3451b9 ("proc: test how it holds up with mapping'less process") Signed-off-by: Alexey Dobriyan Reported-by: Mirsad Goran Todorovac Tested-by: Mirsad Goran Todorovac Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/proc/proc-empty-vm.c | 12 +++++++----- tools/testing/selftests/proc/proc-pid-vm.c | 9 +++++---- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/proc/proc-empty-vm.c b/tools/testing/selftests/proc/proc-empty-vm.c index d95b1cb43d9d..7588428b8fcd 100644 --- a/tools/testing/selftests/proc/proc-empty-vm.c +++ b/tools/testing/selftests/proc/proc-empty-vm.c @@ -25,6 +25,7 @@ #undef NDEBUG #include #include +#include #include #include #include @@ -41,7 +42,7 @@ * 1: vsyscall VMA is --xp vsyscall=xonly * 2: vsyscall VMA is r-xp vsyscall=emulate */ -static int g_vsyscall; +static volatile int g_vsyscall; static const char *g_proc_pid_maps_vsyscall; static const char *g_proc_pid_smaps_vsyscall; @@ -147,11 +148,12 @@ static void vsyscall(void) g_vsyscall = 0; /* gettimeofday(NULL, NULL); */ + uint64_t rax = 0xffffffffff600000; asm volatile ( - "call %P0" - : - : "i" (0xffffffffff600000), "D" (NULL), "S" (NULL) - : "rax", "rcx", "r11" + "call *%[rax]" + : [rax] "+a" (rax) + : "D" (NULL), "S" (NULL) + : "rcx", "r11" ); g_vsyscall = 1; diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c index 69551bfa215c..cacbd2a4aec9 100644 --- a/tools/testing/selftests/proc/proc-pid-vm.c +++ b/tools/testing/selftests/proc/proc-pid-vm.c @@ -257,11 +257,12 @@ static void vsyscall(void) g_vsyscall = 0; /* gettimeofday(NULL, NULL); */ + uint64_t rax = 0xffffffffff600000; asm volatile ( - "call %P0" - : - : "i" (0xffffffffff600000), "D" (NULL), "S" (NULL) - : "rax", "rcx", "r11" + "call *%[rax]" + : [rax] "+a" (rax) + : "D" (NULL), "S" (NULL) + : "rcx", "r11" ); g_vsyscall = 1; From 51a7ad5b60efac65691729d10745c28fa1016b96 Mon Sep 17 00:00:00 2001 From: "Shawn.Shao" Date: Thu, 12 Jan 2023 13:58:02 +0800 Subject: [PATCH 395/543] Add exception protection processing for vd in axi_chan_handle_err function commit 57054fe516d59d03a7bcf1888e82479ccc244f87 upstream. Since there is no protection for vd, a kernel panic will be triggered here in exceptional cases. You can refer to the processing of axi_chan_block_xfer_complete function The triggered kernel panic is as follows: [ 67.848444] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000060 [ 67.848447] Mem abort info: [ 67.848449] ESR = 0x96000004 [ 67.848451] EC = 0x25: DABT (current EL), IL = 32 bits [ 67.848454] SET = 0, FnV = 0 [ 67.848456] EA = 0, S1PTW = 0 [ 67.848458] Data abort info: [ 67.848460] ISV = 0, ISS = 0x00000004 [ 67.848462] CM = 0, WnR = 0 [ 67.848465] user pgtable: 4k pages, 48-bit VAs, pgdp=00000800c4c0b000 [ 67.848468] [0000000000000060] pgd=0000000000000000, p4d=0000000000000000 [ 67.848472] Internal error: Oops: 96000004 [#1] SMP [ 67.848475] Modules linked in: dmatest [ 67.848479] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.10.100-emu_x2rc+ #11 [ 67.848483] pstate: 62000085 (nZCv daIf -PAN -UAO +TCO BTYPE=--) [ 67.848487] pc : axi_chan_handle_err+0xc4/0x230 [ 67.848491] lr : axi_chan_handle_err+0x30/0x230 [ 67.848493] sp : ffff0803fe55ae50 [ 67.848495] x29: ffff0803fe55ae50 x28: ffff800011212200 [ 67.848500] x27: ffff0800c42c0080 x26: ffff0800c097c080 [ 67.848504] x25: ffff800010d33880 x24: ffff80001139d850 [ 67.848508] x23: ffff0800c097c168 x22: 0000000000000000 [ 67.848512] x21: 0000000000000080 x20: 0000000000002000 [ 67.848517] x19: ffff0800c097c080 x18: 0000000000000000 [ 67.848521] x17: 0000000000000000 x16: 0000000000000000 [ 67.848525] x15: 0000000000000000 x14: 0000000000000000 [ 67.848529] x13: 0000000000000000 x12: 0000000000000040 [ 67.848533] x11: ffff0800c0400248 x10: ffff0800c040024a [ 67.848538] x9 : ffff800010576cd4 x8 : ffff0800c0400270 [ 67.848542] x7 : 0000000000000000 x6 : ffff0800c04003e0 [ 67.848546] x5 : ffff0800c0400248 x4 : ffff0800c4294480 [ 67.848550] x3 : dead000000000100 x2 : dead000000000122 [ 67.848555] x1 : 0000000000000100 x0 : ffff0800c097c168 [ 67.848559] Call trace: [ 67.848562] axi_chan_handle_err+0xc4/0x230 [ 67.848566] dw_axi_dma_interrupt+0xf4/0x590 [ 67.848569] __handle_irq_event_percpu+0x60/0x220 [ 67.848573] handle_irq_event+0x64/0x120 [ 67.848576] handle_fasteoi_irq+0xc4/0x220 [ 67.848580] __handle_domain_irq+0x80/0xe0 [ 67.848583] gic_handle_irq+0xc0/0x138 [ 67.848585] el1_irq+0xc8/0x180 [ 67.848588] arch_cpu_idle+0x14/0x2c [ 67.848591] default_idle_call+0x40/0x16c [ 67.848594] do_idle+0x1f0/0x250 [ 67.848597] cpu_startup_entry+0x2c/0x60 [ 67.848600] rest_init+0xc0/0xcc [ 67.848603] arch_call_rest_init+0x14/0x1c [ 67.848606] start_kernel+0x4cc/0x500 [ 67.848610] Code: eb0002ff 9a9f12d6 f2fbd5a2 f2fbd5a3 (a94602c1) [ 67.848613] ---[ end trace 585a97036f88203a ]--- Signed-off-by: Shawn.Shao Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230112055802.1764-1-shawn.shao@jaguarmicro.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c index a183d93bd7e2..bf85aa0979ec 100644 --- a/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c +++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c @@ -1018,6 +1018,11 @@ static noinline void axi_chan_handle_err(struct axi_dma_chan *chan, u32 status) /* The bad descriptor currently is in the head of vc list */ vd = vchan_next_desc(&chan->vc); + if (!vd) { + dev_err(chan2dev(chan), "BUG: %s, IRQ with no descriptors\n", + axi_chan_name(chan)); + goto out; + } /* Remove the completed descriptor from issued list */ list_del(&vd->node); @@ -1032,6 +1037,7 @@ static noinline void axi_chan_handle_err(struct axi_dma_chan *chan, u32 status) /* Try to restart the controller */ axi_chan_start_first_queued(chan); +out: spin_unlock_irqrestore(&chan->vc.lock, flags); } From 87615c0e5351c828888f96e8eb0dd96fff947dad Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 17 Jan 2023 11:42:16 +0800 Subject: [PATCH 396/543] LoongArch: Add HWCAP_LOONGARCH_CPUCFG to elf_hwcap commit d52fec86a465355b379e839fa372ead0334d62e6 upstream. HWCAP_LOONGARCH_CPUCFG is missing in elf_hwcap, so add it for glibc's later use. Cc: stable@vger.kernel.org Reported-by: Yinyu Cai Signed-off-by: Huacai Chen Signed-off-by: Greg Kroah-Hartman --- arch/loongarch/kernel/cpu-probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/cpu-probe.c b/arch/loongarch/kernel/cpu-probe.c index 255a09876ef2..3a3fce2d7846 100644 --- a/arch/loongarch/kernel/cpu-probe.c +++ b/arch/loongarch/kernel/cpu-probe.c @@ -94,7 +94,7 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c) c->options = LOONGARCH_CPU_CPUCFG | LOONGARCH_CPU_CSR | LOONGARCH_CPU_TLB | LOONGARCH_CPU_VINT | LOONGARCH_CPU_WATCH; - elf_hwcap |= HWCAP_LOONGARCH_CRC32; + elf_hwcap = HWCAP_LOONGARCH_CPUCFG | HWCAP_LOONGARCH_CRC32; config = read_cpucfg(LOONGARCH_CPUCFG1); if (config & CPUCFG1_UAL) { From d3317baa34e99c10c08a8f3280cda062d7112c3a Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 6 Jan 2023 17:43:06 +0900 Subject: [PATCH 397/543] zonefs: Detect append writes at invalid locations commit a608da3bd730d718f2d3ebec1c26f9865f8f17ce upstream. Using REQ_OP_ZONE_APPEND operations for synchronous writes to sequential files succeeds regardless of the zone write pointer position, as long as the target zone is not full. This means that if an external (buggy) application writes to the zone of a sequential file underneath the file system, subsequent file write() operation will succeed but the file size will not be correct and the file will contain invalid data written by another application. Modify zonefs_file_dio_append() to check the written sector of an append write (returned in bio->bi_iter.bi_sector) and return -EIO if there is a mismatch with the file zone wp offset field. This change triggers a call to zonefs_io_error() and a zone check. Modify zonefs_io_error_cb() to not expose the unexpected data after the current inode size when the errors=remount-ro mode is used. Other error modes are correctly handled already. Fixes: 02ef12a663c7 ("zonefs: use REQ_OP_ZONE_APPEND for sync DIO") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Signed-off-by: Greg Kroah-Hartman --- fs/zonefs/super.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 2c53fbb8d918..a9c5c3f720ad 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -442,6 +442,10 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, data_size = zonefs_check_zone_condition(inode, zone, false, false); } + } else if (sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_RO && + data_size > isize) { + /* Do not expose garbage data */ + data_size = isize; } /* @@ -805,6 +809,24 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) ret = submit_bio_wait(bio); + /* + * If the file zone was written underneath the file system, the zone + * write pointer may not be where we expect it to be, but the zone + * append write can still succeed. So check manually that we wrote where + * we intended to, that is, at zi->i_wpoffset. + */ + if (!ret) { + sector_t wpsector = + zi->i_zsector + (zi->i_wpoffset >> SECTOR_SHIFT); + + if (bio->bi_iter.bi_sector != wpsector) { + zonefs_warn(inode->i_sb, + "Corrupted write pointer %llu for zone at %llu\n", + wpsector, zi->i_zsector); + ret = -EIO; + } + } + zonefs_file_write_dio_end_io(iocb, size, ret, 0); trace_zonefs_file_dio_append(inode, size, ret); From 0bf463939c09e5b2c35c71ed74a5fd60a74d6a04 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 5 Jan 2023 14:53:56 +0900 Subject: [PATCH 398/543] nilfs2: fix general protection fault in nilfs_btree_insert() commit 7633355e5c7f29c049a9048e461427d1d8ed3051 upstream. If nilfs2 reads a corrupted disk image and tries to reads a b-tree node block by calling __nilfs_btree_get_block() against an invalid virtual block address, it returns -ENOENT because conversion of the virtual block address to a disk block address fails. However, this return value is the same as the internal code that b-tree lookup routines return to indicate that the block being searched does not exist, so functions that operate on that b-tree may misbehave. When nilfs_btree_insert() receives this spurious 'not found' code from nilfs_btree_do_lookup(), it misunderstands that the 'not found' check was successful and continues the insert operation using incomplete lookup path data, causing the following crash: general protection fault, probably for non-canonical address 0xdffffc0000000005: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000028-0x000000000000002f] ... RIP: 0010:nilfs_btree_get_nonroot_node fs/nilfs2/btree.c:418 [inline] RIP: 0010:nilfs_btree_prepare_insert fs/nilfs2/btree.c:1077 [inline] RIP: 0010:nilfs_btree_insert+0x6d3/0x1c10 fs/nilfs2/btree.c:1238 Code: bc 24 80 00 00 00 4c 89 f8 48 c1 e8 03 42 80 3c 28 00 74 08 4c 89 ff e8 4b 02 92 fe 4d 8b 3f 49 83 c7 28 4c 89 f8 48 c1 e8 03 <42> 80 3c 28 00 74 08 4c 89 ff e8 2e 02 92 fe 4d 8b 3f 49 83 c7 02 ... Call Trace: nilfs_bmap_do_insert fs/nilfs2/bmap.c:121 [inline] nilfs_bmap_insert+0x20d/0x360 fs/nilfs2/bmap.c:147 nilfs_get_block+0x414/0x8d0 fs/nilfs2/inode.c:101 __block_write_begin_int+0x54c/0x1a80 fs/buffer.c:1991 __block_write_begin fs/buffer.c:2041 [inline] block_write_begin+0x93/0x1e0 fs/buffer.c:2102 nilfs_write_begin+0x9c/0x110 fs/nilfs2/inode.c:261 generic_perform_write+0x2e4/0x5e0 mm/filemap.c:3772 __generic_file_write_iter+0x176/0x400 mm/filemap.c:3900 generic_file_write_iter+0xab/0x310 mm/filemap.c:3932 call_write_iter include/linux/fs.h:2186 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x7dc/0xc50 fs/read_write.c:584 ksys_write+0x177/0x2a0 fs/read_write.c:637 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd ... This patch fixes the root cause of this problem by replacing the error code that __nilfs_btree_get_block() returns on block address conversion failure from -ENOENT to another internal code -EINVAL which means that the b-tree metadata is corrupted. By returning -EINVAL, it propagates without glitches, and for all relevant b-tree operations, functions in the upper bmap layer output an error message indicating corrupted b-tree metadata via nilfs_bmap_convert_error(), and code -EIO will be eventually returned as it should be. Link: https://lkml.kernel.org/r/000000000000bd89e205f0e38355@google.com Link: https://lkml.kernel.org/r/20230105055356.8811-1-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Reported-by: syzbot+ede796cecd5296353515@syzkaller.appspotmail.com Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/btree.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index b9d15c3df3cc..40ce92a332fe 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -480,9 +480,18 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, &bh, &submit_ptr); if (ret) { - if (ret != -EEXIST) - return ret; - goto out_check; + if (likely(ret == -EEXIST)) + goto out_check; + if (ret == -ENOENT) { + /* + * Block address translation failed due to invalid + * value of 'ptr'. In this case, return internal code + * -EINVAL (broken bmap) to notify bmap layer of fatal + * metadata corruption. + */ + ret = -EINVAL; + } + return ret; } if (ra) { From 1cb76f56694965541d07c78b988f3edc837bf561 Mon Sep 17 00:00:00 2001 From: Zach O'Keefe Date: Sat, 24 Dec 2022 00:20:35 -0800 Subject: [PATCH 399/543] mm/shmem: restore SHMEM_HUGE_DENY precedence over MADV_COLLAPSE commit 3de0c269adc6c2fac0bb1fb11965f0de699dc32b upstream. SHMEM_HUGE_DENY is for emergency use by the admin, to disable allocation of shmem huge pages if, for example, a dangerous bug is found in their usage: see "deny" in Documentation/mm/transhuge.rst. An app using madvise(,,MADV_COLLAPSE) should not be allowed to override it: restore its precedence over shmem_huge_force. Restore SHMEM_HUGE_DENY precedence over MADV_COLLAPSE. Link: https://lkml.kernel.org/r/20221224082035.3197140-2-zokeefe@google.com Fixes: 7c6c6cc4d3a2 ("mm/shmem: add flag to enforce shmem THP in hugepage_vma_check()") Signed-off-by: Zach O'Keefe Suggested-by: Hugh Dickins Acked-by: David Hildenbrand Cc: Yang Shi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/shmem.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index 82911fefc2d5..a8d9fd039d0a 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -472,12 +472,10 @@ bool shmem_is_huge(struct vm_area_struct *vma, struct inode *inode, if (vma && ((vma->vm_flags & VM_NOHUGEPAGE) || test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))) return false; - if (shmem_huge_force) - return true; - if (shmem_huge == SHMEM_HUGE_FORCE) - return true; if (shmem_huge == SHMEM_HUGE_DENY) return false; + if (shmem_huge_force || shmem_huge == SHMEM_HUGE_FORCE) + return true; switch (SHMEM_SB(inode->i_sb)->huge) { case SHMEM_HUGE_ALWAYS: From 63f71b8609013c80936e17aa6d25c215c4218a03 Mon Sep 17 00:00:00 2001 From: James Houghton Date: Wed, 4 Jan 2023 23:19:10 +0000 Subject: [PATCH 400/543] hugetlb: unshare some PMDs when splitting VMAs commit b30c14cd61025eeea2f2e8569606cd167ba9ad2d upstream. PMD sharing can only be done in PUD_SIZE-aligned pieces of VMAs; however, it is possible that HugeTLB VMAs are split without unsharing the PMDs first. Without this fix, it is possible to hit the uffd-wp-related WARN_ON_ONCE in hugetlb_change_protection [1]. The key there is that hugetlb_unshare_all_pmds will not attempt to unshare PMDs in non-PUD_SIZE-aligned sections of the VMA. It might seem ideal to unshare in hugetlb_vm_op_open, but we need to unshare in both the new and old VMAs, so unsharing in hugetlb_vm_op_split seems natural. [1]: https://lore.kernel.org/linux-mm/CADrL8HVeOkj0QH5VZZbRzybNE8CG-tEGFshnA+bG9nMgcWtBSg@mail.gmail.com/ Link: https://lkml.kernel.org/r/20230104231910.1464197-1-jthoughton@google.com Fixes: 6dfeaff93be1 ("hugetlb/userfaultfd: unshare all pmds for hugetlbfs when register wp") Signed-off-by: James Houghton Reviewed-by: Mike Kravetz Acked-by: Peter Xu Cc: Axel Rasmussen Cc: Muchun Song Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 44 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 9c251faeb6f5..e706086023ee 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -94,6 +94,8 @@ static int hugetlb_acct_memory(struct hstate *h, long delta); static void hugetlb_vma_lock_free(struct vm_area_struct *vma); static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma); static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma); +static void hugetlb_unshare_pmds(struct vm_area_struct *vma, + unsigned long start, unsigned long end); static inline bool subpool_is_free(struct hugepage_subpool *spool) { @@ -4825,6 +4827,25 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr) { if (addr & ~(huge_page_mask(hstate_vma(vma)))) return -EINVAL; + + /* + * PMD sharing is only possible for PUD_SIZE-aligned address ranges + * in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this + * split, unshare PMDs in the PUD_SIZE interval surrounding addr now. + */ + if (addr & ~PUD_MASK) { + /* + * hugetlb_vm_op_split is called right before we attempt to + * split the VMA. We will need to unshare PMDs in the old and + * new VMAs, so let's unshare before we split. + */ + unsigned long floor = addr & PUD_MASK; + unsigned long ceil = floor + PUD_SIZE; + + if (floor >= vma->vm_start && ceil <= vma->vm_end) + hugetlb_unshare_pmds(vma, floor, ceil); + } + return 0; } @@ -7386,26 +7407,21 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason) } } -/* - * This function will unconditionally remove all the shared pmd pgtable entries - * within the specific vma for a hugetlbfs memory range. - */ -void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) +static void hugetlb_unshare_pmds(struct vm_area_struct *vma, + unsigned long start, + unsigned long end) { struct hstate *h = hstate_vma(vma); unsigned long sz = huge_page_size(h); struct mm_struct *mm = vma->vm_mm; struct mmu_notifier_range range; - unsigned long address, start, end; + unsigned long address; spinlock_t *ptl; pte_t *ptep; if (!(vma->vm_flags & VM_MAYSHARE)) return; - start = ALIGN(vma->vm_start, PUD_SIZE); - end = ALIGN_DOWN(vma->vm_end, PUD_SIZE); - if (start >= end) return; @@ -7437,6 +7453,16 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) mmu_notifier_invalidate_range_end(&range); } +/* + * This function will unconditionally remove all the shared pmd pgtable entries + * within the specific vma for a hugetlbfs memory range. + */ +void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) +{ + hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE), + ALIGN_DOWN(vma->vm_end, PUD_SIZE)); +} + #ifdef CONFIG_CMA static bool cma_reserve_called __initdata; From 48b94e49980b088881029ee1c4bb01851bb7ff0c Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 22 Dec 2022 12:41:50 -0800 Subject: [PATCH 401/543] mm/khugepaged: fix collapse_pte_mapped_thp() to allow anon_vma commit ab0c3f1251b4670978fde0bd54161795a139b060 upstream. uprobe_write_opcode() uses collapse_pte_mapped_thp() to restore huge pmd, when removing a breakpoint from hugepage text: vma->anon_vma is always set in that case, so undo the prohibition. And MADV_COLLAPSE ought to be able to collapse some page tables in a vma which happens to have anon_vma set from CoWing elsewhere. Is anon_vma lock required? Almost not: if any page other than expected subpage of the non-anon huge page is found in the page table, collapse is aborted without making any change. However, it is possible that an anon page was CoWed from this extent in another mm or vma, in which case a concurrent lookup might look here: so keep it away while clearing pmd (but perhaps we shall go back to using pmd_lock() there in future). Note that collapse_pte_mapped_thp() is exceptional in freeing a page table without having cleared its ptes: I'm uneasy about that, and had thought pte_clear()ing appropriate; but exclusive i_mmap lock does fix the problem, and we would have to move the mmu_notification if clearing those ptes. What this fixes is not a dangerous instability. But I suggest Cc stable because uprobes "healing" has regressed in that way, so this should follow 8d3c106e19e8 into those stable releases where it was backported (and may want adjustment there - I'll supply backports as needed). Link: https://lkml.kernel.org/r/b740c9fb-edba-92ba-59fb-7a5592e5dfc@google.com Fixes: 8d3c106e19e8 ("mm/khugepaged: take the right locks for page table retraction") Signed-off-by: Hugh Dickins Acked-by: David Hildenbrand Cc: Jann Horn Cc: Yang Shi Cc: Zach O'Keefe Cc: Song Liu Cc: [5.4+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/khugepaged.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 3703a56571c1..bb1d79a14feb 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1467,14 +1467,6 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, if (!hugepage_vma_check(vma, vma->vm_flags, false, false, false)) return SCAN_VMA_CHECK; - /* - * Symmetry with retract_page_tables(): Exclude MAP_PRIVATE mappings - * that got written to. Without this, we'd have to also lock the - * anon_vma if one exists. - */ - if (vma->anon_vma) - return SCAN_VMA_CHECK; - /* Keep pmd pgtable for uffd-wp; see comment in retract_page_tables() */ if (userfaultfd_wp(vma)) return SCAN_PTE_UFFD_WP; @@ -1574,8 +1566,14 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, } /* step 4: remove pte entries */ + /* we make no change to anon, but protect concurrent anon page lookup */ + if (vma->anon_vma) + anon_vma_lock_write(vma->anon_vma); + collapse_and_free_pmd(mm, vma, haddr, pmd); + if (vma->anon_vma) + anon_vma_unlock_write(vma->anon_vma); i_mmap_unlock_write(vma->vm_file->f_mapping); maybe_install_pmd: From 2b5412ca6fd33523a138131abdd8ddf873af913e Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 12 Jan 2023 19:04:17 +0100 Subject: [PATCH 402/543] serial: stm32: Merge hard IRQ and threaded IRQ handling into single IRQ handler commit f24771b62a83239f0dce816bddf0f6807f436235 upstream. Requesting an interrupt with IRQF_ONESHOT will run the primary handler in the hard-IRQ context even in the force-threaded mode. The force-threaded mode is used by PREEMPT_RT in order to avoid acquiring sleeping locks (spinlock_t) in hard-IRQ context. This combination makes it impossible and leads to "sleeping while atomic" warnings. Use one interrupt handler for both handlers (primary and secondary) and drop the IRQF_ONESHOT flag which is not needed. Fixes: e359b4411c283 ("serial: stm32: fix threaded interrupt handling") Reviewed-by: Sebastian Andrzej Siewior Tested-by: Valentin Caron # V3 Signed-off-by: Marek Vasut Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230112180417.25595-1-marex@denx.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/stm32-usart.c | 31 ++++--------------------------- 1 file changed, 4 insertions(+), 27 deletions(-) diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index b8aed28b8f17..c28e2f10fb02 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -798,23 +798,9 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr) spin_unlock(&port->lock); } - if (stm32_usart_rx_dma_enabled(port)) - return IRQ_WAKE_THREAD; - else - return IRQ_HANDLED; -} - -static irqreturn_t stm32_usart_threaded_interrupt(int irq, void *ptr) -{ - struct uart_port *port = ptr; - struct tty_port *tport = &port->state->port; - struct stm32_port *stm32_port = to_stm32_port(port); - unsigned int size; - unsigned long flags; - /* Receiver timeout irq for DMA RX */ - if (!stm32_port->throttled) { - spin_lock_irqsave(&port->lock, flags); + if (stm32_usart_rx_dma_enabled(port) && !stm32_port->throttled) { + spin_lock(&port->lock); size = stm32_usart_receive_chars(port, false); uart_unlock_and_check_sysrq_irqrestore(port, flags); if (size) @@ -1016,10 +1002,8 @@ static int stm32_usart_startup(struct uart_port *port) u32 val; int ret; - ret = request_threaded_irq(port->irq, stm32_usart_interrupt, - stm32_usart_threaded_interrupt, - IRQF_ONESHOT | IRQF_NO_SUSPEND, - name, port); + ret = request_irq(port->irq, stm32_usart_interrupt, + IRQF_NO_SUSPEND, name, port); if (ret) return ret; @@ -1602,13 +1586,6 @@ static int stm32_usart_of_dma_rx_probe(struct stm32_port *stm32port, struct dma_slave_config config; int ret; - /* - * Using DMA and threaded handler for the console could lead to - * deadlocks. - */ - if (uart_console(port)) - return -ENODEV; - stm32port->rx_buf = dma_alloc_coherent(dev, RX_BUF_L, &stm32port->rx_dma_buf, GFP_KERNEL); From 806e5ac71d343e6096a90dea0c6a07b0c71ddef0 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 20 Jan 2023 11:16:59 +0100 Subject: [PATCH 403/543] Revert "serial: stm32: Merge hard IRQ and threaded IRQ handling into single IRQ handler" commit 2cbafffbf69addd7509072f4be5917f81d238cf6 upstream. This reverts commit f24771b62a83239f0dce816bddf0f6807f436235 as it is reported to break the build. Reported-by: kernel test robot Link: https://lore.kernel.org/r/202301200130.ttBiTzfO-lkp@intel.com Fixes: f24771b62a83 ("serial: stm32: Merge hard IRQ and threaded IRQ handling into single IRQ handler") Cc: Sebastian Andrzej Siewior Cc: Valentin Caron # V3 Cc: Marek Vasut Cc: Johan Hovold Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/stm32-usart.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index c28e2f10fb02..b8aed28b8f17 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -798,9 +798,23 @@ static irqreturn_t stm32_usart_interrupt(int irq, void *ptr) spin_unlock(&port->lock); } + if (stm32_usart_rx_dma_enabled(port)) + return IRQ_WAKE_THREAD; + else + return IRQ_HANDLED; +} + +static irqreturn_t stm32_usart_threaded_interrupt(int irq, void *ptr) +{ + struct uart_port *port = ptr; + struct tty_port *tport = &port->state->port; + struct stm32_port *stm32_port = to_stm32_port(port); + unsigned int size; + unsigned long flags; + /* Receiver timeout irq for DMA RX */ - if (stm32_usart_rx_dma_enabled(port) && !stm32_port->throttled) { - spin_lock(&port->lock); + if (!stm32_port->throttled) { + spin_lock_irqsave(&port->lock, flags); size = stm32_usart_receive_chars(port, false); uart_unlock_and_check_sysrq_irqrestore(port, flags); if (size) @@ -1002,8 +1016,10 @@ static int stm32_usart_startup(struct uart_port *port) u32 val; int ret; - ret = request_irq(port->irq, stm32_usart_interrupt, - IRQF_NO_SUSPEND, name, port); + ret = request_threaded_irq(port->irq, stm32_usart_interrupt, + stm32_usart_threaded_interrupt, + IRQF_ONESHOT | IRQF_NO_SUSPEND, + name, port); if (ret) return ret; @@ -1586,6 +1602,13 @@ static int stm32_usart_of_dma_rx_probe(struct stm32_port *stm32port, struct dma_slave_config config; int ret; + /* + * Using DMA and threaded handler for the console could lead to + * deadlocks. + */ + if (uart_console(port)) + return -ENODEV; + stm32port->rx_buf = dma_alloc_coherent(dev, RX_BUF_L, &stm32port->rx_dma_buf, GFP_KERNEL); From 0a8a71c4cb7d9538619b827d7440ad13d2b4478f Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Mon, 16 Jan 2023 16:22:10 +0200 Subject: [PATCH 404/543] xhci-pci: set the dma max_seg_size commit 93915a4170e9defd56a767a18e6c4076f3d18609 upstream. Allow devices to have dma operations beyond 64K, and avoid warnings such as: xhci_hcd 0000:00:14.0: mapping sg segment longer than device claims to support [len=98304] [max=65536] Cc: stable@vger.kernel.org Cc: Takashi Iwai Signed-off-by: Ricardo Ribalda Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20230116142216.1141605-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index f98cf30a3c1a..0d9cf96be06d 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -455,6 +455,8 @@ static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) if (xhci->quirks & XHCI_DEFAULT_PM_RUNTIME_ALLOW) pm_runtime_allow(&dev->dev); + dma_set_max_seg_size(&dev->dev, UINT_MAX); + return 0; put_usb3_hcd: From 08864dc14a6803f0377ca77b9740b26db30c020f Mon Sep 17 00:00:00 2001 From: Jimmy Hu Date: Mon, 16 Jan 2023 16:22:11 +0200 Subject: [PATCH 405/543] usb: xhci: Check endpoint is valid before dereferencing it commit e8fb5bc76eb86437ab87002d4a36d6da02165654 upstream. When the host controller is not responding, all URBs queued to all endpoints need to be killed. This can cause a kernel panic if we dereference an invalid endpoint. Fix this by using xhci_get_virt_ep() helper to find the endpoint and checking if the endpoint is valid before dereferencing it. [233311.853271] xhci-hcd xhci-hcd.1.auto: xHCI host controller not responding, assume dead [233311.853393] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000e8 [233311.853964] pc : xhci_hc_died+0x10c/0x270 [233311.853971] lr : xhci_hc_died+0x1ac/0x270 [233311.854077] Call trace: [233311.854085] xhci_hc_died+0x10c/0x270 [233311.854093] xhci_stop_endpoint_command_watchdog+0x100/0x1a4 [233311.854105] call_timer_fn+0x50/0x2d4 [233311.854112] expire_timers+0xac/0x2e4 [233311.854118] run_timer_softirq+0x300/0xabc [233311.854127] __do_softirq+0x148/0x528 [233311.854135] irq_exit+0x194/0x1a8 [233311.854143] __handle_domain_irq+0x164/0x1d0 [233311.854149] gic_handle_irq.22273+0x10c/0x188 [233311.854156] el1_irq+0xfc/0x1a8 [233311.854175] lpm_cpuidle_enter+0x25c/0x418 [msm_pm] [233311.854185] cpuidle_enter_state+0x1f0/0x764 [233311.854194] do_idle+0x594/0x6ac [233311.854201] cpu_startup_entry+0x7c/0x80 [233311.854209] secondary_start_kernel+0x170/0x198 Fixes: 50e8725e7c42 ("xhci: Refactor command watchdog and fix split string.") Cc: stable@vger.kernel.org Signed-off-by: Jimmy Hu Signed-off-by: Mathias Nyman Message-ID: <0fe978ed-8269-9774-1c40-f8a98c17e838@linux.intel.com> Link: https://lore.kernel.org/r/20230116142216.1141605-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 343709af4c16..dce02d0aad8d 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1170,7 +1170,10 @@ static void xhci_kill_endpoint_urbs(struct xhci_hcd *xhci, struct xhci_virt_ep *ep; struct xhci_ring *ring; - ep = &xhci->devs[slot_id]->eps[ep_index]; + ep = xhci_get_virt_ep(xhci, slot_id, ep_index); + if (!ep) + return; + if ((ep->ep_state & EP_HAS_STREAMS) || (ep->ep_state & EP_GETTING_NO_STREAMS)) { int stream_id; From ea2ee5e9991caf74e0604f994c1831a5867055b2 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 16 Jan 2023 16:22:12 +0200 Subject: [PATCH 406/543] xhci: Fix null pointer dereference when host dies commit a2bc47c43e70cf904b1af49f76d572326c08bca7 upstream. Make sure xhci_free_dev() and xhci_kill_endpoint_urbs() do not race and cause null pointer dereference when host suddenly dies. Usb core may call xhci_free_dev() which frees the xhci->devs[slot_id] virt device at the same time that xhci_kill_endpoint_urbs() tries to loop through all the device's endpoints, checking if there are any cancelled urbs left to give back. hold the xhci spinlock while freeing the virt device Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20230116142216.1141605-4-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 79d7931c048a..50b41213e827 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -3974,6 +3974,7 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) struct xhci_hcd *xhci = hcd_to_xhci(hcd); struct xhci_virt_device *virt_dev; struct xhci_slot_ctx *slot_ctx; + unsigned long flags; int i, ret; /* @@ -4000,7 +4001,11 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) virt_dev->eps[i].ep_state &= ~EP_STOP_CMD_PENDING; virt_dev->udev = NULL; xhci_disable_slot(xhci, udev->slot_id); + + spin_lock_irqsave(&xhci->lock, flags); xhci_free_virt_device(xhci, udev->slot_id); + spin_unlock_irqrestore(&xhci->lock, flags); + } int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) From 60c1eb013e1eff6200e1beb462cdcc3a14d9a77f Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 16 Jan 2023 16:22:13 +0200 Subject: [PATCH 407/543] xhci: Add update_hub_device override for PCI xHCI hosts commit 23a3b8d5a2365653fd9bc5a9454d1e7f4facbf85 upstream. Allow PCI hosts to check and tune roothub and port settings before the hub is up and running. This override is needed to turn off U1 and U2 LPM for some ports based on per port ACPI _DSM, _UPC, or possibly vendor specific mmio values for Intel xHC hosts. Usb core calls the host update_hub_device once it creates a hub. Entering U1 or U2 link power save state on ports with this limitation will cause link to fail, turning the usb device unusable in that setup. Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20230116142216.1141605-5-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 9 +++++++++ drivers/usb/host/xhci.c | 5 ++++- drivers/usb/host/xhci.h | 4 ++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 0d9cf96be06d..1a390bfc19fa 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -78,9 +78,12 @@ static const char hcd_name[] = "xhci_hcd"; static struct hc_driver __read_mostly xhci_pci_hc_driver; static int xhci_pci_setup(struct usb_hcd *hcd); +static int xhci_pci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev, + struct usb_tt *tt, gfp_t mem_flags); static const struct xhci_driver_overrides xhci_pci_overrides __initconst = { .reset = xhci_pci_setup, + .update_hub_device = xhci_pci_update_hub_device, }; /* called after powerup, by probe or system-pm "wakeup" */ @@ -386,6 +389,12 @@ static int xhci_pci_setup(struct usb_hcd *hcd) return xhci_pci_reinit(xhci, pdev); } +static int xhci_pci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev, + struct usb_tt *tt, gfp_t mem_flags) +{ + return xhci_update_hub_device(hcd, hdev, tt, mem_flags); +} + /* * We need to register our own PCI probe function (instead of the USB core's * function) in order to create a second roothub under xHCI. diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 50b41213e827..89f92fc78bb1 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -5124,7 +5124,7 @@ static int xhci_disable_usb3_lpm_timeout(struct usb_hcd *hcd, /* Once a hub descriptor is fetched for a device, we need to update the xHC's * internal data structures for the device. */ -static int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev, +int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev, struct usb_tt *tt, gfp_t mem_flags) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); @@ -5224,6 +5224,7 @@ static int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev, xhci_free_command(xhci, config_cmd); return ret; } +EXPORT_SYMBOL_GPL(xhci_update_hub_device); static int xhci_get_frame(struct usb_hcd *hcd) { @@ -5507,6 +5508,8 @@ void xhci_init_driver(struct hc_driver *drv, drv->check_bandwidth = over->check_bandwidth; if (over->reset_bandwidth) drv->reset_bandwidth = over->reset_bandwidth; + if (over->update_hub_device) + drv->update_hub_device = over->update_hub_device; } } EXPORT_SYMBOL_GPL(xhci_init_driver); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index c9f06c5e4e9d..3edfacb93817 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1943,6 +1943,8 @@ struct xhci_driver_overrides { struct usb_host_endpoint *ep); int (*check_bandwidth)(struct usb_hcd *, struct usb_device *); void (*reset_bandwidth)(struct usb_hcd *, struct usb_device *); + int (*update_hub_device)(struct usb_hcd *hcd, struct usb_device *hdev, + struct usb_tt *tt, gfp_t mem_flags); }; #define XHCI_CFC_DELAY 10 @@ -2122,6 +2124,8 @@ int xhci_drop_endpoint(struct usb_hcd *hcd, struct usb_device *udev, struct usb_host_endpoint *ep); int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev); void xhci_reset_bandwidth(struct usb_hcd *hcd, struct usb_device *udev); +int xhci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev, + struct usb_tt *tt, gfp_t mem_flags); int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id); int xhci_ext_cap_init(struct xhci_hcd *xhci); From 8c36de0935466ba17d5f0d57ed29f4b8c4d6cc9d Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 16 Jan 2023 16:22:14 +0200 Subject: [PATCH 408/543] xhci: Add a flag to disable USB3 lpm on a xhci root port level. commit 0522b9a1653048440da5f21747f21e498b9220d1 upstream. One USB3 roothub port may support link power management, while another root port on the same xHC can't due to different retimers used for the ports. This is the case with Intel Alder Lake, and possible future platforms where retimers used for USB4 ports cause too long exit latecy to enable native USB3 lpm U1 and U2 states. Add a flag in the xhci port structure to indicate if the port is lpm_incapable, and check it while calculating exit latency. Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20230116142216.1141605-6-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 8 ++++++++ drivers/usb/host/xhci.h | 1 + 2 files changed, 9 insertions(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 89f92fc78bb1..2b280beb0011 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -5049,6 +5049,7 @@ static int xhci_enable_usb3_lpm_timeout(struct usb_hcd *hcd, struct usb_device *udev, enum usb3_link_state state) { struct xhci_hcd *xhci; + struct xhci_port *port; u16 hub_encoded_timeout; int mel; int ret; @@ -5065,6 +5066,13 @@ static int xhci_enable_usb3_lpm_timeout(struct usb_hcd *hcd, if (xhci_check_tier_policy(xhci, udev, state) < 0) return USB3_LPM_DISABLED; + /* If connected to root port then check port can handle lpm */ + if (udev->parent && !udev->parent->parent) { + port = xhci->usb3_rhub.ports[udev->portnum - 1]; + if (port->lpm_incapable) + return USB3_LPM_DISABLED; + } + hub_encoded_timeout = xhci_calculate_lpm_timeout(hcd, udev, state); mel = calculate_max_exit_latency(udev, state, hub_encoded_timeout); if (mel < 0) { diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 3edfacb93817..dcee7f3207ad 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1735,6 +1735,7 @@ struct xhci_port { int hcd_portnum; struct xhci_hub *rhub; struct xhci_port_cap *port_cap; + unsigned int lpm_incapable:1; }; struct xhci_hub { From b7904d20157cb71587f2ae58ec7ed6b48204e88f Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 16 Jan 2023 16:22:15 +0200 Subject: [PATCH 409/543] usb: acpi: add helper to check port lpm capability using acpi _DSM commit cd702d18c882d5a4ea44bbdb38edd5d5577ef640 upstream. Add a helper to evaluate ACPI usb device specific method (_DSM) provided in case the USB3 port shouldn't enter U1 and U2 link states. This _DSM was added as port specific retimer configuration may lead to exit latencies growing beyond U1/U2 exit limits, and OS needs a way to find which ports can't support U1/U2 link power management states. This _DSM is also used by windows: Link: https://docs.microsoft.com/en-us/windows-hardware/drivers/bringup/usb-device-specific-method---dsm- Some patch issues found in testing resolved by Ron Lee Cc: stable@vger.kernel.org Tested-by: Ron Lee Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20230116142216.1141605-7-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/usb-acpi.c | 65 +++++++++++++++++++++++++++++++++++++ include/linux/usb.h | 3 ++ 2 files changed, 68 insertions(+) diff --git a/drivers/usb/core/usb-acpi.c b/drivers/usb/core/usb-acpi.c index 6d93428432f1..533baa85083c 100644 --- a/drivers/usb/core/usb-acpi.c +++ b/drivers/usb/core/usb-acpi.c @@ -37,6 +37,71 @@ bool usb_acpi_power_manageable(struct usb_device *hdev, int index) } EXPORT_SYMBOL_GPL(usb_acpi_power_manageable); +#define UUID_USB_CONTROLLER_DSM "ce2ee385-00e6-48cb-9f05-2edb927c4899" +#define USB_DSM_DISABLE_U1_U2_FOR_PORT 5 + +/** + * usb_acpi_port_lpm_incapable - check if lpm should be disabled for a port. + * @hdev: USB device belonging to the usb hub + * @index: zero based port index + * + * Some USB3 ports may not support USB3 link power management U1/U2 states + * due to different retimer setup. ACPI provides _DSM method which returns 0x01 + * if U1 and U2 states should be disabled. Evaluate _DSM with: + * Arg0: UUID = ce2ee385-00e6-48cb-9f05-2edb927c4899 + * Arg1: Revision ID = 0 + * Arg2: Function Index = 5 + * Arg3: (empty) + * + * Return 1 if USB3 port is LPM incapable, negative on error, otherwise 0 + */ + +int usb_acpi_port_lpm_incapable(struct usb_device *hdev, int index) +{ + union acpi_object *obj; + acpi_handle port_handle; + int port1 = index + 1; + guid_t guid; + int ret; + + ret = guid_parse(UUID_USB_CONTROLLER_DSM, &guid); + if (ret) + return ret; + + port_handle = usb_get_hub_port_acpi_handle(hdev, port1); + if (!port_handle) { + dev_dbg(&hdev->dev, "port-%d no acpi handle\n", port1); + return -ENODEV; + } + + if (!acpi_check_dsm(port_handle, &guid, 0, + BIT(USB_DSM_DISABLE_U1_U2_FOR_PORT))) { + dev_dbg(&hdev->dev, "port-%d no _DSM function %d\n", + port1, USB_DSM_DISABLE_U1_U2_FOR_PORT); + return -ENODEV; + } + + obj = acpi_evaluate_dsm(port_handle, &guid, 0, + USB_DSM_DISABLE_U1_U2_FOR_PORT, NULL); + + if (!obj) + return -ENODEV; + + if (obj->type != ACPI_TYPE_INTEGER) { + dev_dbg(&hdev->dev, "evaluate port-%d _DSM failed\n", port1); + ACPI_FREE(obj); + return -EINVAL; + } + + if (obj->integer.value == 0x01) + ret = 1; + + ACPI_FREE(obj); + + return ret; +} +EXPORT_SYMBOL_GPL(usb_acpi_port_lpm_incapable); + /** * usb_acpi_set_power_state - control usb port's power via acpi power * resource diff --git a/include/linux/usb.h b/include/linux/usb.h index 9ff1ad4dfad1..6c95af3317f7 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -751,11 +751,14 @@ extern struct device *usb_intf_get_dma_device(struct usb_interface *intf); extern int usb_acpi_set_power_state(struct usb_device *hdev, int index, bool enable); extern bool usb_acpi_power_manageable(struct usb_device *hdev, int index); +extern int usb_acpi_port_lpm_incapable(struct usb_device *hdev, int index); #else static inline int usb_acpi_set_power_state(struct usb_device *hdev, int index, bool enable) { return 0; } static inline bool usb_acpi_power_manageable(struct usb_device *hdev, int index) { return true; } +static inline int usb_acpi_port_lpm_incapable(struct usb_device *hdev, int index) + { return 0; } #endif /* USB autosuspend and autoresume */ From 08bf23c33979e0f44ff9ad1849c08067c7c3a46b Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 16 Jan 2023 16:22:16 +0200 Subject: [PATCH 410/543] xhci: Detect lpm incapable xHC USB3 roothub ports from ACPI tables commit 74622f0a81d0c2bcfc39f9192b788124e8c7f0af upstream. USB3 ports on xHC hosts may have retimers that cause too long exit latency to work with native USB3 U1/U2 link power management states. For now only use usb_acpi_port_lpm_incapable() to evaluate if port lpm should be disabled while setting up the USB3 roothub. Other ways to identify lpm incapable ports can be added here later if ACPI _DSM does not exist. Limit this to Intel hosts for now, this is to my knowledge only an Intel issue. Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20230116142216.1141605-8-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 1a390bfc19fa..232e175e4e96 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -355,8 +355,38 @@ static void xhci_pme_acpi_rtd3_enable(struct pci_dev *dev) NULL); ACPI_FREE(obj); } + +static void xhci_find_lpm_incapable_ports(struct usb_hcd *hcd, struct usb_device *hdev) +{ + struct xhci_hcd *xhci = hcd_to_xhci(hcd); + struct xhci_hub *rhub = &xhci->usb3_rhub; + int ret; + int i; + + /* This is not the usb3 roothub we are looking for */ + if (hcd != rhub->hcd) + return; + + if (hdev->maxchild > rhub->num_ports) { + dev_err(&hdev->dev, "USB3 roothub port number mismatch\n"); + return; + } + + for (i = 0; i < hdev->maxchild; i++) { + ret = usb_acpi_port_lpm_incapable(hdev, i); + + dev_dbg(&hdev->dev, "port-%d disable U1/U2 _DSM: %d\n", i + 1, ret); + + if (ret >= 0) { + rhub->ports[i]->lpm_incapable = ret; + continue; + } + } +} + #else static void xhci_pme_acpi_rtd3_enable(struct pci_dev *dev) { } +static void xhci_find_lpm_incapable_ports(struct usb_hcd *hcd, struct usb_device *hdev) { } #endif /* CONFIG_ACPI */ /* called during probe() after chip reset completes */ @@ -392,6 +422,10 @@ static int xhci_pci_setup(struct usb_hcd *hcd) static int xhci_pci_update_hub_device(struct usb_hcd *hcd, struct usb_device *hdev, struct usb_tt *tt, gfp_t mem_flags) { + /* Check if acpi claims some USB3 roothub ports are lpm incapable */ + if (!hdev->parent) + xhci_find_lpm_incapable_ports(hcd, hdev); + return xhci_update_hub_device(hcd, hdev, tt, mem_flags); } From 91185568c99d60534bacf38439846103962d1e2c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 20 Jan 2023 11:03:20 +0100 Subject: [PATCH 411/543] prlimit: do_prlimit needs to have a speculation check commit 739790605705ddcf18f21782b9c99ad7d53a8c11 upstream. do_prlimit() adds the user-controlled resource value to a pointer that will subsequently be dereferenced. In order to help prevent this codepath from being used as a spectre "gadget" a barrier needs to be added after checking the range. Reported-by: Jordy Zomer Tested-by: Jordy Zomer Suggested-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/sys.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sys.c b/kernel/sys.c index 5fd54bf0e886..88b31f096fb2 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1442,6 +1442,8 @@ static int do_prlimit(struct task_struct *tsk, unsigned int resource, if (resource >= RLIM_NLIMITS) return -EINVAL; + resource = array_index_nospec(resource, RLIM_NLIMITS); + if (new_rlim) { if (new_rlim->rlim_cur > new_rlim->rlim_max) return -EINVAL; From 4173c542195c699e691db677332d564311770d6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Duke=20Xin=28=E8=BE=9B=E5=AE=89=E6=96=87=29?= Date: Tue, 27 Dec 2022 01:44:30 -0800 Subject: [PATCH 412/543] USB: serial: option: add Quectel EM05-G (GR) modem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6c331f32e32ac71eb3e8b93fceda2802d7ecb889 upstream. The EM05-G (GR) modem has 2 USB configurations that are configurable via the AT command AT+QCFG="usbnet",[ 0 | 2 ] which make the modem enumerate with the following interfaces, respectively: "RMNET" : AT + DIAG + NMEA + Modem + QMI "MBIM" : MBIM + AT + DIAG + NMEA + Modem The detailed description of the USB configuration for each mode as follows: RMNET Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 21 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0313 Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-G C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms MBIM Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 16 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0313 Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-G C:* #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=89(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Duke Xin(辛安文) Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index dee79c7d82d5..276d2124225f 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -256,6 +256,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EM05G 0x030a #define QUECTEL_PRODUCT_EM060K 0x030b #define QUECTEL_PRODUCT_EM05G_SG 0x0311 +#define QUECTEL_PRODUCT_EM05G_GR 0x0313 #define QUECTEL_PRODUCT_EM12 0x0512 #define QUECTEL_PRODUCT_RM500Q 0x0800 #define QUECTEL_PRODUCT_RM520N 0x0801 @@ -1161,6 +1162,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G, 0xff), .driver_info = RSVD(6) | ZLP }, + { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_GR, 0xff), + .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_SG, 0xff), .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0x00, 0x40) }, From 1e39e0b47ce6cee72e739daa28580d982e4dde35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Duke=20Xin=28=E8=BE=9B=E5=AE=89=E6=96=87=29?= Date: Tue, 27 Dec 2022 01:28:25 -0800 Subject: [PATCH 413/543] USB: serial: option: add Quectel EM05-G (CS) modem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit bb78654b0b46316dac687fd4b7dc7cce636f46cd upstream. The EM05-G (CS) modem has 2 USB configurations that are configurable via the AT command AT+QCFG="usbnet",[ 0 | 2 ] which make the modem enumerate with the following interfaces, respectively: "RMNET" : AT + DIAG + NMEA + Modem + QMI "MBIM" : MBIM + AT + DIAG + NMEA + Modem The detailed description of the USB configuration for each mode as follows: RMNET Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 21 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=030C Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-G C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms MBIM Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 16 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=030C Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-G C:* #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=89(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Duke Xin(辛安文) Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 276d2124225f..dbd8a4198042 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -255,6 +255,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EP06 0x0306 #define QUECTEL_PRODUCT_EM05G 0x030a #define QUECTEL_PRODUCT_EM060K 0x030b +#define QUECTEL_PRODUCT_EM05G_CS 0x030c #define QUECTEL_PRODUCT_EM05G_SG 0x0311 #define QUECTEL_PRODUCT_EM05G_GR 0x0313 #define QUECTEL_PRODUCT_EM12 0x0512 @@ -1164,6 +1165,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_GR, 0xff), .driver_info = RSVD(6) | ZLP }, + { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_CS, 0xff), + .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_SG, 0xff), .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0x00, 0x40) }, From 1048bc1b852d56f0c1b34de55039eb5cf00a129c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Duke=20Xin=28=E8=BE=9B=E5=AE=89=E6=96=87=29?= Date: Tue, 27 Dec 2022 01:51:27 -0800 Subject: [PATCH 414/543] USB: serial: option: add Quectel EM05-G (RS) modem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b72d13977689f0c717444010e108c4f20658dfee upstream. The EM05-G (RS) modem has 2 USB configurations that are configurable via the AT command AT+QCFG="usbnet",[ 0 | 2 ] which make the modem enumerate with the following interfaces, respectively: "RMNET" : AT + DIAG + NMEA + Modem + QMI "MBIM" : MBIM + AT + DIAG + NMEA + Modem The detailed description of the USB configuration for each mode as follows: RMNET Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 21 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0314 Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-G C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms MBIM Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 16 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0314 Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-G C:* #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=89(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Duke Xin(辛安文) Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index dbd8a4198042..0ffdba43de30 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -258,6 +258,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EM05G_CS 0x030c #define QUECTEL_PRODUCT_EM05G_SG 0x0311 #define QUECTEL_PRODUCT_EM05G_GR 0x0313 +#define QUECTEL_PRODUCT_EM05G_RS 0x0314 #define QUECTEL_PRODUCT_EM12 0x0512 #define QUECTEL_PRODUCT_RM500Q 0x0800 #define QUECTEL_PRODUCT_RM520N 0x0801 @@ -1167,6 +1168,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_CS, 0xff), .driver_info = RSVD(6) | ZLP }, + { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_RS, 0xff), + .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_SG, 0xff), .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0x00, 0x40) }, From 5fd46948fd5131ac5becd100ee15af1bca9fcd49 Mon Sep 17 00:00:00 2001 From: Ali Mirghasemi Date: Wed, 28 Dec 2022 15:08:47 +0330 Subject: [PATCH 415/543] USB: serial: option: add Quectel EC200U modem commit d9bbb15881046bd76f8710c76e26a740eee997ef upstream. Add support for EC200U modem 0x0901: EC200U - AT + AP + CP + NMEA + DIAG + MOS usb-device output: T: Bus=01 Lev=02 Prnt=02 Port=02 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0901 Rev= 3.18 S: Manufacturer=Android S: Product=Android C:* #Ifs= 9 Cfg#= 1 Atr=e0 MxPwr=400mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=06 Prot=00 I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=06 Prot=00 Driver=cdc_ether E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=32ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=cdc_ether E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=83(I) Atr=03(Int.) MxPS= 512 Ivl=4096ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 7 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=8a(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=07(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=89(I) Atr=03(Int.) MxPS= 512 Ivl=4096ms I:* If#= 8 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=8b(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=08(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Ali Mirghasemi Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 0ffdba43de30..bc956172ed1d 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -262,6 +262,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EM12 0x0512 #define QUECTEL_PRODUCT_RM500Q 0x0800 #define QUECTEL_PRODUCT_RM520N 0x0801 +#define QUECTEL_PRODUCT_EC200U 0x0901 #define QUECTEL_PRODUCT_EC200S_CN 0x6002 #define QUECTEL_PRODUCT_EC200T 0x6026 #define QUECTEL_PRODUCT_RM500K 0x7001 @@ -1189,6 +1190,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0xff, 0x30) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0, 0x40) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200U, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200S_CN, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200T, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500K, 0xff, 0x00, 0x00) }, From 2c163640ed735f83abd5b623b018236e39a02f7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Duke=20Xin=28=E8=BE=9B=E5=AE=89=E6=96=87=29?= Date: Sun, 15 Jan 2023 18:07:27 -0800 Subject: [PATCH 416/543] USB: serial: option: add Quectel EM05CN (SG) modem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1541dd0097c0f8f470e76eddf5120fc55a7e3101 upstream. The EM05CN (SG) modem has 2 USB configurations that are configurable via the AT command AT+QCFG="usbnet",[ 0 | 2 ] which make the modem enumerate with the following interfaces, respectively: "MBIM" : AT + MBIM + DIAG + NMEA + MODEM "RMNET" : AT + DIAG + NMEA + Modem + QMI The detailed description of the USB configuration for each mode as follows: MBIM Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0310 Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-CN C:* #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#= 1 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=89(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 2 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms RMNET Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 3 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0310 Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-CN C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Duke Xin(辛安文) Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index bc956172ed1d..953b12b97c06 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -256,6 +256,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EM05G 0x030a #define QUECTEL_PRODUCT_EM060K 0x030b #define QUECTEL_PRODUCT_EM05G_CS 0x030c +#define QUECTEL_PRODUCT_EM05CN_SG 0x0310 #define QUECTEL_PRODUCT_EM05G_SG 0x0311 #define QUECTEL_PRODUCT_EM05G_GR 0x0313 #define QUECTEL_PRODUCT_EM05G_RS 0x0314 @@ -1163,6 +1164,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) }, + { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05CN_SG, 0xff), + .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G, 0xff), .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_GR, 0xff), From 32117ec839326e37dcbf22715019970363e1b41e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Duke=20Xin=28=E8=BE=9B=E5=AE=89=E6=96=87=29?= Date: Sun, 15 Jan 2023 18:33:28 -0800 Subject: [PATCH 417/543] USB: serial: option: add Quectel EM05CN modem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 71dfd381a7c051f16a61f82fbd38a4cca563bdca upstream. The EM05CN modem has 2 USB configurations that are configurable via the AT command AT+QCFG="usbnet",[ 0 | 2 ] which make the modem enumerate with the following interfaces, respectively: "MBIM" : AT + MBIM + DIAG + NMEA + MODEM "RMNET" : AT + DIAG + NMEA + Modem + QMI The detailed description of the USB configuration for each mode as follows: MBIM Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0312 Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-CN C:* #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA A: FirstIf#= 1 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim E: Ad=89(I) Atr=03(Int.) MxPS= 64 Ivl=32ms I: If#= 2 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim I:* If#= 2 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms RMNET Mode -------------- T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 3 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=2c7c ProdID=0312 Rev= 3.18 S: Manufacturer=Quectel S: Product=Quectel EM05-CN C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 6 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Duke Xin(辛安文) Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 953b12b97c06..6b69d05e2fb0 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -258,6 +258,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EM05G_CS 0x030c #define QUECTEL_PRODUCT_EM05CN_SG 0x0310 #define QUECTEL_PRODUCT_EM05G_SG 0x0311 +#define QUECTEL_PRODUCT_EM05CN 0x0312 #define QUECTEL_PRODUCT_EM05G_GR 0x0313 #define QUECTEL_PRODUCT_EM05G_RS 0x0314 #define QUECTEL_PRODUCT_EM12 0x0512 @@ -1164,6 +1165,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) }, + { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05CN, 0xff), + .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05CN_SG, 0xff), .driver_info = RSVD(6) | ZLP }, { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G, 0xff), From 5d6ecc63abb47e8301d7287f93290247e9878727 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 17 Jan 2023 17:39:32 +0100 Subject: [PATCH 418/543] staging: vchiq_arm: fix enum vchiq_status return types commit 7d83299351fe7c812c529f5e39fe63b5312e4233 upstream. gcc-13 notices a type mismatch between function declaration and definition for a few functions that have been converted from returning vchiq specific status values to regular error codes: drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c:662:5: error: conflicting types for 'vchiq_initialise' due to enum/integer mismatch; have 'int(struct vchiq_instance **)' [-Werror=enum-int-mismatch] drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c:1411:1: error: conflicting types for 'vchiq_use_internal' due to enum/integer mismatch; have 'int(struct vchiq_state *, struct vchiq_service *, enum USE_TYPE_E)' [-Werror=enum-int-mismatch] drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c:1468:1: error: conflicting types for 'vchiq_release_internal' due to enum/integer mismatch; have 'int(struct vchiq_state *, struct vchiq_service *)' [-Werror=enum-int-mismatch] Change the declarations to match the actual function definition. Fixes: a9fbd828be7f ("staging: vchiq_arm: drop enum vchiq_status from vchiq_*_internal") Cc: stable Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20230117163957.1109872-1-arnd@kernel.org Signed-off-by: Greg Kroah-Hartman --- .../staging/vc04_services/include/linux/raspberrypi/vchiq.h | 2 +- drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/vc04_services/include/linux/raspberrypi/vchiq.h b/drivers/staging/vc04_services/include/linux/raspberrypi/vchiq.h index db1441c0cc66..690ab7165b2c 100644 --- a/drivers/staging/vc04_services/include/linux/raspberrypi/vchiq.h +++ b/drivers/staging/vc04_services/include/linux/raspberrypi/vchiq.h @@ -86,7 +86,7 @@ struct vchiq_service_params_kernel { struct vchiq_instance; -extern enum vchiq_status vchiq_initialise(struct vchiq_instance **pinstance); +extern int vchiq_initialise(struct vchiq_instance **pinstance); extern enum vchiq_status vchiq_shutdown(struct vchiq_instance *instance); extern enum vchiq_status vchiq_connect(struct vchiq_instance *instance); extern enum vchiq_status vchiq_open_service(struct vchiq_instance *instance, diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h index 2851ef6b9cd0..cd20eb18f275 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.h @@ -100,10 +100,10 @@ vchiq_dump_platform_use_state(struct vchiq_state *state); extern void vchiq_dump_service_use_state(struct vchiq_state *state); -extern enum vchiq_status +extern int vchiq_use_internal(struct vchiq_state *state, struct vchiq_service *service, enum USE_TYPE_E use_type); -extern enum vchiq_status +extern int vchiq_release_internal(struct vchiq_state *state, struct vchiq_service *service); From aa5b95982f45a137e692f313fded5647cf48cba1 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 20 Jan 2023 14:53:30 +0100 Subject: [PATCH 419/543] USB: misc: iowarrior: fix up header size for USB_DEVICE_ID_CODEMERCS_IOW100 commit 14ff7460bb58662d86aa50298943cc7d25532e28 upstream. The USB_DEVICE_ID_CODEMERCS_IOW100 header size was incorrect, it should be 12, not 13. Cc: stable Fixes: 17a82716587e ("USB: iowarrior: fix up report size handling for some devices") Reported-by: Christoph Jung Link: https://lore.kernel.org/r/20230120135330.3842518-1-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/iowarrior.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 988a8c02e7e2..b421f1326087 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -814,7 +814,7 @@ static int iowarrior_probe(struct usb_interface *interface, break; case USB_DEVICE_ID_CODEMERCS_IOW100: - dev->report_size = 13; + dev->report_size = 12; break; } } From cbb0da62573c8fb8a21de8fde272acc8756c4fb4 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Tue, 10 Jan 2023 17:32:52 +0000 Subject: [PATCH 420/543] usb: misc: onboard_hub: Invert driver registration order commit e5854355d76b8d768cea8e4fc3ce6dfdba25518a upstream. The onboard_hub 'driver' consists of two drivers, a platform driver and a USB driver. Currently when the onboard hub driver is initialized it first registers the platform driver, then the USB driver. This results in a race condition when the 'attach' work is executed, which is scheduled when the platform device is probed. The purpose of fhe 'attach' work is to bind elegible USB hub devices to the onboard_hub USB driver. This fails if the work runs before the USB driver has been registered. Register the USB driver first, then the platform driver. This increases the chances that the onboard_hub USB devices are probed before their corresponding platform device, which the USB driver tries to locate in _probe(). The driver already handles this situation and defers probing if the onboard hub platform device doesn't exist yet. Cc: stable@vger.kernel.org Fixes: 8bc063641ceb ("usb: misc: Add onboard_usb_hub driver") Link: https://lore.kernel.org/lkml/Y6W00vQm3jfLflUJ@hovoldconsulting.com/T/#m0d64295f017942fd988f7c53425db302d61952b4 Reported-by: Alexander Stein Signed-off-by: Matthias Kaehlcke Tested-by: Stefan Wahren Link: https://lore.kernel.org/r/20230110172954.v2.1.I75494ebee7027a50235ce4b1e930fa73a578fbe2@changeid Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/onboard_usb_hub.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/misc/onboard_usb_hub.c b/drivers/usb/misc/onboard_usb_hub.c index d63c63942af1..03d5f4d5b1a5 100644 --- a/drivers/usb/misc/onboard_usb_hub.c +++ b/drivers/usb/misc/onboard_usb_hub.c @@ -431,13 +431,13 @@ static int __init onboard_hub_init(void) { int ret; - ret = platform_driver_register(&onboard_hub_driver); + ret = usb_register_device_driver(&onboard_hub_usbdev_driver, THIS_MODULE); if (ret) return ret; - ret = usb_register_device_driver(&onboard_hub_usbdev_driver, THIS_MODULE); + ret = platform_driver_register(&onboard_hub_driver); if (ret) - platform_driver_unregister(&onboard_hub_driver); + usb_deregister_device_driver(&onboard_hub_usbdev_driver); return ret; } From bab5687f4d1b01da8f2bdbcd38d7863ed17be555 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Tue, 10 Jan 2023 17:32:53 +0000 Subject: [PATCH 421/543] usb: misc: onboard_hub: Move 'attach' work to the driver commit cde37881e2e14590675d0acdfbad408300d9ca95 upstream. Currently each onboard_hub platform device owns an 'attach' work, which is scheduled when the device probes. With this deadlocks have been reported on a Raspberry Pi 3 B+ [1], which has nested onboard hubs. The flow of the deadlock is something like this (with the onboard_hub driver built as a module) [2]: - USB root hub is instantiated - core hub driver calls onboard_hub_create_pdevs(), which creates the 'raw' platform device for the 1st level hub - 1st level hub is probed by the core hub driver - core hub driver calls onboard_hub_create_pdevs(), which creates the 'raw' platform device for the 2nd level hub - onboard_hub platform driver is registered - platform device for 1st level hub is probed - schedules 'attach' work - platform device for 2nd level hub is probed - schedules 'attach' work - onboard_hub USB driver is registered - device (and parent) lock of hub is held while the device is re-probed with the onboard_hub driver - 'attach' work (running in another thread) calls driver_attach(), which blocks on one of the hub device locks - onboard_hub_destroy_pdevs() is called by the core hub driver when one of the hubs is detached - destroying the pdevs invokes onboard_hub_remove(), which waits for the 'attach' work to complete - waits forever, since the 'attach' work can't acquire the device lock Use a single work struct for the driver instead of having a work struct per onboard hub platform driver instance. With that it isn't necessary to cancel the work in onboard_hub_remove(), which fixes the deadlock. The work is only cancelled when the driver is unloaded. [1] https://lore.kernel.org/r/d04bcc45-3471-4417-b30b-5cf9880d785d@i2se.com/ [2] https://lore.kernel.org/all/Y6OrGbqaMy2iVDWB@google.com/ Cc: stable@vger.kernel.org Fixes: 8bc063641ceb ("usb: misc: Add onboard_usb_hub driver") Link: https://lore.kernel.org/r/d04bcc45-3471-4417-b30b-5cf9880d785d@i2se.com/ Link: https://lore.kernel.org/all/Y6OrGbqaMy2iVDWB@google.com/ Reported-by: Stefan Wahren Signed-off-by: Matthias Kaehlcke Link: https://lore.kernel.org/r/20230110172954.v2.2.I16b51f32db0c32f8a8532900bfe1c70c8572881a@changeid Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/onboard_usb_hub.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/usb/misc/onboard_usb_hub.c b/drivers/usb/misc/onboard_usb_hub.c index 03d5f4d5b1a5..044e75ad4d20 100644 --- a/drivers/usb/misc/onboard_usb_hub.c +++ b/drivers/usb/misc/onboard_usb_hub.c @@ -27,7 +27,10 @@ #include "onboard_usb_hub.h" +static void onboard_hub_attach_usb_driver(struct work_struct *work); + static struct usb_device_driver onboard_hub_usbdev_driver; +static DECLARE_WORK(attach_usb_driver_work, onboard_hub_attach_usb_driver); /************************** Platform driver **************************/ @@ -45,7 +48,6 @@ struct onboard_hub { bool is_powered_on; bool going_away; struct list_head udev_list; - struct work_struct attach_usb_driver_work; struct mutex lock; }; @@ -271,8 +273,7 @@ static int onboard_hub_probe(struct platform_device *pdev) * This needs to be done deferred to avoid self-deadlocks on systems * with nested onboard hubs. */ - INIT_WORK(&hub->attach_usb_driver_work, onboard_hub_attach_usb_driver); - schedule_work(&hub->attach_usb_driver_work); + schedule_work(&attach_usb_driver_work); return 0; } @@ -285,9 +286,6 @@ static int onboard_hub_remove(struct platform_device *pdev) hub->going_away = true; - if (&hub->attach_usb_driver_work != current_work()) - cancel_work_sync(&hub->attach_usb_driver_work); - mutex_lock(&hub->lock); /* unbind the USB devices to avoid dangling references to this device */ @@ -447,6 +445,8 @@ static void __exit onboard_hub_exit(void) { usb_deregister_device_driver(&onboard_hub_usbdev_driver); platform_driver_unregister(&onboard_hub_driver); + + cancel_work_sync(&attach_usb_driver_work); } module_exit(onboard_hub_exit); From a50c5c25b6e7d2824698c0e6385f882a18f4a498 Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Thu, 24 Nov 2022 17:49:39 +0000 Subject: [PATCH 422/543] misc: fastrpc: Fix use-after-free and race in fastrpc_map_find commit 9446fa1683a7e3937d9970248ced427c1983a1c5 upstream. Currently, there is a race window between the point when the mutex is unlocked in fastrpc_map_lookup and the reference count increasing (fastrpc_map_get) in fastrpc_map_find, which can also lead to use-after-free. So lets merge fastrpc_map_find into fastrpc_map_lookup which allows us to both protect the maps list by also taking the &fl->lock spinlock and the reference count, since the spinlock will be released only after. Add take_ref argument to make this suitable for all callers. Fixes: 8f6c1d8c4f0c ("misc: fastrpc: Add fdlist implementation") Cc: stable Co-developed-by: Ola Jeppsson Signed-off-by: Ola Jeppsson Signed-off-by: Abel Vesa Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221124174941.418450-2-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 7ff0b63c25e3..fc2173c59f8b 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -333,30 +333,31 @@ static void fastrpc_map_get(struct fastrpc_map *map) static int fastrpc_map_lookup(struct fastrpc_user *fl, int fd, - struct fastrpc_map **ppmap) + struct fastrpc_map **ppmap, bool take_ref) { + struct fastrpc_session_ctx *sess = fl->sctx; struct fastrpc_map *map = NULL; + int ret = -ENOENT; - mutex_lock(&fl->mutex); + spin_lock(&fl->lock); list_for_each_entry(map, &fl->maps, node) { - if (map->fd == fd) { - *ppmap = map; - mutex_unlock(&fl->mutex); - return 0; + if (map->fd != fd) + continue; + + if (take_ref) { + ret = fastrpc_map_get(map); + if (ret) { + dev_dbg(sess->dev, "%s: Failed to get map fd=%d ret=%d\n", + __func__, fd, ret); + break; + } } + + *ppmap = map; + ret = 0; + break; } - mutex_unlock(&fl->mutex); - - return -ENOENT; -} - -static int fastrpc_map_find(struct fastrpc_user *fl, int fd, - struct fastrpc_map **ppmap) -{ - int ret = fastrpc_map_lookup(fl, fd, ppmap); - - if (!ret) - fastrpc_map_get(*ppmap); + spin_unlock(&fl->lock); return ret; } @@ -703,7 +704,7 @@ static int fastrpc_map_create(struct fastrpc_user *fl, int fd, struct fastrpc_map *map = NULL; int err = 0; - if (!fastrpc_map_find(fl, fd, ppmap)) + if (!fastrpc_map_lookup(fl, fd, ppmap, true)) return 0; map = kzalloc(sizeof(*map), GFP_KERNEL); @@ -1026,7 +1027,7 @@ static int fastrpc_put_args(struct fastrpc_invoke_ctx *ctx, for (i = 0; i < FASTRPC_MAX_FDLIST; i++) { if (!fdlist[i]) break; - if (!fastrpc_map_lookup(fl, (int)fdlist[i], &mmap)) + if (!fastrpc_map_lookup(fl, (int)fdlist[i], &mmap, false)) fastrpc_map_put(mmap); } From 35ddd482345c43d9eec1f3406c0f20a95ed4054b Mon Sep 17 00:00:00 2001 From: Abel Vesa Date: Thu, 24 Nov 2022 17:49:40 +0000 Subject: [PATCH 423/543] misc: fastrpc: Don't remove map on creater_process and device_release commit 5bb96c8f9268e2fdb0e5321cbc358ee5941efc15 upstream. Do not remove the map from the list on error path in fastrpc_init_create_process, instead call fastrpc_map_put, to avoid use-after-free. Do not remove it on fastrpc_device_release either, call fastrpc_map_put instead. The fastrpc_free_map is the only proper place to remove the map. This is called only after the reference count is 0. Fixes: b49f6d83e290 ("misc: fastrpc: Fix a possible double free") Cc: stable Co-developed-by: Ola Jeppsson Signed-off-by: Ola Jeppsson Signed-off-by: Abel Vesa Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221124174941.418450-3-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index fc2173c59f8b..9ecbf673d321 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -316,6 +316,13 @@ static void fastrpc_free_map(struct kref *ref) dma_buf_put(map->buf); } + if (map->fl) { + spin_lock(&map->fl->lock); + list_del(&map->node); + spin_unlock(&map->fl->lock); + map->fl = NULL; + } + kfree(map); } @@ -1266,12 +1273,7 @@ err_invoke: fl->init_mem = NULL; fastrpc_buf_free(imem); err_alloc: - if (map) { - spin_lock(&fl->lock); - list_del(&map->node); - spin_unlock(&fl->lock); - fastrpc_map_put(map); - } + fastrpc_map_put(map); err: kfree(args); @@ -1347,10 +1349,8 @@ static int fastrpc_device_release(struct inode *inode, struct file *file) fastrpc_context_put(ctx); } - list_for_each_entry_safe(map, m, &fl->maps, node) { - list_del(&map->node); + list_for_each_entry_safe(map, m, &fl->maps, node) fastrpc_map_put(map); - } list_for_each_entry_safe(buf, b, &fl->mmaps, node) { list_del(&buf->node); From 079c78c68714f7d8d58e66c477b0243b31806907 Mon Sep 17 00:00:00 2001 From: Ola Jeppsson Date: Thu, 24 Nov 2022 17:49:41 +0000 Subject: [PATCH 424/543] misc: fastrpc: Fix use-after-free race condition for maps commit 96b328d119eca7563c1edcc4e1039a62e6370ecb upstream. It is possible that in between calling fastrpc_map_get() until map->fl->lock is taken in fastrpc_free_map(), another thread can call fastrpc_map_lookup() and get a reference to a map that is about to be deleted. Rewrite fastrpc_map_get() to only increase the reference count of a map if it's non-zero. Propagate this to callers so they can know if a map is about to be deleted. Fixes this warning: refcount_t: addition on 0; use-after-free. WARNING: CPU: 5 PID: 10100 at lib/refcount.c:25 refcount_warn_saturate ... Call trace: refcount_warn_saturate [fastrpc_map_get inlined] [fastrpc_map_lookup inlined] fastrpc_map_create fastrpc_internal_invoke fastrpc_device_ioctl __arm64_sys_ioctl invoke_syscall Fixes: c68cfb718c8f ("misc: fastrpc: Add support for context Invoke method") Cc: stable Signed-off-by: Ola Jeppsson Signed-off-by: Abel Vesa Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20221124174941.418450-4-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 9ecbf673d321..80811e852d8f 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -332,10 +332,12 @@ static void fastrpc_map_put(struct fastrpc_map *map) kref_put(&map->refcount, fastrpc_free_map); } -static void fastrpc_map_get(struct fastrpc_map *map) +static int fastrpc_map_get(struct fastrpc_map *map) { - if (map) - kref_get(&map->refcount); + if (!map) + return -ENOENT; + + return kref_get_unless_zero(&map->refcount) ? 0 : -ENOENT; } From 6a1594a780e7f2dbdd54510dce6656f4c0b527b9 Mon Sep 17 00:00:00 2001 From: Flavio Suligoi Date: Mon, 19 Dec 2022 13:47:59 +0100 Subject: [PATCH 425/543] usb: core: hub: disable autosuspend for TI TUSB8041 commit 7171b0e261b17de96490adf053b8bb4b00061bcf upstream. The Texas Instruments TUSB8041 has an autosuspend problem at high temperature. If there is not USB traffic, after a couple of ms, the device enters in autosuspend mode. In this condition the external clock stops working, to save energy. When the USB activity turns on, ther hub exits the autosuspend state, the clock starts running again and all works fine. At ambient temperature all works correctly, but at high temperature, when the USB activity turns on, the external clock doesn't restart and the hub disappears from the USB bus. Disabling the autosuspend mode for this hub solves the issue. Signed-off-by: Flavio Suligoi Cc: stable Acked-by: Alan Stern Link: https://lore.kernel.org/r/20221219124759.3207032-1-f.suligoi@asem.it Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index bbab424b0d55..0aaaadb02cc6 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -44,6 +44,9 @@ #define USB_PRODUCT_USB5534B 0x5534 #define USB_VENDOR_CYPRESS 0x04b4 #define USB_PRODUCT_CY7C65632 0x6570 +#define USB_VENDOR_TEXAS_INSTRUMENTS 0x0451 +#define USB_PRODUCT_TUSB8041_USB3 0x8140 +#define USB_PRODUCT_TUSB8041_USB2 0x8142 #define HUB_QUIRK_CHECK_PORT_AUTOSUSPEND 0x01 #define HUB_QUIRK_DISABLE_AUTOSUSPEND 0x02 @@ -5798,6 +5801,16 @@ static const struct usb_device_id hub_id_table[] = { .idVendor = USB_VENDOR_GENESYS_LOGIC, .bInterfaceClass = USB_CLASS_HUB, .driver_info = HUB_QUIRK_CHECK_PORT_AUTOSUSPEND}, + { .match_flags = USB_DEVICE_ID_MATCH_VENDOR + | USB_DEVICE_ID_MATCH_PRODUCT, + .idVendor = USB_VENDOR_TEXAS_INSTRUMENTS, + .idProduct = USB_PRODUCT_TUSB8041_USB2, + .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND}, + { .match_flags = USB_DEVICE_ID_MATCH_VENDOR + | USB_DEVICE_ID_MATCH_PRODUCT, + .idVendor = USB_VENDOR_TEXAS_INSTRUMENTS, + .idProduct = USB_PRODUCT_TUSB8041_USB3, + .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND}, { .match_flags = USB_DEVICE_ID_MATCH_DEV_CLASS, .bDeviceClass = USB_CLASS_HUB}, { .match_flags = USB_DEVICE_ID_MATCH_INT_CLASS, From 580a2a541273264b085acd840a7c59b8c0627094 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Tue, 3 Jan 2023 14:37:54 +0000 Subject: [PATCH 426/543] comedi: adv_pci1760: Fix PWM instruction handling commit 2efb6edd52dc50273f5e68ad863dd1b1fb2f2d1c upstream. (Actually, this is fixing the "Read the Current Status" command sent to the device's outgoing mailbox, but it is only currently used for the PWM instructions.) The PCI-1760 is operated mostly by sending commands to a set of Outgoing Mailbox registers, waiting for the command to complete, and reading the result from the Incoming Mailbox registers. One of these commands is the "Read the Current Status" command. The number of this command is 0x07 (see the User's Manual for the PCI-1760 at . The `PCI1760_CMD_GET_STATUS` macro defined in the driver should expand to this command number 0x07, but unfortunately it currently expands to 0x03. (Command number 0x03 is not defined in the User's Manual.) Correct the definition of the `PCI1760_CMD_GET_STATUS` macro to fix it. This is used by all the PWM subdevice related instructions handled by `pci1760_pwm_insn_config()` which are probably all broken. The effect of sending the undefined command number 0x03 is not known. Fixes: 14b93bb6bbf0 ("staging: comedi: adv_pci_dio: separate out PCI-1760 support") Cc: # v4.5+ Signed-off-by: Ian Abbott Link: https://lore.kernel.org/r/20230103143754.17564-1-abbotti@mev.co.uk Signed-off-by: Greg Kroah-Hartman --- drivers/comedi/drivers/adv_pci1760.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/comedi/drivers/adv_pci1760.c b/drivers/comedi/drivers/adv_pci1760.c index fcfc2e299110..27f3890f471d 100644 --- a/drivers/comedi/drivers/adv_pci1760.c +++ b/drivers/comedi/drivers/adv_pci1760.c @@ -58,7 +58,7 @@ #define PCI1760_CMD_CLR_IMB2 0x00 /* Clears IMB2 */ #define PCI1760_CMD_SET_DO 0x01 /* Set output state */ #define PCI1760_CMD_GET_DO 0x02 /* Read output status */ -#define PCI1760_CMD_GET_STATUS 0x03 /* Read current status */ +#define PCI1760_CMD_GET_STATUS 0x07 /* Read current status */ #define PCI1760_CMD_GET_FW_VER 0x0e /* Read firmware version */ #define PCI1760_CMD_GET_HW_VER 0x0f /* Read hardware version */ #define PCI1760_CMD_SET_PWM_HI(x) (0x10 + (x) * 2) /* Set "hi" period */ From 7a106e8a1cdb5f6c5d343edbfacbfa97331d8235 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 12 Jan 2023 14:33:19 +0100 Subject: [PATCH 427/543] ACPI: PRM: Check whether EFI runtime is available commit 182da6f2b81a78709c58021542fb694f8ed80774 upstream. The ACPI PRM address space handler calls efi_call_virt_pointer() to execute PRM firmware code, but doing so is only permitted when the EFI runtime environment is available. Otherwise, such calls are guaranteed to result in a crash, and must therefore be avoided. Given that the EFI runtime services may become unavailable after a crash occurring in the firmware, we need to check this each time the PRM address space handler is invoked. If the EFI runtime services were not available at registration time to being with, don't install the address space handler at all. Fixes: cefc7ca46235 ("ACPI: PRM: implement OperationRegion handler for the PlatformRtMechanism subtype") Signed-off-by: Ard Biesheuvel Cc: All applicable Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/prmt.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/acpi/prmt.c b/drivers/acpi/prmt.c index 998101cf16e4..3d4c4620f9f9 100644 --- a/drivers/acpi/prmt.c +++ b/drivers/acpi/prmt.c @@ -236,6 +236,11 @@ static acpi_status acpi_platformrt_space_handler(u32 function, efi_status_t status; struct prm_context_buffer context; + if (!efi_enabled(EFI_RUNTIME_SERVICES)) { + pr_err_ratelimited("PRM: EFI runtime services no longer available\n"); + return AE_NO_HANDLER; + } + /* * The returned acpi_status will always be AE_OK. Error values will be * saved in the first byte of the PRM message buffer to be used by ASL. @@ -325,6 +330,11 @@ void __init init_prmt(void) pr_info("PRM: found %u modules\n", mc); + if (!efi_enabled(EFI_RUNTIME_SERVICES)) { + pr_err("PRM: EFI runtime services unavailable\n"); + return; + } + status = acpi_install_address_space_handler(ACPI_ROOT_OBJECT, ACPI_ADR_SPACE_PLATFORM_RT, &acpi_platformrt_space_handler, From b7465be8904050b9fec4aeaa7602a794d3906ba7 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 9 Aug 2022 21:25:09 -0500 Subject: [PATCH 428/543] mmc: sunxi-mmc: Fix clock refcount imbalance during unbind commit 8509419758f2cc28dd05370385af0d91573b76b4 upstream. If the controller is suspended by runtime PM, the clock is already disabled, so do not try to disable it again during removal. Use pm_runtime_disable() to flush any pending runtime PM transitions. Fixes: 9a8e1e8cc2c0 ("mmc: sunxi: Add runtime_pm support") Signed-off-by: Samuel Holland Acked-by: Jernej Skrabec Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20220810022509.43743-1-samuel@sholland.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sunxi-mmc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/sunxi-mmc.c b/drivers/mmc/host/sunxi-mmc.c index b16e12e62e72..3db9f32d6a7b 100644 --- a/drivers/mmc/host/sunxi-mmc.c +++ b/drivers/mmc/host/sunxi-mmc.c @@ -1492,9 +1492,11 @@ static int sunxi_mmc_remove(struct platform_device *pdev) struct sunxi_mmc_host *host = mmc_priv(mmc); mmc_remove_host(mmc); - pm_runtime_force_suspend(&pdev->dev); - disable_irq(host->irq); - sunxi_mmc_disable(host); + pm_runtime_disable(&pdev->dev); + if (!pm_runtime_status_suspended(&pdev->dev)) { + disable_irq(host->irq); + sunxi_mmc_disable(host); + } dma_free_coherent(&pdev->dev, PAGE_SIZE, host->sg_cpu, host->sg_dma); mmc_free_host(mmc); From 33b161041471cc273c970cd6e9f318087af7a9ee Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Wed, 7 Dec 2022 19:23:15 +0800 Subject: [PATCH 429/543] mmc: sdhci-esdhc-imx: correct the tuning start tap and step setting commit 1e336aa0c0250ec84c6f16efac40c9f0138e367d upstream. Current code logic may be impacted by the setting of ROM/Bootloader, so unmask these bits first, then setting these bits accordingly. Fixes: 2b16cf326b70 ("mmc: sdhci-esdhc-imx: move tuning static configuration into hwinit function") Signed-off-by: Haibo Chen Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221207112315.1812222-1-haibo.chen@nxp.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-esdhc-imx.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index ffeb5759830f..8c62c3fba75e 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -107,6 +107,7 @@ #define ESDHC_TUNING_START_TAP_DEFAULT 0x1 #define ESDHC_TUNING_START_TAP_MASK 0x7f #define ESDHC_TUNING_CMD_CRC_CHECK_DISABLE (1 << 7) +#define ESDHC_TUNING_STEP_DEFAULT 0x1 #define ESDHC_TUNING_STEP_MASK 0x00070000 #define ESDHC_TUNING_STEP_SHIFT 16 @@ -1361,7 +1362,7 @@ static void sdhci_esdhc_imx_hwinit(struct sdhci_host *host) struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host); struct cqhci_host *cq_host = host->mmc->cqe_private; - int tmp; + u32 tmp; if (esdhc_is_usdhc(imx_data)) { /* @@ -1416,17 +1417,24 @@ static void sdhci_esdhc_imx_hwinit(struct sdhci_host *host) if (imx_data->socdata->flags & ESDHC_FLAG_STD_TUNING) { tmp = readl(host->ioaddr + ESDHC_TUNING_CTRL); - tmp |= ESDHC_STD_TUNING_EN | - ESDHC_TUNING_START_TAP_DEFAULT; - if (imx_data->boarddata.tuning_start_tap) { - tmp &= ~ESDHC_TUNING_START_TAP_MASK; + tmp |= ESDHC_STD_TUNING_EN; + + /* + * ROM code or bootloader may config the start tap + * and step, unmask them first. + */ + tmp &= ~(ESDHC_TUNING_START_TAP_MASK | ESDHC_TUNING_STEP_MASK); + if (imx_data->boarddata.tuning_start_tap) tmp |= imx_data->boarddata.tuning_start_tap; - } + else + tmp |= ESDHC_TUNING_START_TAP_DEFAULT; if (imx_data->boarddata.tuning_step) { - tmp &= ~ESDHC_TUNING_STEP_MASK; tmp |= imx_data->boarddata.tuning_step << ESDHC_TUNING_STEP_SHIFT; + } else { + tmp |= ESDHC_TUNING_STEP_DEFAULT + << ESDHC_TUNING_STEP_SHIFT; } /* Disable the CMD CRC check for tuning, if not, need to From 6062c992e912df1eedad52cf64efb3d48e8d35c5 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 22 Dec 2022 21:55:10 +0100 Subject: [PATCH 430/543] mm/hugetlb: fix PTE marker handling in hugetlb_change_protection() commit 0e678153f5be7e6c8d28835f5a678618da4b7a9c upstream. Patch series "mm/hugetlb: uffd-wp fixes for hugetlb_change_protection()". Playing with virtio-mem and background snapshots (using uffd-wp) on hugetlb in QEMU, I managed to trigger a VM_BUG_ON(). Looking into the details, hugetlb_change_protection() seems to not handle uffd-wp correctly in all cases. Patch #1 fixes my test case. I don't have reproducers for patch #2, as it requires running into migration entries. I did not yet check in detail yet if !hugetlb code requires similar care. This patch (of 2): There are two problematic cases when stumbling over a PTE marker in hugetlb_change_protection(): (1) We protect an uffd-wp PTE marker a second time using uffd-wp: we will end up in the "!huge_pte_none(pte)" case and mess up the PTE marker. (2) We unprotect a uffd-wp PTE marker: we will similarly end up in the "!huge_pte_none(pte)" case even though we cleared the PTE, because the "pte" variable is stale. We'll mess up the PTE marker. For example, if we later stumble over such a "wrongly modified" PTE marker, we'll treat it like a present PTE that maps some garbage page. This can, for example, be triggered by mapping a memfd backed by huge pages, registering uffd-wp, uffd-wp'ing an unmapped page and (a) uffd-wp'ing it a second time; or (b) uffd-unprotecting it; or (c) unregistering uffd-wp. Then, ff we trigger fallocate(FALLOC_FL_PUNCH_HOLE) on that file range, we will run into a VM_BUG_ON: [ 195.039560] page:00000000ba1f2987 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x0 [ 195.039565] flags: 0x7ffffc0001000(reserved|node=0|zone=0|lastcpupid=0x1fffff) [ 195.039568] raw: 0007ffffc0001000 ffffe742c0000008 ffffe742c0000008 0000000000000000 [ 195.039569] raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000 [ 195.039569] page dumped because: VM_BUG_ON_PAGE(compound && !PageHead(page)) [ 195.039573] ------------[ cut here ]------------ [ 195.039574] kernel BUG at mm/rmap.c:1346! [ 195.039579] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [ 195.039581] CPU: 7 PID: 4777 Comm: qemu-system-x86 Not tainted 6.0.12-200.fc36.x86_64 #1 [ 195.039583] Hardware name: LENOVO 20WNS1F81N/20WNS1F81N, BIOS N35ET50W (1.50 ) 09/15/2022 [ 195.039584] RIP: 0010:page_remove_rmap+0x45b/0x550 [ 195.039588] Code: [...] [ 195.039589] RSP: 0018:ffffbc03c3633ba8 EFLAGS: 00010292 [ 195.039591] RAX: 0000000000000040 RBX: ffffe742c0000000 RCX: 0000000000000000 [ 195.039592] RDX: 0000000000000002 RSI: ffffffff8e7aac1a RDI: 00000000ffffffff [ 195.039592] RBP: 0000000000000001 R08: 0000000000000000 R09: ffffbc03c3633a08 [ 195.039593] R10: 0000000000000003 R11: ffffffff8f146328 R12: ffff9b04c42754b0 [ 195.039594] R13: ffffffff8fcc6328 R14: ffffbc03c3633c80 R15: ffff9b0484ab9100 [ 195.039595] FS: 00007fc7aaf68640(0000) GS:ffff9b0bbf7c0000(0000) knlGS:0000000000000000 [ 195.039596] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 195.039597] CR2: 000055d402c49110 CR3: 0000000159392003 CR4: 0000000000772ee0 [ 195.039598] PKRU: 55555554 [ 195.039599] Call Trace: [ 195.039600] [ 195.039602] __unmap_hugepage_range+0x33b/0x7d0 [ 195.039605] unmap_hugepage_range+0x55/0x70 [ 195.039608] hugetlb_vmdelete_list+0x77/0xa0 [ 195.039611] hugetlbfs_fallocate+0x410/0x550 [ 195.039612] ? _raw_spin_unlock_irqrestore+0x23/0x40 [ 195.039616] vfs_fallocate+0x12e/0x360 [ 195.039618] __x64_sys_fallocate+0x40/0x70 [ 195.039620] do_syscall_64+0x58/0x80 [ 195.039623] ? syscall_exit_to_user_mode+0x17/0x40 [ 195.039624] ? do_syscall_64+0x67/0x80 [ 195.039626] entry_SYSCALL_64_after_hwframe+0x63/0xcd [ 195.039628] RIP: 0033:0x7fc7b590651f [ 195.039653] Code: [...] [ 195.039654] RSP: 002b:00007fc7aaf66e70 EFLAGS: 00000293 ORIG_RAX: 000000000000011d [ 195.039655] RAX: ffffffffffffffda RBX: 0000558ef4b7f370 RCX: 00007fc7b590651f [ 195.039656] RDX: 0000000018000000 RSI: 0000000000000003 RDI: 000000000000000c [ 195.039657] RBP: 0000000008000000 R08: 0000000000000000 R09: 0000000000000073 [ 195.039658] R10: 0000000008000000 R11: 0000000000000293 R12: 0000000018000000 [ 195.039658] R13: 00007fb8bbe00000 R14: 000000000000000c R15: 0000000000001000 [ 195.039661] Fix it by not going into the "!huge_pte_none(pte)" case if we stumble over an exclusive marker. spin_unlock() + continue would get the job done. However, instead, make it clearer that there are no fall-through statements: we process each case (hwpoison, migration, marker, !none, none) and then unlock the page table to continue with the next PTE. Let's avoid "continue" statements and use a single spin_unlock() at the end. Link: https://lkml.kernel.org/r/20221222205511.675832-1-david@redhat.com Link: https://lkml.kernel.org/r/20221222205511.675832-2-david@redhat.com Fixes: 60dfaad65aa9 ("mm/hugetlb: allow uffd wr-protect none ptes") Signed-off-by: David Hildenbrand Reviewed-by: Peter Xu Reviewed-by: Mike Kravetz Cc: Miaohe Lin Cc: Muchun Song Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e706086023ee..cc766ddd24c7 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6623,10 +6623,8 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, } pte = huge_ptep_get(ptep); if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) { - spin_unlock(ptl); - continue; - } - if (unlikely(is_hugetlb_entry_migration(pte))) { + /* Nothing to do. */ + } else if (unlikely(is_hugetlb_entry_migration(pte))) { swp_entry_t entry = pte_to_swp_entry(pte); struct page *page = pfn_swap_entry_to_page(entry); @@ -6647,18 +6645,13 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, set_huge_pte_at(mm, address, ptep, newpte); pages++; } - spin_unlock(ptl); - continue; - } - if (unlikely(pte_marker_uffd_wp(pte))) { - /* - * This is changing a non-present pte into a none pte, - * no need for huge_ptep_modify_prot_start/commit(). - */ + } else if (unlikely(is_pte_marker(pte))) { + /* No other markers apply for now. */ + WARN_ON_ONCE(!pte_marker_uffd_wp(pte)); if (uffd_wp_resolve) + /* Safe to modify directly (non-present->none). */ huge_pte_clear(mm, address, ptep, psize); - } - if (!huge_pte_none(pte)) { + } else if (!huge_pte_none(pte)) { pte_t old_pte; unsigned int shift = huge_page_shift(hstate_vma(vma)); From 8d6a675cd744c1af2688778fba2404fdf9737ddd Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 22 Dec 2022 21:55:11 +0100 Subject: [PATCH 431/543] mm/hugetlb: fix uffd-wp handling for migration entries in hugetlb_change_protection() commit 44f86392bdd165da7e43d3c772aeb1e128ffd6c8 upstream. We have to update the uffd-wp SWP PTE bit independent of the type of migration entry. Currently, if we're unlucky and we want to install/clear the uffd-wp bit just while we're migrating a read-only mapped hugetlb page, we would miss to set/clear the uffd-wp bit. Further, if we're processing a readable-exclusive migration entry and neither want to set or clear the uffd-wp bit, we could currently end up losing the uffd-wp bit. Note that the same would hold for writable migrating entries, however, having a writable migration entry with the uffd-wp bit set would already mean that something went wrong. Note that the change from !is_readable_migration_entry -> writable_migration_entry is harmless and actually cleaner, as raised by Miaohe Lin and discussed in [1]. [1] https://lkml.kernel.org/r/90dd6a93-4500-e0de-2bf0-bf522c311b0c@huawei.com Link: https://lkml.kernel.org/r/20221222205511.675832-3-david@redhat.com Fixes: 60dfaad65aa9 ("mm/hugetlb: allow uffd wr-protect none ptes") Signed-off-by: David Hildenbrand Reviewed-by: Mike Kravetz Cc: Miaohe Lin Cc: Muchun Song Cc: Peter Xu Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index cc766ddd24c7..3fc5a214f0b9 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6627,10 +6627,9 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, } else if (unlikely(is_hugetlb_entry_migration(pte))) { swp_entry_t entry = pte_to_swp_entry(pte); struct page *page = pfn_swap_entry_to_page(entry); + pte_t newpte = pte; - if (!is_readable_migration_entry(entry)) { - pte_t newpte; - + if (is_writable_migration_entry(entry)) { if (PageAnon(page)) entry = make_readable_exclusive_migration_entry( swp_offset(entry)); @@ -6638,13 +6637,15 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, entry = make_readable_migration_entry( swp_offset(entry)); newpte = swp_entry_to_pte(entry); - if (uffd_wp) - newpte = pte_swp_mkuffd_wp(newpte); - else if (uffd_wp_resolve) - newpte = pte_swp_clear_uffd_wp(newpte); - set_huge_pte_at(mm, address, ptep, newpte); pages++; } + + if (uffd_wp) + newpte = pte_swp_mkuffd_wp(newpte); + else if (uffd_wp_resolve) + newpte = pte_swp_clear_uffd_wp(newpte); + if (!pte_same(pte, newpte)) + set_huge_pte_at(mm, address, ptep, newpte); } else if (unlikely(is_pte_marker(pte))) { /* No other markers apply for now. */ WARN_ON_ONCE(!pte_marker_uffd_wp(pte)); From 3b8ede66658cd83db17a370152535e464786c14e Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 4 Jan 2023 17:52:05 -0500 Subject: [PATCH 432/543] mm/hugetlb: pre-allocate pgtable pages for uffd wr-protects commit fed15f1345dc8a7fc8baa81e8b55c3ba010d7f4b upstream. Userfaultfd-wp uses pte markers to mark wr-protected pages for both shmem and hugetlb. Shmem has pre-allocation ready for markers, but hugetlb path was overlooked. Doing so by calling huge_pte_alloc() if the initial pgtable walk fails to find the huge ptep. It's possible that huge_pte_alloc() can fail with high memory pressure, in that case stop the loop immediately and fail silently. This is not the most ideal solution but it matches with what we do with shmem meanwhile it avoids the splat in dmesg. Link: https://lkml.kernel.org/r/20230104225207.1066932-2-peterx@redhat.com Fixes: 60dfaad65aa9 ("mm/hugetlb: allow uffd wr-protect none ptes") Signed-off-by: Peter Xu Reported-by: James Houghton Reviewed-by: Mike Kravetz Acked-by: David Hildenbrand Acked-by: James Houghton Cc: Andrea Arcangeli Cc: Axel Rasmussen Cc: Muchun Song Cc: Nadav Amit Cc: [5.19+] Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/hugetlb.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 3fc5a214f0b9..52475c4262e4 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6604,8 +6604,17 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, spinlock_t *ptl; ptep = huge_pte_offset(mm, address, psize); if (!ptep) { - address |= last_addr_mask; - continue; + if (!uffd_wp) { + address |= last_addr_mask; + continue; + } + /* + * Userfaultfd wr-protect requires pgtable + * pre-allocations to install pte markers. + */ + ptep = huge_pte_alloc(mm, vma, address, psize); + if (!ptep) + break; } ptl = huge_pte_lock(h, mm, ptep); if (huge_pmd_unshare(mm, vma, address, ptep)) { From bcde505af166147f542634384640a4fc95be12fe Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 9 Dec 2022 09:09:12 +0100 Subject: [PATCH 433/543] mm/userfaultfd: enable writenotify while userfaultfd-wp is enabled for a VMA commit 51d3d5eb74ff53b92dcff48b30ae2ed8edd85a32 upstream. Currently, we don't enable writenotify when enabling userfaultfd-wp on a shared writable mapping (for now only shmem and hugetlb). The consequence is that vma->vm_page_prot will still include write permissions, to be set as default for all PTEs that get remapped (e.g., mprotect(), NUMA hinting, page migration, ...). So far, vma->vm_page_prot is assumed to be a safe default, meaning that we only add permissions (e.g., mkwrite) but not remove permissions (e.g., wrprotect). For example, when enabling softdirty tracking, we enable writenotify. With uffd-wp on shared mappings, that changed. More details on vma->vm_page_prot semantics were summarized in [1]. This is problematic for uffd-wp: we'd have to manually check for a uffd-wp PTEs/PMDs and manually write-protect PTEs/PMDs, which is error prone. Prone to such issues is any code that uses vma->vm_page_prot to set PTE permissions: primarily pte_modify() and mk_pte(). Instead, let's enable writenotify such that PTEs/PMDs/... will be mapped write-protected as default and we will only allow selected PTEs that are definitely safe to be mapped without write-protection (see can_change_pte_writable()) to be writable. In the future, we might want to enable write-bit recovery -- e.g., can_change_pte_writable() -- at more locations, for example, also when removing uffd-wp protection. This fixes two known cases: (a) remove_migration_pte() mapping uffd-wp'ed PTEs writable, resulting in uffd-wp not triggering on write access. (b) do_numa_page() / do_huge_pmd_numa_page() mapping uffd-wp'ed PTEs/PMDs writable, resulting in uffd-wp not triggering on write access. Note that do_numa_page() / do_huge_pmd_numa_page() can be reached even without NUMA hinting (which currently doesn't seem to be applicable to shmem), for example, by using uffd-wp with a PROT_WRITE shmem VMA. On such a VMA, userfaultfd-wp is currently non-functional. Note that when enabling userfaultfd-wp, there is no need to walk page tables to enforce the new default protection for the PTEs: we know that they cannot be uffd-wp'ed yet, because that can only happen after enabling uffd-wp for the VMA in general. Also note that this makes mprotect() on ranges with uffd-wp'ed PTEs not accidentally set the write bit -- which would result in uffd-wp not triggering on later write access. This commit makes uffd-wp on shmem behave just like uffd-wp on anonymous memory in that regard, even though, mixing mprotect with uffd-wp is controversial. [1] https://lkml.kernel.org/r/92173bad-caa3-6b43-9d1e-9a471fdbc184@redhat.com Link: https://lkml.kernel.org/r/20221209080912.7968-1-david@redhat.com Fixes: b1f9e876862d ("mm/uffd: enable write protection for shmem & hugetlbfs") Signed-off-by: David Hildenbrand Reported-by: Ives van Hoorne Debugged-by: Peter Xu Acked-by: Peter Xu Cc: Hugh Dickins Cc: Alistair Popple Cc: Mike Rapoport Cc: Nadav Amit Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/userfaultfd.c | 28 ++++++++++++++++++++++------ mm/mmap.c | 4 ++++ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 98ac37e34e3d..cc694846617a 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -108,6 +108,21 @@ static bool userfaultfd_is_initialized(struct userfaultfd_ctx *ctx) return ctx->features & UFFD_FEATURE_INITIALIZED; } +static void userfaultfd_set_vm_flags(struct vm_area_struct *vma, + vm_flags_t flags) +{ + const bool uffd_wp_changed = (vma->vm_flags ^ flags) & VM_UFFD_WP; + + vma->vm_flags = flags; + /* + * For shared mappings, we want to enable writenotify while + * userfaultfd-wp is enabled (see vma_wants_writenotify()). We'll simply + * recalculate vma->vm_page_prot whenever userfaultfd-wp changes. + */ + if ((vma->vm_flags & VM_SHARED) && uffd_wp_changed) + vma_set_page_prot(vma); +} + static int userfaultfd_wake_function(wait_queue_entry_t *wq, unsigned mode, int wake_flags, void *key) { @@ -618,7 +633,8 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, for_each_vma(vmi, vma) { if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) { vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; - vma->vm_flags &= ~__VM_UFFD_FLAGS; + userfaultfd_set_vm_flags(vma, + vma->vm_flags & ~__VM_UFFD_FLAGS); } } mmap_write_unlock(mm); @@ -652,7 +668,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs) octx = vma->vm_userfaultfd_ctx.ctx; if (!octx || !(octx->features & UFFD_FEATURE_EVENT_FORK)) { vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; - vma->vm_flags &= ~__VM_UFFD_FLAGS; + userfaultfd_set_vm_flags(vma, vma->vm_flags & ~__VM_UFFD_FLAGS); return 0; } @@ -733,7 +749,7 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma, } else { /* Drop uffd context if remap feature not enabled */ vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; - vma->vm_flags &= ~__VM_UFFD_FLAGS; + userfaultfd_set_vm_flags(vma, vma->vm_flags & ~__VM_UFFD_FLAGS); } } @@ -895,7 +911,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file) prev = vma; } - vma->vm_flags = new_flags; + userfaultfd_set_vm_flags(vma, new_flags); vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; } mmap_write_unlock(mm); @@ -1463,7 +1479,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, * the next vma was merged into the current one and * the current one has not been updated yet. */ - vma->vm_flags = new_flags; + userfaultfd_set_vm_flags(vma, new_flags); vma->vm_userfaultfd_ctx.ctx = ctx; if (is_vm_hugetlb_page(vma) && uffd_disable_huge_pmd_share(vma)) @@ -1651,7 +1667,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, * the next vma was merged into the current one and * the current one has not been updated yet. */ - vma->vm_flags = new_flags; + userfaultfd_set_vm_flags(vma, new_flags); vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX; skip: diff --git a/mm/mmap.c b/mm/mmap.c index 54abd46e6007..177714886849 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1524,6 +1524,10 @@ int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot) if (vma_soft_dirty_enabled(vma) && !is_vm_hugetlb_page(vma)) return 1; + /* Do we need write faults for uffd-wp tracking? */ + if (userfaultfd_wp(vma)) + return 1; + /* Specialty mapping? */ if (vm_flags & VM_PFNMAP) return 0; From f2f52dd4f50f3d560b88692d94cd5829070b29ee Mon Sep 17 00:00:00 2001 From: Zach O'Keefe Date: Sat, 24 Dec 2022 00:20:34 -0800 Subject: [PATCH 434/543] mm/MADV_COLLAPSE: don't expand collapse when vm_end is past requested end commit 52dc031088f00e323140ece4004e70c33153c6dd upstream. MADV_COLLAPSE acts on one hugepage-aligned/sized region at a time, until it has collapsed all eligible memory contained within the bounds supplied by the user. At the top of each hugepage iteration we (re)lock mmap_lock and (re)validate the VMA for eligibility and update variables that might have changed while mmap_lock was dropped. One thing that might occur is that the VMA could be resized, and as such, we refetch vma->vm_end to make sure we don't collapse past the end of the VMA's new end. However, it's possible that when refetching vma->vm_end that we expand the region acted on by MADV_COLLAPSE if vma->vm_end is greater than size+len supplied by the user. The consequence here is that we may attempt to collapse more memory than requested, possibly yielding either "too much success" or "false failure" user-visible results. An example of the former is if we MADV_COLLAPSE the first 4MiB of a 2TiB mmap()'d file, the incorrect refetch would cause the operation to block for much longer than anticipated as we attempt to collapse the entire TiB region. An example of the latter is that applying MADV_COLLPSE to a 4MiB file mapped to the start of a 6MiB VMA will successfully collapse the first 4MiB, then incorrectly attempt to collapse the last hugepage-aligned/sized region -- fail (since readahead/page cache lookup will fail) -- and report a failure to the user. I don't believe there is a kernel stability concern here as we always (re)validate the VMA / region accordingly. Also as Hugh mentions, the user-visible effects are: we try to collapse more memory than requested by the user, and/or failing an operation that should have otherwise succeeded. An example is trying to collapse a 4MiB file contained within a 12MiB VMA. Don't expand the acted-on region when refetching vma->vm_end. Link: https://lkml.kernel.org/r/20221224082035.3197140-1-zokeefe@google.com Fixes: 4d24de9425f7 ("mm: MADV_COLLAPSE: refetch vm_end after reacquiring mmap_lock") Signed-off-by: Zach O'Keefe Reported-by: Hugh Dickins Cc: Yang Shi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/khugepaged.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index bb1d79a14feb..c982b250aa31 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2644,7 +2644,7 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev, goto out_nolock; } - hend = vma->vm_end & HPAGE_PMD_MASK; + hend = min(hend, vma->vm_end & HPAGE_PMD_MASK); } mmap_assert_locked(mm); memset(cc->node_load, 0, sizeof(cc->node_load)); From 6ec8411329941a568cfcd05033eec5b329a2564b Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 12 Dec 2022 10:19:37 +0800 Subject: [PATCH 435/543] btrfs: add extra error messages to cover non-ENOMEM errors from device_add_list() commit ed02363fbbed52a3f5ea0d188edd09045a806eb5 upstream. [BUG] When test case btrfs/219 (aka, mount a registered device but with a lower generation) failed, there is not any useful information for the end user to find out what's going wrong. The mount failure just looks like this: # mount -o loop /tmp/219.img2 /mnt/btrfs/ mount: /mnt/btrfs: mount(2) system call failed: File exists. dmesg(1) may have more information after failed mount system call. While the dmesg contains nothing but the loop device change: loop1: detected capacity change from 0 to 524288 [CAUSE] In device_list_add() we have a lot of extra checks to reject invalid cases. That function also contains the regular device scan result like the following prompt: BTRFS: device fsid 6222333e-f9f1-47e6-b306-55ddd4dcaef4 devid 1 transid 8 /dev/loop0 scanned by systemd-udevd (3027) But unfortunately not all errors have their own error messages, thus if we hit something wrong in device_add_list(), there may be no error messages at all. [FIX] Add errors message for all non-ENOMEM errors. For ENOMEM, I'd say we're in a much worse situation, and there should be some OOM messages way before our call sites. CC: stable@vger.kernel.org # 6.0+ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/volumes.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index dba087ad40ea..65e4e887605f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -770,8 +770,11 @@ static noinline struct btrfs_device *device_list_add(const char *path, BTRFS_SUPER_FLAG_CHANGING_FSID_V2); error = lookup_bdev(path, &path_devt); - if (error) + if (error) { + btrfs_err(NULL, "failed to lookup block device for path %s: %d", + path, error); return ERR_PTR(error); + } if (fsid_change_in_progress) { if (!has_metadata_uuid) @@ -836,6 +839,9 @@ static noinline struct btrfs_device *device_list_add(const char *path, if (!device) { if (fs_devices->opened) { + btrfs_err(NULL, + "device %s belongs to fsid %pU, and the fs is already mounted", + path, fs_devices->fsid); mutex_unlock(&fs_devices->device_list_mutex); return ERR_PTR(-EBUSY); } @@ -910,6 +916,9 @@ static noinline struct btrfs_device *device_list_add(const char *path, * generation are equal. */ mutex_unlock(&fs_devices->device_list_mutex); + btrfs_err(NULL, +"device %s already registered with a higher generation, found %llu expect %llu", + path, found_transid, device->generation); return ERR_PTR(-EEXIST); } From 168492decbb6155f890f16373493d3d807af271a Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 10 Jan 2023 14:56:34 +0000 Subject: [PATCH 436/543] btrfs: fix missing error handling when logging directory items commit 6d3d970b2735b967650d319be27268fedc5598d1 upstream. When logging a directory, at log_dir_items(), if we get an error when attempting to search the subvolume tree for a dir index item, we end up returning 0 (success) from log_dir_items() because 'err' is left with a value of 0. This can lead to a few problems, specially in the case the variable 'last_offset' has a value of (u64)-1 (and it's initialized to that when it was declared): 1) By returning from log_dir_items() with success (0) and a value of (u64)-1 for '*last_offset_ret', we end up not logging any other dir index keys that follow the missing, just deleted, index key. The (u64)-1 value makes log_directory_changes() not call log_dir_items() again; 2) Before returning with success (0), log_dir_items(), will log a dir index range item covering a range from the last old dentry index (stored in the variable 'last_old_dentry_offset') to the value of 'last_offset'. If 'last_offset' has a value of (u64)-1, then it means if the log is persisted and replayed after a power failure, it will cause deletion of all the directory entries that have an index number between last_old_dentry_offset + 1 and (u64)-1; 3) We can end up returning from log_dir_items() with ctx->last_dir_item_offset having a lower value than inode->last_dir_index_offset, because the former is set to the current key we are processing at process_dir_items_leaf(), and at the end of log_directory_changes() we set inode->last_dir_index_offset to the current value of ctx->last_dir_item_offset. So if for example a deletion of a lower dir index key happened, we set ctx->last_dir_item_offset to that index value, then if we return from log_dir_items() because btrfs_search_slot() returned an error, we end up returning without any error from log_dir_items() and then log_directory_changes() sets inode->last_dir_index_offset to a lower value than it had before. This can result in unpredictable and unexpected behaviour when we need to log again the directory in the same transaction, and can result in ending up with a log tree leaf that has duplicated keys, as we do batch insertions of dir index keys into a log tree. Fix this by setting 'err' to the value of 'ret' in case btrfs_search_slot() or btrfs_previous_item() returned an error. That will result in falling back to a full transaction commit. Reported-by: David Arendt Link: https://lore.kernel.org/linux-btrfs/ae169fc6-f504-28f0-a098-6fa6a4dfb612@leemhuis.info/ Fixes: e02119d5a7b4 ("Btrfs: Add a write ahead tree log to optimize synchronous operations") CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c3cf3dabe0b1..bd545713f275 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3857,7 +3857,10 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, path->slots[0]); if (tmp.type == BTRFS_DIR_INDEX_KEY) last_old_dentry_offset = tmp.offset; + } else if (ret < 0) { + err = ret; } + goto done; } @@ -3877,7 +3880,11 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, */ if (tmp.type == BTRFS_DIR_INDEX_KEY) last_old_dentry_offset = tmp.offset; + } else if (ret < 0) { + err = ret; + goto done; } + btrfs_release_path(path); /* @@ -3890,6 +3897,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, */ search: ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); + if (ret < 0) + err = ret; if (ret != 0) goto done; From f4c0df59e01a92dee02023e23b9438ae7f3d8024 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 10 Jan 2023 14:56:35 +0000 Subject: [PATCH 437/543] btrfs: fix directory logging due to race with concurrent index key deletion commit 8bb6898da6271d82d8e76d8088d66b971a7dcfa6 upstream. Sometimes we log a directory without holding its VFS lock, so while we logging it, dir index entries may be added or removed. This typically happens when logging a dentry from a parent directory that points to a new directory, through log_new_dir_dentries(), or when while logging some other inode we also need to log its parent directories (through btrfs_log_all_parents()). This means that while we are at log_dir_items(), we may not find a dir index key we found before, because it was deleted in the meanwhile, so a call to btrfs_search_slot() may return 1 (key not found). In that case we return from log_dir_items() with a success value (the variable 'err' has a value of 0). This can lead to a few problems, specially in the case where the variable 'last_offset' has a value of (u64)-1 (and it's initialized to that when it was declared): 1) By returning from log_dir_items() with success (0) and a value of (u64)-1 for '*last_offset_ret', we end up not logging any other dir index keys that follow the missing, just deleted, index key. The (u64)-1 value makes log_directory_changes() not call log_dir_items() again; 2) Before returning with success (0), log_dir_items(), will log a dir index range item covering a range from the last old dentry index (stored in the variable 'last_old_dentry_offset') to the value of 'last_offset'. If 'last_offset' has a value of (u64)-1, then it means if the log is persisted and replayed after a power failure, it will cause deletion of all the directory entries that have an index number between last_old_dentry_offset + 1 and (u64)-1; 3) We can end up returning from log_dir_items() with ctx->last_dir_item_offset having a lower value than inode->last_dir_index_offset, because the former is set to the current key we are processing at process_dir_items_leaf(), and at the end of log_directory_changes() we set inode->last_dir_index_offset to the current value of ctx->last_dir_item_offset. So if for example a deletion of a lower dir index key happened, we set ctx->last_dir_item_offset to that index value, then if we return from log_dir_items() because btrfs_search_slot() returned 1, we end up returning from log_dir_items() with success (0) and then log_directory_changes() sets inode->last_dir_index_offset to a lower value than it had before. This can result in unpredictable and unexpected behaviour when we need to log again the directory in the same transaction, and can result in ending up with a log tree leaf that has duplicated keys, as we do batch insertions of dir index keys into a log tree. So fix this by making log_dir_items() move on to the next dir index key if it does not find the one it was looking for. Reported-by: David Arendt Link: https://lore.kernel.org/linux-btrfs/ae169fc6-f504-28f0-a098-6fa6a4dfb612@leemhuis.info/ CC: stable@vger.kernel.org # 4.14+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index bd545713f275..9da5cc3b6546 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3888,17 +3888,26 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, btrfs_release_path(path); /* - * Find the first key from this transaction again. See the note for - * log_new_dir_dentries, if we're logging a directory recursively we - * won't be holding its i_mutex, which means we can modify the directory - * while we're logging it. If we remove an entry between our first - * search and this search we'll not find the key again and can just - * bail. + * Find the first key from this transaction again or the one we were at + * in the loop below in case we had to reschedule. We may be logging the + * directory without holding its VFS lock, which happen when logging new + * dentries (through log_new_dir_dentries()) or in some cases when we + * need to log the parent directory of an inode. This means a dir index + * key might be deleted from the inode's root, and therefore we may not + * find it anymore. If we can't find it, just move to the next key. We + * can not bail out and ignore, because if we do that we will simply + * not log dir index keys that come after the one that was just deleted + * and we can end up logging a dir index range that ends at (u64)-1 + * (@last_offset is initialized to that), resulting in removing dir + * entries we should not remove at log replay time. */ search: ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); + if (ret > 0) + ret = btrfs_next_item(root, path); if (ret < 0) err = ret; + /* If ret is 1, there are no more keys in the inode's root. */ if (ret != 0) goto done; From 076fb040d44761400e68ecd0e359c6fe525ec3be Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 10 Jan 2023 14:56:36 +0000 Subject: [PATCH 438/543] btrfs: add missing setup of log for full commit at add_conflicting_inode() commit 94cd63ae679973edeb5ea95ec25a54467c3e54c8 upstream. When logging conflicting inodes, if we reach the maximum limit of inodes, we return BTRFS_LOG_FORCE_COMMIT to force a transaction commit. However we don't mark the log for full commit (with btrfs_set_log_full_commit()), which means that once we leave the log transaction and before we commit the transaction, some other task may sync the log, which is incomplete as we have not logged all conflicting inodes, leading to some inconsistent in case that log ends up being replayed. So also call btrfs_set_log_full_commit() at add_conflicting_inode(). Fixes: e09d94c9e448 ("btrfs: log conflicting inodes without holding log mutex of the initial inode") CC: stable@vger.kernel.org # 6.1 Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9da5cc3b6546..d3fa156de309 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5626,8 +5626,10 @@ static int add_conflicting_inode(struct btrfs_trans_handle *trans, * LOG_INODE_EXISTS mode) and slow down other fsyncs or transaction * commits. */ - if (ctx->num_conflict_inodes >= MAX_CONFLICT_INODES) + if (ctx->num_conflict_inodes >= MAX_CONFLICT_INODES) { + btrfs_set_log_full_commit(trans); return BTRFS_LOG_FORCE_COMMIT; + } inode = btrfs_iget(root->fs_info->sb, ino, root); /* From 23ffd7fc231aa812c48b91e88bb46f96d2edb2f3 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 10 Jan 2023 14:56:37 +0000 Subject: [PATCH 439/543] btrfs: do not abort transaction on failure to write log tree when syncing log commit 16199ad9eb6db60a6b10794a09fc1ac6d09312ff upstream. When syncing the log, if we fail to write log tree extent buffers, we mark the log for a full commit and abort the transaction. However we don't need to abort the transaction, all we really need to do is to make sure no one can commit a superblock pointing to new log tree roots. Just because we got a failure writing extent buffers for a log tree, it does not mean we will also fail to do a transaction commit. One particular case is if due to a bug somewhere, when writing log tree extent buffers, the tree checker detects some corruption and the writeout fails because of that. Aborting the transaction can be very disruptive for a user, specially if the issue happened on a root filesystem. One example is the scenario in the Link tag below, where an isolated corruption on log tree leaves was causing transaction aborts when syncing the log. Link: https://lore.kernel.org/linux-btrfs/ae169fc6-f504-28f0-a098-6fa6a4dfb612@leemhuis.info/ CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/disk-io.c | 9 ++++++++- fs/btrfs/tree-log.c | 2 -- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6538f52262ca..883a3671a977 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -344,7 +344,14 @@ error: btrfs_print_tree(eb, 0); btrfs_err(fs_info, "block=%llu write time tree block corruption detected", eb->start); - WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); + /* + * Be noisy if this is an extent buffer from a log tree. We don't abort + * a transaction in case there's a bad log tree extent buffer, we just + * fallback to a transaction commit. Still we want to know when there is + * a bad log tree extent buffer, as that may signal a bug somewhere. + */ + WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG) || + btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID); return ret; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d3fa156de309..6efd9e05fc5a 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3011,7 +3011,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, ret = 0; if (ret) { blk_finish_plug(&plug); - btrfs_abort_transaction(trans, ret); btrfs_set_log_full_commit(trans); mutex_unlock(&root->log_mutex); goto out; @@ -3143,7 +3142,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, goto out_wake_log_root; } else if (ret) { btrfs_set_log_full_commit(trans); - btrfs_abort_transaction(trans, ret); mutex_unlock(&log_root_tree->log_mutex); goto out_wake_log_root; } From 34fe5b527e68c7882def3ea4c590034c5b6cb106 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 10 Jan 2023 14:56:38 +0000 Subject: [PATCH 440/543] btrfs: do not abort transaction on failure to update log root commit 09e44868f1e03c7825ca4283256abedc95e249a3 upstream. When syncing a log, if we fail to update a log root in the log root tree, we are aborting the transaction if the failure was not -ENOSPC. This is excessive because there is a chance that a transaction commit can succeed, and therefore avoid to turn the filesystem into RO mode. All we need to be careful about is to mark the log for a full commit, which we already do, to make sure no one commits a super block pointing to an outdated log root tree. So don't abort the transaction if we fail to update a log root in the log root tree, and log an error if the failure is not -ENOSPC, so that it does not go completely unnoticed. CC: stable@vger.kernel.org # 6.0+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tree-log.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 6efd9e05fc5a..7535857f4c8f 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3075,15 +3075,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, blk_finish_plug(&plug); btrfs_set_log_full_commit(trans); - - if (ret != -ENOSPC) { - btrfs_abort_transaction(trans, ret); - mutex_unlock(&log_root_tree->log_mutex); - goto out; - } + if (ret != -ENOSPC) + btrfs_err(fs_info, + "failed to update log for root %llu ret %d", + root->root_key.objectid, ret); btrfs_wait_tree_log_extents(log, mark); mutex_unlock(&log_root_tree->log_mutex); - ret = BTRFS_LOG_FORCE_COMMIT; goto out; } From bb2c2e62539f2b63c5e0beb51501d328260c7595 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 10 Jan 2023 15:14:17 +0800 Subject: [PATCH 441/543] btrfs: qgroup: do not warn on record without old_roots populated commit 75181406b4eafacc531ff2ee5fb032bd93317e2b upstream. [BUG] There are some reports from the mailing list that since v6.1 kernel, the WARN_ON() inside btrfs_qgroup_account_extent() gets triggered during rescan: WARNING: CPU: 3 PID: 6424 at fs/btrfs/qgroup.c:2756 btrfs_qgroup_account_extents+0x1ae/0x260 [btrfs] CPU: 3 PID: 6424 Comm: snapperd Tainted: P OE 6.1.2-1-default #1 openSUSE Tumbleweed 05c7a1b1b61d5627475528f71f50444637b5aad7 RIP: 0010:btrfs_qgroup_account_extents+0x1ae/0x260 [btrfs] Call Trace: btrfs_commit_transaction+0x30c/0xb40 [btrfs c39c9c546c241c593f03bd6d5f39ea1b676250f6] ? start_transaction+0xc3/0x5b0 [btrfs c39c9c546c241c593f03bd6d5f39ea1b676250f6] btrfs_qgroup_rescan+0x42/0xc0 [btrfs c39c9c546c241c593f03bd6d5f39ea1b676250f6] btrfs_ioctl+0x1ab9/0x25c0 [btrfs c39c9c546c241c593f03bd6d5f39ea1b676250f6] ? __rseq_handle_notify_resume+0xa9/0x4a0 ? mntput_no_expire+0x4a/0x240 ? __seccomp_filter+0x319/0x4d0 __x64_sys_ioctl+0x90/0xd0 do_syscall_64+0x5b/0x80 ? syscall_exit_to_user_mode+0x17/0x40 ? do_syscall_64+0x67/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7fd9b790d9bf [CAUSE] Since commit e15e9f43c7ca ("btrfs: introduce BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING to skip qgroup accounting"), if our qgroup is already in inconsistent state, we will no longer do the time-consuming backref walk. This can leave some qgroup records without a valid old_roots ulist. Normally this is fine, as btrfs_qgroup_account_extents() would also skip those records if we have NO_ACCOUNTING flag set. But there is a small window, if we have NO_ACCOUNTING flag set, and inserted some qgroup_record without a old_roots ulist, but then the user triggered a qgroup rescan. During btrfs_qgroup_rescan(), we firstly clear NO_ACCOUNTING flag, then commit current transaction. And since we have a qgroup_record with old_roots = NULL, we trigger the WARN_ON() during btrfs_qgroup_account_extents(). [FIX] Unfortunately due to the introduction of NO_ACCOUNTING flag, the assumption that every qgroup_record would have its old_roots populated is no longer correct. Fix the false alerts and drop the WARN_ON(). Reported-by: Lukas Straub Reported-by: HanatoK Fixes: e15e9f43c7ca ("btrfs: introduce BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING to skip qgroup accounting") CC: stable@vger.kernel.org # 6.1 Link: https://lore.kernel.org/linux-btrfs/2403c697-ddaf-58ad-3829-0335fc89df09@gmail.com/ Signed-off-by: Qu Wenruo Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/qgroup.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index b74105a10f16..1d84ff393d4b 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2751,9 +2751,19 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING)) { /* * Old roots should be searched when inserting qgroup - * extent record + * extent record. + * + * But for INCONSISTENT (NO_ACCOUNTING) -> rescan case, + * we may have some record inserted during + * NO_ACCOUNTING (thus no old_roots populated), but + * later we start rescan, which clears NO_ACCOUNTING, + * leaving some inserted records without old_roots + * populated. + * + * Those cases are rare and should not cause too much + * time spent during commit_transaction(). */ - if (WARN_ON(!record->old_roots)) { + if (!record->old_roots) { /* Search commit root to find old_roots */ ret = btrfs_find_all_roots(NULL, fs_info, record->bytenr, 0, From 8af00fc7b670cd9bfb80ff3086926d4d6a51f2a9 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 12 Jan 2023 14:17:20 +0000 Subject: [PATCH 442/543] btrfs: fix invalid leaf access due to inline extent during lseek commit 1f55ee6d0901d915801618bda0af4e5b937e3db7 upstream. During lseek, for SEEK_DATA and SEEK_HOLE modes, we access the disk_bytenr of an extent without checking its type. However inline extents have their data starting the offset of the disk_bytenr field, so accessing that field when we have an inline extent can result in either of the following: 1) Interpret the inline extent's data as a disk_bytenr value; 2) In case the inline data is less than 8 bytes, we access part of some other item in the leaf, or unused space in the leaf; 3) In case the inline data is less than 8 bytes and the extent item is the first item in the leaf, we can access beyond the leaf's limit. So fix this by not accessing the disk_bytenr field if we have an inline extent. Fixes: b6e833567ea1 ("btrfs: make hole and data seeking a lot more efficient") Reported-by: Matthias Schoepfer Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=216908 Link: https://lore.kernel.org/linux-btrfs/7f25442f-b121-2a3a-5a3d-22bcaae83cd4@leemhuis.info/ CC: stable@vger.kernel.org # 6.1 Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/file.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9bef8eaa074a..23056d9914d8 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3838,6 +3838,7 @@ static loff_t find_desired_extent(struct btrfs_inode *inode, loff_t offset, struct extent_buffer *leaf = path->nodes[0]; struct btrfs_file_extent_item *extent; u64 extent_end; + u8 type; if (path->slots[0] >= btrfs_header_nritems(leaf)) { ret = btrfs_next_leaf(root, path); @@ -3892,10 +3893,16 @@ static loff_t find_desired_extent(struct btrfs_inode *inode, loff_t offset, extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); + type = btrfs_file_extent_type(leaf, extent); - if (btrfs_file_extent_disk_bytenr(leaf, extent) == 0 || - btrfs_file_extent_type(leaf, extent) == - BTRFS_FILE_EXTENT_PREALLOC) { + /* + * Can't access the extent's disk_bytenr field if this is an + * inline extent, since at that offset, it's where the extent + * data starts. + */ + if (type == BTRFS_FILE_EXTENT_PREALLOC || + (type == BTRFS_FILE_EXTENT_REG && + btrfs_file_extent_disk_bytenr(leaf, extent) == 0)) { /* * Explicit hole or prealloc extent, search for delalloc. * A prealloc extent is treated like a hole. From 1004fc90f0d79a4b7d9e3d432729914f472f9ad1 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 12 Jan 2023 16:31:08 +0000 Subject: [PATCH 443/543] btrfs: fix race between quota rescan and disable leading to NULL pointer deref commit b7adbf9ada3513d2092362c8eac5cddc5b651f5c upstream. If we have one task trying to start the quota rescan worker while another one is trying to disable quotas, we can end up hitting a race that results in the quota rescan worker doing a NULL pointer dereference. The steps for this are the following: 1) Quotas are enabled; 2) Task A calls the quota rescan ioctl and enters btrfs_qgroup_rescan(). It calls qgroup_rescan_init() which returns 0 (success) and then joins a transaction and commits it; 3) Task B calls the quota disable ioctl and enters btrfs_quota_disable(). It clears the bit BTRFS_FS_QUOTA_ENABLED from fs_info->flags and calls btrfs_qgroup_wait_for_completion(), which returns immediately since the rescan worker is not yet running. Then it starts a transaction and locks fs_info->qgroup_ioctl_lock; 4) Task A queues the rescan worker, by calling btrfs_queue_work(); 5) The rescan worker starts, and calls rescan_should_stop() at the start of its while loop, which results in 0 iterations of the loop, since the flag BTRFS_FS_QUOTA_ENABLED was cleared from fs_info->flags by task B at step 3); 6) Task B sets fs_info->quota_root to NULL; 7) The rescan worker tries to start a transaction and uses fs_info->quota_root as the root argument for btrfs_start_transaction(). This results in a NULL pointer dereference down the call chain of btrfs_start_transaction(). The stack trace is something like the one reported in Link tag below: general protection fault, probably for non-canonical address 0xdffffc0000000041: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000208-0x000000000000020f] CPU: 1 PID: 34 Comm: kworker/u4:2 Not tainted 6.1.0-syzkaller-13872-gb6bb9676f216 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 Workqueue: btrfs-qgroup-rescan btrfs_work_helper RIP: 0010:start_transaction+0x48/0x10f0 fs/btrfs/transaction.c:564 Code: 48 89 fb 48 (...) RSP: 0018:ffffc90000ab7ab0 EFLAGS: 00010206 RAX: 0000000000000041 RBX: 0000000000000208 RCX: ffff88801779ba80 RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000000 RBP: dffffc0000000000 R08: 0000000000000001 R09: fffff52000156f5d R10: fffff52000156f5d R11: 1ffff92000156f5c R12: 0000000000000000 R13: 0000000000000001 R14: 0000000000000001 R15: 0000000000000003 FS: 0000000000000000(0000) GS:ffff8880b9900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f2bea75b718 CR3: 000000001d0cc000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_qgroup_rescan_worker+0x3bb/0x6a0 fs/btrfs/qgroup.c:3402 btrfs_work_helper+0x312/0x850 fs/btrfs/async-thread.c:280 process_one_work+0x877/0xdb0 kernel/workqueue.c:2289 worker_thread+0xb14/0x1330 kernel/workqueue.c:2436 kthread+0x266/0x300 kernel/kthread.c:376 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308 Modules linked in: So fix this by having the rescan worker function not attempt to start a transaction if it didn't do any rescan work. Reported-by: syzbot+96977faa68092ad382c4@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-btrfs/000000000000e5454b05f065a803@google.com/ Fixes: e804861bd4e6 ("btrfs: fix deadlock between quota disable and qgroup rescan worker") CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/qgroup.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 1d84ff393d4b..5ac65384c947 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3348,6 +3348,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) int err = -ENOMEM; int ret = 0; bool stopped = false; + bool did_leaf_rescans = false; path = btrfs_alloc_path(); if (!path) @@ -3368,6 +3369,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) } err = qgroup_rescan_leaf(trans, path); + did_leaf_rescans = true; if (err > 0) btrfs_commit_transaction(trans); @@ -3388,16 +3390,23 @@ out: mutex_unlock(&fs_info->qgroup_rescan_lock); /* - * only update status, since the previous part has already updated the - * qgroup info. + * Only update status, since the previous part has already updated the + * qgroup info, and only if we did any actual work. This also prevents + * race with a concurrent quota disable, which has already set + * fs_info->quota_root to NULL and cleared BTRFS_FS_QUOTA_ENABLED at + * btrfs_quota_disable(). */ - trans = btrfs_start_transaction(fs_info->quota_root, 1); - if (IS_ERR(trans)) { - err = PTR_ERR(trans); + if (did_leaf_rescans) { + trans = btrfs_start_transaction(fs_info->quota_root, 1); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + trans = NULL; + btrfs_err(fs_info, + "fail to start transaction for status update: %d", + err); + } + } else { trans = NULL; - btrfs_err(fs_info, - "fail to start transaction for status update: %d", - err); } mutex_lock(&fs_info->qgroup_rescan_lock); From d048df8baf90c42b29480d8c23e58bd91d53074d Mon Sep 17 00:00:00 2001 From: Enzo Matsumiya Date: Wed, 18 Jan 2023 14:06:57 -0300 Subject: [PATCH 444/543] cifs: do not include page data when checking signature commit 30b2b2196d6e4cc24cbec633535a2404f258ce69 upstream. On async reads, page data is allocated before sending. When the response is received but it has no data to fill (e.g. STATUS_END_OF_FILE), __calc_signature() will still include the pages in its computation, leading to an invalid signature check. This patch fixes this by not setting the async read smb_rqst page data (zeroed by default) if its got_bytes is 0. This can be reproduced/verified with xfstests generic/465. Cc: Signed-off-by: Enzo Matsumiya Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 727f16b426be..92f39052d311 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -4162,12 +4162,15 @@ smb2_readv_callback(struct mid_q_entry *mid) (struct smb2_hdr *)rdata->iov[0].iov_base; struct cifs_credits credits = { .value = 0, .instance = 0 }; struct smb_rqst rqst = { .rq_iov = &rdata->iov[1], - .rq_nvec = 1, - .rq_pages = rdata->pages, - .rq_offset = rdata->page_offset, - .rq_npages = rdata->nr_pages, - .rq_pagesz = rdata->pagesz, - .rq_tailsz = rdata->tailsz }; + .rq_nvec = 1, }; + + if (rdata->got_bytes) { + rqst.rq_pages = rdata->pages; + rqst.rq_offset = rdata->page_offset; + rqst.rq_npages = rdata->nr_pages; + rqst.rq_pagesz = rdata->pagesz; + rqst.rq_tailsz = rdata->tailsz; + } WARN_ONCE(rdata->server != mid->server, "rdata server %p != mid server %p", From 627ae8c6271e8bd5e5b28f95e28f55253a19f6f2 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 8 Sep 2022 09:45:22 +0300 Subject: [PATCH 445/543] thunderbolt: Disable XDomain lane 1 only in software connection manager commit 84ee211c83212f4d35b56e0603acdcc41f860f1b upstream. When firmware connection manager is in use we should not touch the lane adapter (well or any) configuration space so do this only when we know that the software connection manager is active. Fixes: 8e1de7042596 ("thunderbolt: Add support for XDomain lane bonding") Cc: stable@vger.kernel.org Acked-by: Yehezkel Bernat Signed-off-by: Mika Westerberg Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/xdomain.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c index f00b2f62d8e3..9a3c52f6b8c9 100644 --- a/drivers/thunderbolt/xdomain.c +++ b/drivers/thunderbolt/xdomain.c @@ -1419,12 +1419,19 @@ static int tb_xdomain_get_properties(struct tb_xdomain *xd) * registered, we notify the userspace that it has changed. */ if (!update) { - struct tb_port *port; + /* + * Now disable lane 1 if bonding was not enabled. Do + * this only if bonding was possible at the beginning + * (that is we are the connection manager and there are + * two lanes). + */ + if (xd->bonding_possible) { + struct tb_port *port; - /* Now disable lane 1 if bonding was not enabled */ - port = tb_port_at(xd->route, tb_xdomain_parent(xd)); - if (!port->bonded) - tb_port_disable(port->dual_link_port); + port = tb_port_at(xd->route, tb_xdomain_parent(xd)); + if (!port->bonded) + tb_port_disable(port->dual_link_port); + } if (device_add(&xd->dev)) { dev_err(&xd->dev, "failed to add XDomain device\n"); From 15a2e23fd5856e605c04f9ffee8fe1c631599aa0 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 20 May 2022 13:35:19 +0300 Subject: [PATCH 446/543] thunderbolt: Use correct function to calculate maximum USB3 link rate commit e8ff07fb33026c5c1bb5b81293496faba5d68059 upstream. We need to take minimum of both sides of the USB3 link into consideration, not just the downstream port. Fix this by calling tb_usb3_max_link_rate() instead. Fixes: 0bd680cd900c ("thunderbolt: Add USB3 bandwidth management") Cc: stable@vger.kernel.org Acked-by: Yehezkel Bernat Signed-off-by: Mika Westerberg Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thunderbolt/tunnel.c b/drivers/thunderbolt/tunnel.c index 2c3cf7fc3357..1fc3c29b24f8 100644 --- a/drivers/thunderbolt/tunnel.c +++ b/drivers/thunderbolt/tunnel.c @@ -1275,7 +1275,7 @@ static void tb_usb3_reclaim_available_bandwidth(struct tb_tunnel *tunnel, return; } else if (!ret) { /* Use maximum link rate if the link valid is not set */ - ret = usb4_usb3_port_max_link_rate(tunnel->src_port); + ret = tb_usb3_max_link_rate(tunnel->dst_port, tunnel->src_port); if (ret < 0) { tb_tunnel_warn(tunnel, "failed to read maximum link rate\n"); return; From 16b4b0f8e5a054426dea04b4253d15602220dc1c Mon Sep 17 00:00:00 2001 From: Utkarsh Patel Date: Thu, 22 Dec 2022 20:22:46 -0800 Subject: [PATCH 447/543] thunderbolt: Do not report errors if on-board retimers are found commit c28f3d80383571d3630df1a0e89500d23e855924 upstream. Currently we return an error even if on-board retimers are found and that's not expected. Fix this to return an error only if there was one and 0 otherwise. Fixes: 1e56c88adecc ("thunderbolt: Runtime resume USB4 port when retimers are scanned") Cc: stable@vger.kernel.org Signed-off-by: Utkarsh Patel Signed-off-by: Mika Westerberg Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/retimer.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/thunderbolt/retimer.c b/drivers/thunderbolt/retimer.c index 81252e31014a..6ebe7a2886ec 100644 --- a/drivers/thunderbolt/retimer.c +++ b/drivers/thunderbolt/retimer.c @@ -471,10 +471,9 @@ int tb_retimer_scan(struct tb_port *port, bool add) break; } - if (!last_idx) { - ret = 0; + ret = 0; + if (!last_idx) goto out; - } /* Add on-board retimers if they do not exist already */ for (i = 1; i <= last_idx; i++) { From d1b531efcb8e51af0bc9832ad9c6d56683851392 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 29 Dec 2022 14:10:30 +0200 Subject: [PATCH 448/543] thunderbolt: Do not call PM runtime functions in tb_retimer_scan() commit 23257cfc1cb7202fd0065e9f4a6a0aac1c04c4a9 upstream. We cannot call PM runtime functions in tb_retimer_scan() because it will also be called when retimers are scanned from userspace (happens when there is no device connected on ChromeOS for instance) and at the same USB4 port runtime resume hook. This leads to hang because neither can proceed. Fix this by runtime resuming USB4 ports in tb_scan_port() instead. This makes sure the ports are runtime PM active when retimers are added under it while avoiding the reported hang as well. Reported-by: Utkarsh Patel Fixes: 1e56c88adecc ("thunderbolt: Runtime resume USB4 port when retimers are scanned") Cc: stable@vger.kernel.org Acked-by: Yehezkel Bernat Signed-off-by: Mika Westerberg Signed-off-by: Greg Kroah-Hartman --- drivers/thunderbolt/retimer.c | 17 +++-------------- drivers/thunderbolt/tb.c | 20 +++++++++++++++----- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/drivers/thunderbolt/retimer.c b/drivers/thunderbolt/retimer.c index 6ebe7a2886ec..56008eb91e2e 100644 --- a/drivers/thunderbolt/retimer.c +++ b/drivers/thunderbolt/retimer.c @@ -427,13 +427,6 @@ int tb_retimer_scan(struct tb_port *port, bool add) { u32 status[TB_MAX_RETIMER_INDEX + 1] = {}; int ret, i, last_idx = 0; - struct usb4_port *usb4; - - usb4 = port->usb4; - if (!usb4) - return 0; - - pm_runtime_get_sync(&usb4->dev); /* * Send broadcast RT to make sure retimer indices facing this @@ -441,7 +434,7 @@ int tb_retimer_scan(struct tb_port *port, bool add) */ ret = usb4_port_enumerate_retimers(port); if (ret) - goto out; + return ret; /* * Enable sideband channel for each retimer. We can do this @@ -471,11 +464,11 @@ int tb_retimer_scan(struct tb_port *port, bool add) break; } - ret = 0; if (!last_idx) - goto out; + return 0; /* Add on-board retimers if they do not exist already */ + ret = 0; for (i = 1; i <= last_idx; i++) { struct tb_retimer *rt; @@ -489,10 +482,6 @@ int tb_retimer_scan(struct tb_port *port, bool add) } } -out: - pm_runtime_mark_last_busy(&usb4->dev); - pm_runtime_put_autosuspend(&usb4->dev); - return ret; } diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index 462845804427..3f1ab30c4fb1 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -628,11 +628,15 @@ static void tb_scan_port(struct tb_port *port) * Downstream switch is reachable through two ports. * Only scan on the primary port (link_nr == 0). */ + + if (port->usb4) + pm_runtime_get_sync(&port->usb4->dev); + if (tb_wait_for_port(port, false) <= 0) - return; + goto out_rpm_put; if (port->remote) { tb_port_dbg(port, "port already has a remote\n"); - return; + goto out_rpm_put; } tb_retimer_scan(port, true); @@ -647,12 +651,12 @@ static void tb_scan_port(struct tb_port *port) */ if (PTR_ERR(sw) == -EIO || PTR_ERR(sw) == -EADDRNOTAVAIL) tb_scan_xdomain(port); - return; + goto out_rpm_put; } if (tb_switch_configure(sw)) { tb_switch_put(sw); - return; + goto out_rpm_put; } /* @@ -681,7 +685,7 @@ static void tb_scan_port(struct tb_port *port) if (tb_switch_add(sw)) { tb_switch_put(sw); - return; + goto out_rpm_put; } /* Link the switches using both links if available */ @@ -733,6 +737,12 @@ static void tb_scan_port(struct tb_port *port) tb_add_dp_resources(sw); tb_scan_switch(sw); + +out_rpm_put: + if (port->usb4) { + pm_runtime_mark_last_busy(&port->usb4->dev); + pm_runtime_put_autosuspend(&port->usb4->dev); + } } static void tb_deactivate_and_free_tunnel(struct tb_tunnel *tunnel) From ea41602d3b2f368d44b7b682f93dca537292d2a4 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 6 Jan 2023 13:44:56 +0000 Subject: [PATCH 449/543] riscv: dts: sifive: fu740: fix size of pcie 32bit memory commit 43d5f5d63699724d47f0d9e0eae516a260d232b4 upstream. The 32-bit memory resource is needed for non-prefetchable memory allocations on the PCIe bus, however with some cards (such as the SM768) the system fails to allocate memory from this. Checking the allocation against the datasheet, it looks like there has been a mis-calcualation of the resource for the first memory region (0x0060090000..0x0070ffffff) which in the data-sheet for the fu740 (v1p2) is from 0x0060000000..0x007fffffff. Changing this to allocate from 0x0060090000..0x007fffffff fixes the probing issues. Fixes: ae80d5148085 ("riscv: dts: Add PCIe support for the SiFive FU740-C000 SoC") Cc: Paul Walmsley Cc: Greentime Hu Signed-off-by: Ben Dooks Cc: stable@vger.kernel.org Tested-by: Ron Economos # from IRC Reviewed-by: Conor Dooley Signed-off-by: Conor Dooley Signed-off-by: Greg Kroah-Hartman --- arch/riscv/boot/dts/sifive/fu740-c000.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi index 43bed6c0a84f..5235fd1c9cb6 100644 --- a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi +++ b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi @@ -328,7 +328,7 @@ bus-range = <0x0 0xff>; ranges = <0x81000000 0x0 0x60080000 0x0 0x60080000 0x0 0x10000>, /* I/O */ <0x82000000 0x0 0x60090000 0x0 0x60090000 0x0 0xff70000>, /* mem */ - <0x82000000 0x0 0x70000000 0x0 0x70000000 0x0 0x1000000>, /* mem */ + <0x82000000 0x0 0x70000000 0x0 0x70000000 0x0 0x10000000>, /* mem */ <0xc3000000 0x20 0x00000000 0x20 0x00000000 0x20 0x00000000>; /* mem prefetchable */ num-lanes = <0x8>; interrupts = <56>, <57>, <58>, <59>, <60>, <61>, <62>, <63>, <64>; From 8de08b0c44ae7bb1500893c5edbf093d72cbd3b8 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 6 Jan 2023 10:43:59 -0500 Subject: [PATCH 450/543] bpf: restore the ebpf program ID for BPF_AUDIT_UNLOAD and PERF_BPF_EVENT_PROG_UNLOAD commit ef01f4e25c1760920e2c94f1c232350277ace69b upstream. When changing the ebpf program put() routines to support being called from within IRQ context the program ID was reset to zero prior to calling the perf event and audit UNLOAD record generators, which resulted in problems as the ebpf program ID was bogus (always zero). This patch addresses this problem by removing an unnecessary call to bpf_prog_free_id() in __bpf_prog_offload_destroy() and adjusting __bpf_prog_put() to only call bpf_prog_free_id() after audit and perf have finished their bpf program unload tasks in bpf_prog_put_deferred(). For the record, no one can determine, or remember, why it was necessary to free the program ID, and remove it from the IDR, prior to executing bpf_prog_put_deferred(); regardless, both Stanislav and Alexei agree that the approach in this patch should be safe. It is worth noting that when moving the bpf_prog_free_id() call, the do_idr_lock parameter was forced to true as the ebpf devs determined this was the correct as the do_idr_lock should always be true. The do_idr_lock parameter will be removed in a follow-up patch, but it was kept here to keep the patch small in an effort to ease any stable backports. I also modified the bpf_audit_prog() logic used to associate the AUDIT_BPF record with other associated records, e.g. @ctx != NULL. Instead of keying off the operation, it now keys off the execution context, e.g. '!in_irg && !irqs_disabled()', which is much more appropriate and should help better connect the UNLOAD operations with the associated audit state (other audit records). Cc: stable@vger.kernel.org Fixes: d809e134be7a ("bpf: Prepare bpf_prog_put() to be called from irq context.") Reported-by: Burn Alting Reported-by: Jiri Olsa Suggested-by: Stanislav Fomichev Suggested-by: Alexei Starovoitov Signed-off-by: Paul Moore Acked-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20230106154400.74211-1-paul@paul-moore.com Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/offload.c | 3 --- kernel/bpf/syscall.c | 6 ++---- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 13e4efc971e6..190d9f9dc987 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -216,9 +216,6 @@ static void __bpf_prog_offload_destroy(struct bpf_prog *prog) if (offload->dev_state) offload->offdev->ops->destroy(prog); - /* Make sure BPF_PROG_GET_NEXT_ID can't find this dead program */ - bpf_prog_free_id(prog, true); - list_del_init(&offload->offloads); kfree(offload); prog->aux->offload = NULL; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 439ed7e5a82b..6c61dba26f4d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1958,7 +1958,7 @@ static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op) return; if (audit_enabled == AUDIT_OFF) return; - if (op == BPF_AUDIT_LOAD) + if (!in_irq() && !irqs_disabled()) ctx = audit_context(); ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF); if (unlikely(!ab)) @@ -2053,6 +2053,7 @@ static void bpf_prog_put_deferred(struct work_struct *work) prog = aux->prog; perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); bpf_audit_prog(prog, BPF_AUDIT_UNLOAD); + bpf_prog_free_id(prog, true); __bpf_prog_put_noref(prog, true); } @@ -2061,9 +2062,6 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) struct bpf_prog_aux *aux = prog->aux; if (atomic64_dec_and_test(&aux->refcnt)) { - /* bpf_prog_free_id() must be called first */ - bpf_prog_free_id(prog, do_idr_lock); - if (in_irq() || irqs_disabled()) { INIT_WORK(&aux->work, bpf_prog_put_deferred); schedule_work(&aux->work); From fd524ca7fe45b8a06dca2dd546d62684a9768f95 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 21 Dec 2022 17:40:22 +0100 Subject: [PATCH 451/543] tty: serial: qcom-geni-serial: fix slab-out-of-bounds on RX FIFO buffer commit b8caf69a6946e18ffebad49847e258f5b6d52ac2 upstream. Driver's probe allocates memory for RX FIFO (port->rx_fifo) based on default RX FIFO depth, e.g. 16. Later during serial startup the qcom_geni_serial_port_setup() updates the RX FIFO depth (port->rx_fifo_depth) to match real device capabilities, e.g. to 32. The RX UART handle code will read "port->rx_fifo_depth" number of words into "port->rx_fifo" buffer, thus exceeding the bounds. This can be observed in certain configurations with Qualcomm Bluetooth HCI UART device and KASAN: Bluetooth: hci0: QCA Product ID :0x00000010 Bluetooth: hci0: QCA SOC Version :0x400a0200 Bluetooth: hci0: QCA ROM Version :0x00000200 Bluetooth: hci0: QCA Patch Version:0x00000d2b Bluetooth: hci0: QCA controller version 0x02000200 Bluetooth: hci0: QCA Downloading qca/htbtfw20.tlv bluetooth hci0: Direct firmware load for qca/htbtfw20.tlv failed with error -2 Bluetooth: hci0: QCA Failed to request file: qca/htbtfw20.tlv (-2) Bluetooth: hci0: QCA Failed to download patch (-2) ================================================================== BUG: KASAN: slab-out-of-bounds in handle_rx_uart+0xa8/0x18c Write of size 4 at addr ffff279347d578c0 by task swapper/0/0 CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.1.0-rt5-00350-gb2450b7e00be-dirty #26 Hardware name: Qualcomm Technologies, Inc. Robotics RB5 (DT) Call trace: dump_backtrace.part.0+0xe0/0xf0 show_stack+0x18/0x40 dump_stack_lvl+0x8c/0xb8 print_report+0x188/0x488 kasan_report+0xb4/0x100 __asan_store4+0x80/0xa4 handle_rx_uart+0xa8/0x18c qcom_geni_serial_handle_rx+0x84/0x9c qcom_geni_serial_isr+0x24c/0x760 __handle_irq_event_percpu+0x108/0x500 handle_irq_event+0x6c/0x110 handle_fasteoi_irq+0x138/0x2cc generic_handle_domain_irq+0x48/0x64 If the RX FIFO depth changes after probe, be sure to resize the buffer. Fixes: f9d690b6ece7 ("tty: serial: qcom_geni_serial: Allocate port->rx_fifo buffer in probe") Cc: Signed-off-by: Krzysztof Kozlowski Reviewed-by: Jiri Slaby Link: https://lore.kernel.org/r/20221221164022.1087814-1-krzysztof.kozlowski@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/qcom_geni_serial.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index 83b66b73303a..7905935b9f1b 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -864,9 +864,10 @@ out_unlock: return IRQ_HANDLED; } -static void get_tx_fifo_size(struct qcom_geni_serial_port *port) +static int setup_fifos(struct qcom_geni_serial_port *port) { struct uart_port *uport; + u32 old_rx_fifo_depth = port->rx_fifo_depth; uport = &port->uport; port->tx_fifo_depth = geni_se_get_tx_fifo_depth(&port->se); @@ -874,6 +875,16 @@ static void get_tx_fifo_size(struct qcom_geni_serial_port *port) port->rx_fifo_depth = geni_se_get_rx_fifo_depth(&port->se); uport->fifosize = (port->tx_fifo_depth * port->tx_fifo_width) / BITS_PER_BYTE; + + if (port->rx_fifo && (old_rx_fifo_depth != port->rx_fifo_depth) && port->rx_fifo_depth) { + port->rx_fifo = devm_krealloc(uport->dev, port->rx_fifo, + port->rx_fifo_depth * sizeof(u32), + GFP_KERNEL); + if (!port->rx_fifo) + return -ENOMEM; + } + + return 0; } @@ -888,6 +899,7 @@ static int qcom_geni_serial_port_setup(struct uart_port *uport) u32 rxstale = DEFAULT_BITS_PER_CHAR * STALE_TIMEOUT; u32 proto; u32 pin_swap; + int ret; proto = geni_se_read_proto(&port->se); if (proto != GENI_SE_UART) { @@ -897,7 +909,9 @@ static int qcom_geni_serial_port_setup(struct uart_port *uport) qcom_geni_serial_stop_rx(uport); - get_tx_fifo_size(port); + ret = setup_fifos(port); + if (ret) + return ret; writel(rxstale, uport->membase + SE_UART_RX_STALE_CNT); From 64152e05a4de3ebf59f1740a0985a6d5fba0c77b Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Fri, 2 Dec 2022 14:06:33 +0800 Subject: [PATCH 452/543] tty: fix possible null-ptr-defer in spk_ttyio_release commit 5abbeebd8296c2301023b8dc4b5a6c0d5229b4f5 upstream. Run the following tests on the qemu platform: syzkaller:~# modprobe speakup_audptr input: Speakup as /devices/virtual/input/input4 initialized device: /dev/synth, node (MAJOR 10, MINOR 125) speakup 3.1.6: initialized synth name on entry is: (null) synth probe spk_ttyio_initialise_ldisc failed because tty_kopen_exclusive returned failed (errno -16), then remove the module, we will get a null-ptr-defer problem, as follow: syzkaller:~# modprobe -r speakup_audptr releasing synth audptr BUG: kernel NULL pointer dereference, address: 0000000000000080 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 0 P4D 0 Oops: 0002 [#1] PREEMPT SMP PTI CPU: 2 PID: 204 Comm: modprobe Not tainted 6.1.0-rc6-dirty #1 RIP: 0010:mutex_lock+0x14/0x30 Call Trace: spk_ttyio_release+0x19/0x70 [speakup] synth_release.part.6+0xac/0xc0 [speakup] synth_remove+0x56/0x60 [speakup] __x64_sys_delete_module+0x156/0x250 ? fpregs_assert_state_consistent+0x1d/0x50 do_syscall_64+0x37/0x90 entry_SYSCALL_64_after_hwframe+0x63/0xcd Modules linked in: speakup_audptr(-) speakup Dumping ftrace buffer: in_synth->dev was not initialized during modprobe, so we add check for in_synth->dev to fix this bug. Fixes: 4f2a81f3a882 ("speakup: Reference synth from tty and tty from synth") Cc: stable Signed-off-by: Gaosheng Cui Link: https://lore.kernel.org/r/20221202060633.217364-1-cuigaosheng1@huawei.com Reviewed-by: Samuel Thibault Signed-off-by: Greg Kroah-Hartman --- drivers/accessibility/speakup/spk_ttyio.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/accessibility/speakup/spk_ttyio.c b/drivers/accessibility/speakup/spk_ttyio.c index 08cf8a17754b..07373b3debd1 100644 --- a/drivers/accessibility/speakup/spk_ttyio.c +++ b/drivers/accessibility/speakup/spk_ttyio.c @@ -354,6 +354,9 @@ void spk_ttyio_release(struct spk_synth *in_synth) { struct tty_struct *tty = in_synth->dev; + if (tty == NULL) + return; + tty_lock(tty); if (tty->ops->close) From 66144d830f59364d650ee9e7dc575daf9748172f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 16 Jan 2023 08:51:05 -0700 Subject: [PATCH 453/543] pktcdvd: check for NULL returna fter calling bio_split_to_limits() commit 3e9900f3bd7ba30d60f82b162b70a1dffe4e8e24 upstream. The revert of the removal of this driver happened after we fixed up the split limits for NOWAIT issue, hence it got missed. Ensure that we check for a NULL bio after splitting, in case it should be retried. Marking this as fixing both commits, so that stable backport will do this correctly. Cc: stable@vger.kernel.org Fixes: 9cea62b2cbab ("block: don't allow splitting of a REQ_NOWAIT bio") Fixes: 4b83e99ee709 ("Revert "pktcdvd: remove driver."") Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- drivers/block/pktcdvd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 4cea3b08087e..2f1a92509271 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2400,6 +2400,8 @@ static void pkt_submit_bio(struct bio *bio) struct bio *split; bio = bio_split_to_limits(bio); + if (!bio) + return; pkt_dbg(2, pd, "start = %6llx stop = %6llx\n", (unsigned long long)bio->bi_iter.bi_sector, From 36fc7317cdb16cfeae0f879916995037bb615ac4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 20 Jan 2023 15:08:21 -0700 Subject: [PATCH 454/543] io_uring/poll: don't reissue in case of poll race on multishot request commit 8caa03f10bf92cb8657408a6ece6a8a73f96ce13 upstream. A previous commit fixed a poll race that can occur, but it's only applicable for multishot requests. For a multishot request, we can safely ignore a spurious wakeup, as we never leave the waitqueue to begin with. A blunt reissue of a multishot armed request can cause us to leak a buffer, if they are ring provided. While this seems like a bug in itself, it's not really defined behavior to reissue a multishot request directly. It's less efficient to do so as well, and not required to rearm anything like it is for singleshot poll requests. Cc: stable@vger.kernel.org Fixes: 6e5aedb9324a ("io_uring/poll: attempt request issue after racy poll wakeup") Reported-and-tested-by: Olivier Langlois Link: https://github.com/axboe/liburing/issues/778 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/poll.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/io_uring/poll.c b/io_uring/poll.c index f2f9f174fc62..ab5ae475840f 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -281,8 +281,12 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) * to the waitqueue, so if we get nothing back, we * should be safe and attempt a reissue. */ - if (unlikely(!req->cqe.res)) + if (unlikely(!req->cqe.res)) { + /* Multishot armed need not reissue */ + if (!(req->apoll_events & EPOLLONESHOT)) + continue; return IOU_POLL_REISSUE; + } } if (req->apoll_events & EPOLLONESHOT) return IOU_POLL_DONE; From 1423c12b4f0c6a10830407a99e90a68c278205cc Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 12 Jan 2023 18:42:51 +0100 Subject: [PATCH 455/543] mptcp: explicitly specify sock family at subflow creation time commit 6bc1fe7dd748ba5e76e7917d110837cafe7b931c upstream. Let the caller specify the to-be-created subflow family. For a given MPTCP socket created with the AF_INET6 family, the current userspace PM can already ask the kernel to create subflows in v4 and v6. If "plain" IPv4 addresses are passed to the kernel, they are automatically mapped in v6 addresses "by accident". This can be problematic because the userspace will need to pass different addresses, now the v4-mapped-v6 addresses to destroy this new subflow. On the other hand, if the MPTCP socket has been created with the AF_INET family, the command to create a subflow in v6 will be accepted but the result will not be the one as expected as new subflow will be created in IPv4 using part of the v6 addresses passed to the kernel: not creating the expected subflow then. No functional change intended for the in-kernel PM where an explicit enforcement is currently in place. This arbitrary enforcement will be leveraged by other patches in a future version. Fixes: 702c2f646d42 ("mptcp: netlink: allow userspace-driven subflow establishment") Cc: stable@vger.kernel.org Co-developed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/protocol.c | 2 +- net/mptcp/protocol.h | 3 ++- net/mptcp/subflow.c | 9 +++++---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index e97465f0c667..29849d77e4bf 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -107,7 +107,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) struct socket *ssock; int err; - err = mptcp_subflow_create_socket(sk, &ssock); + err = mptcp_subflow_create_socket(sk, sk->sk_family, &ssock); if (err) return err; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 62e9ff237b6e..1097428804f9 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -626,7 +626,8 @@ bool mptcp_addresses_equal(const struct mptcp_addr_info *a, /* called with sk socket lock held */ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, const struct mptcp_addr_info *remote); -int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock); +int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, + struct socket **new_sock); void mptcp_info2sockaddr(const struct mptcp_addr_info *info, struct sockaddr_storage *addr, unsigned short family); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 9d3701fdb293..5220435d8e34 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1492,7 +1492,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, if (!mptcp_is_fully_established(sk)) goto err_out; - err = mptcp_subflow_create_socket(sk, &sf); + err = mptcp_subflow_create_socket(sk, loc->family, &sf); if (err) goto err_out; @@ -1604,7 +1604,9 @@ static void mptcp_subflow_ops_undo_override(struct sock *ssk) #endif ssk->sk_prot = &tcp_prot; } -int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) + +int mptcp_subflow_create_socket(struct sock *sk, unsigned short family, + struct socket **new_sock) { struct mptcp_subflow_context *subflow; struct net *net = sock_net(sk); @@ -1617,8 +1619,7 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) if (unlikely(!sk->sk_socket)) return -EINVAL; - err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP, - &sf); + err = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, &sf); if (err) return err; From f44ce132016febf697a09139c12a24048e365376 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 12 Jan 2023 18:42:52 +0100 Subject: [PATCH 456/543] mptcp: netlink: respect v4/v6-only sockets commit fb00ee4f3343acb2b9222ca9b73b47dd1e1a8efc upstream. If an MPTCP socket has been created with AF_INET6 and the IPV6_V6ONLY option has been set, the userspace PM would allow creating subflows using IPv4 addresses, e.g. mapped in v6. The kernel side of userspace PM will also accept creating subflows with local and remote addresses having different families. Depending on the subflow socket's family, different behaviours are expected: - If AF_INET is forced with a v6 address, the kernel will take the last byte of the IP and try to connect to that: a new subflow is created but to a non expected address. - If AF_INET6 is forced with a v4 address, the kernel will try to connect to a v4 address (v4-mapped-v6). A -EBADF error from the connect() part is then expected. It is then required to check the given families can be accepted. This is done by using a new helper for addresses family matching, taking care of IPv4 vs IPv4-mapped-IPv6 addresses. This helper will be re-used later by the in-kernel path-manager to use mixed IPv4 and IPv6 addresses. While at it, a clear error message is now reported if there are some conflicts with the families that have been passed by the userspace. Fixes: 702c2f646d42 ("mptcp: netlink: allow userspace-driven subflow establishment") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/mptcp/pm.c | 25 +++++++++++++++++++++++++ net/mptcp/pm_userspace.c | 7 +++++++ net/mptcp/protocol.h | 3 +++ 3 files changed, 35 insertions(+) diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 45e2a48397b9..70f0ced3ca86 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -420,6 +420,31 @@ void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) } } +/* if sk is ipv4 or ipv6_only allows only same-family local and remote addresses, + * otherwise allow any matching local/remote pair + */ +bool mptcp_pm_addr_families_match(const struct sock *sk, + const struct mptcp_addr_info *loc, + const struct mptcp_addr_info *rem) +{ + bool mptcp_is_v4 = sk->sk_family == AF_INET; + +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + bool loc_is_v4 = loc->family == AF_INET || ipv6_addr_v4mapped(&loc->addr6); + bool rem_is_v4 = rem->family == AF_INET || ipv6_addr_v4mapped(&rem->addr6); + + if (mptcp_is_v4) + return loc_is_v4 && rem_is_v4; + + if (ipv6_only_sock(sk)) + return !loc_is_v4 && !rem_is_v4; + + return loc_is_v4 == rem_is_v4; +#else + return mptcp_is_v4 && loc->family == AF_INET && rem->family == AF_INET; +#endif +} + void mptcp_pm_data_reset(struct mptcp_sock *msk) { u8 pm_type = mptcp_get_pm_type(sock_net((struct sock *)msk)); diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 0430415357ba..c1d6cd5b188c 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -294,6 +294,13 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info) } sk = &msk->sk.icsk_inet.sk; + + if (!mptcp_pm_addr_families_match(sk, &addr_l, &addr_r)) { + GENL_SET_ERR_MSG(info, "families mismatch"); + err = -EINVAL; + goto create_err; + } + lock_sock(sk); err = __mptcp_subflow_connect(sk, &addr_l, &addr_r); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 1097428804f9..6f22ae13c984 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -762,6 +762,9 @@ int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info, int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info, bool require_family, struct mptcp_pm_addr_entry *entry); +bool mptcp_pm_addr_families_match(const struct sock *sk, + const struct mptcp_addr_info *loc, + const struct mptcp_addr_info *rem); void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side); From f59549814a64ce7db3bc52bad2826068c3c97a7f Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 12 Jan 2023 18:42:53 +0100 Subject: [PATCH 457/543] selftests: mptcp: userspace: validate v4-v6 subflows mix commit 4656d72c1efa495a58ad6d8b073a60907073e4e6 upstream. MPTCP protocol supports having subflows in both IPv4 and IPv6. In Linux, it is possible to have that if the MPTCP socket has been created with AF_INET6 family without the IPV6_V6ONLY option. Here, a new IPv4 subflow is being added to the initial IPv6 connection, then being removed using Netlink commands. Cc: stable@vger.kernel.org # v5.19+ Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- .../selftests/net/mptcp/userspace_pm.sh | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 3229725b64b0..0040e3bc7b16 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -776,6 +776,52 @@ test_subflows() rm -f "$evts" } +test_subflows_v4_v6_mix() +{ + # Attempt to add a listener at 10.0.2.1: + ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\ + $app6_port > /dev/null 2>&1 & + local listener_pid=$! + + # ADD_ADDR4 from server to client machine reusing the subflow port on + # the established v6 connection + :>"$client_evts" + ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server6_token" id\ + $server_addr_id dev ns1eth2 > /dev/null 2>&1 + stdbuf -o0 -e0 printf "ADD_ADDR4 id:%d 10.0.2.1 (ns1) => ns2, reuse port\t\t" $server_addr_id + sleep 0.5 + verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "10.0.2.1"\ + "$server_addr_id" "$app6_port" + + # CREATE_SUBFLOW from client to server machine + :>"$client_evts" + ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\ + $app6_port token "$client6_token" > /dev/null 2>&1 + sleep 0.5 + verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client6_token"\ + "$AF_INET" "10.0.2.2" "10.0.2.1" "$app6_port" "23"\ + "$server_addr_id" "ns2" "ns1" + + # Delete the listener from the server ns, if one was created + kill_wait $listener_pid + + sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") + + # DESTROY_SUBFLOW from client to server machine + :>"$client_evts" + ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\ + $app6_port token "$client6_token" > /dev/null 2>&1 + sleep 0.5 + verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client6_token" \ + "$AF_INET" "10.0.2.2" "10.0.2.1" "$app6_port" "23"\ + "$server_addr_id" "ns2" "ns1" + + # RM_ADDR from server to client machine + ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\ + "$server6_token" > /dev/null 2>&1 + sleep 0.5 +} + test_prio() { local count @@ -812,6 +858,7 @@ make_connection "v6" test_announce test_remove test_subflows +test_subflows_v4_v6_mix test_prio exit 0 From 616fd34d017000ecf9097368b13d8a266f4920b3 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 23 Dec 2022 09:59:09 -0500 Subject: [PATCH 458/543] USB: gadgetfs: Fix race between mounting and unmounting commit d18dcfe9860e842f394e37ba01ca9440ab2178f4 upstream. The syzbot fuzzer and Gerald Lee have identified a use-after-free bug in the gadgetfs driver, involving processes concurrently mounting and unmounting the gadgetfs filesystem. In particular, gadgetfs_fill_super() can race with gadgetfs_kill_sb(), causing the latter to deallocate the_device while the former is using it. The output from KASAN says, in part: BUG: KASAN: use-after-free in instrument_atomic_read_write include/linux/instrumented.h:102 [inline] BUG: KASAN: use-after-free in atomic_fetch_sub_release include/linux/atomic/atomic-instrumented.h:176 [inline] BUG: KASAN: use-after-free in __refcount_sub_and_test include/linux/refcount.h:272 [inline] BUG: KASAN: use-after-free in __refcount_dec_and_test include/linux/refcount.h:315 [inline] BUG: KASAN: use-after-free in refcount_dec_and_test include/linux/refcount.h:333 [inline] BUG: KASAN: use-after-free in put_dev drivers/usb/gadget/legacy/inode.c:159 [inline] BUG: KASAN: use-after-free in gadgetfs_kill_sb+0x33/0x100 drivers/usb/gadget/legacy/inode.c:2086 Write of size 4 at addr ffff8880276d7840 by task syz-executor126/18689 CPU: 0 PID: 18689 Comm: syz-executor126 Not tainted 6.1.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 Call Trace: ... atomic_fetch_sub_release include/linux/atomic/atomic-instrumented.h:176 [inline] __refcount_sub_and_test include/linux/refcount.h:272 [inline] __refcount_dec_and_test include/linux/refcount.h:315 [inline] refcount_dec_and_test include/linux/refcount.h:333 [inline] put_dev drivers/usb/gadget/legacy/inode.c:159 [inline] gadgetfs_kill_sb+0x33/0x100 drivers/usb/gadget/legacy/inode.c:2086 deactivate_locked_super+0xa7/0xf0 fs/super.c:332 vfs_get_super fs/super.c:1190 [inline] get_tree_single+0xd0/0x160 fs/super.c:1207 vfs_get_tree+0x88/0x270 fs/super.c:1531 vfs_fsconfig_locked fs/fsopen.c:232 [inline] The simplest solution is to ensure that gadgetfs_fill_super() and gadgetfs_kill_sb() are serialized by making them both acquire a new mutex. Signed-off-by: Alan Stern Reported-and-tested-by: syzbot+33d7ad66d65044b93f16@syzkaller.appspotmail.com Reported-and-tested-by: Gerald Lee Link: https://lore.kernel.org/linux-usb/CAO3qeMVzXDP-JU6v1u5Ags6Q-bb35kg3=C6d04DjzA9ffa5x1g@mail.gmail.com/ Fixes: e5d82a7360d1 ("vfs: Convert gadgetfs to use the new mount API") CC: Link: https://lore.kernel.org/r/Y6XCPXBpn3tmjdCC@rowland.harvard.edu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/inode.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 01c3ead7d1b4..d605bc2e7e8f 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -229,6 +229,7 @@ static void put_ep (struct ep_data *data) */ static const char *CHIP; +static DEFINE_MUTEX(sb_mutex); /* Serialize superblock operations */ /*----------------------------------------------------------------------*/ @@ -2010,13 +2011,20 @@ gadgetfs_fill_super (struct super_block *sb, struct fs_context *fc) { struct inode *inode; struct dev_data *dev; + int rc; - if (the_device) - return -ESRCH; + mutex_lock(&sb_mutex); + + if (the_device) { + rc = -ESRCH; + goto Done; + } CHIP = usb_get_gadget_udc_name(); - if (!CHIP) - return -ENODEV; + if (!CHIP) { + rc = -ENODEV; + goto Done; + } /* superblock */ sb->s_blocksize = PAGE_SIZE; @@ -2053,13 +2061,17 @@ gadgetfs_fill_super (struct super_block *sb, struct fs_context *fc) * from binding to a controller. */ the_device = dev; - return 0; + rc = 0; + goto Done; -Enomem: + Enomem: kfree(CHIP); CHIP = NULL; + rc = -ENOMEM; - return -ENOMEM; + Done: + mutex_unlock(&sb_mutex); + return rc; } /* "mount -t gadgetfs path /dev/gadget" ends up here */ @@ -2081,6 +2093,7 @@ static int gadgetfs_init_fs_context(struct fs_context *fc) static void gadgetfs_kill_sb (struct super_block *sb) { + mutex_lock(&sb_mutex); kill_litter_super (sb); if (the_device) { put_dev (the_device); @@ -2088,6 +2101,7 @@ gadgetfs_kill_sb (struct super_block *sb) } kfree(CHIP); CHIP = NULL; + mutex_unlock(&sb_mutex); } /*----------------------------------------------------------------------*/ From feb7840e57ca2b370421a3120ee9d029b61477b2 Mon Sep 17 00:00:00 2001 From: Michael Adler Date: Tue, 3 Jan 2023 14:48:50 +0100 Subject: [PATCH 459/543] USB: serial: cp210x: add SCALANCE LPE-9000 device id commit 3f9e76e31704a325170e5aec2243c8d084d74854 upstream. Add the USB serial console device ID for Siemens SCALANCE LPE-9000 which have a USB port for their serial console. Signed-off-by: Michael Adler Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index f6fb23620e87..ba5638471de4 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -60,6 +60,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x0846, 0x1100) }, /* NetGear Managed Switch M4100 series, M5300 series, M7100 series */ { USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */ { USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */ + { USB_DEVICE(0x0908, 0x0070) }, /* Siemens SCALANCE LPE-9000 USB Serial Console */ { USB_DEVICE(0x0908, 0x01FF) }, /* Siemens RUGGEDCOM USB Serial Console */ { USB_DEVICE(0x0988, 0x0578) }, /* Teraoka AD2000 */ { USB_DEVICE(0x0B00, 0x3070) }, /* Ingenico 3070 */ From 34f207ed1c109c0a6aa5bc61cdab6080b297ec1f Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Tue, 15 Nov 2022 05:00:39 -0500 Subject: [PATCH 460/543] usb: cdns3: remove fetched trb from cache before dequeuing commit 1301c7b9f7efad2f11ef924e317c18ebd714fc9a upstream. After doorbell DMA fetches the TRB. If during dequeuing request driver changes NORMAL TRB to LINK TRB but doesn't delete it from controller cache then controller will handle cached TRB and packet can be lost. The example scenario for this issue looks like: 1. queue request - set doorbell 2. dequeue request 3. send OUT data packet from host 4. Device will accept this packet which is unexpected 5. queue new request - set doorbell 6. Device lost the expected packet. By setting DFLUSH controller clears DRDY bit and stop DMA transfer. Fixes: 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver") cc: Signed-off-by: Pawel Laszczak Acked-by: Peter Chen Link: https://lore.kernel.org/r/20221115100039.441295-1-pawell@cadence.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/cdns3/cdns3-gadget.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index 5adcb349718c..ccfaebca6faa 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -2614,6 +2614,7 @@ int cdns3_gadget_ep_dequeue(struct usb_ep *ep, u8 req_on_hw_ring = 0; unsigned long flags; int ret = 0; + int val; if (!ep || !request || !ep->desc) return -EINVAL; @@ -2649,6 +2650,13 @@ found: /* Update ring only if removed request is on pending_req_list list */ if (req_on_hw_ring && link_trb) { + /* Stop DMA */ + writel(EP_CMD_DFLUSH, &priv_dev->regs->ep_cmd); + + /* wait for DFLUSH cleared */ + readl_poll_timeout_atomic(&priv_dev->regs->ep_cmd, val, + !(val & EP_CMD_DFLUSH), 1, 1000); + link_trb->buffer = cpu_to_le32(TRB_BUFFER(priv_ep->trb_pool_dma + ((priv_req->end_trb + 1) * TRB_SIZE))); link_trb->control = cpu_to_le32((le32_to_cpu(link_trb->control) & TRB_CYCLE) | @@ -2660,6 +2668,10 @@ found: cdns3_gadget_giveback(priv_ep, priv_req, -ECONNRESET); + req = cdns3_next_request(&priv_ep->pending_req_list); + if (req) + cdns3_rearm_transfer(priv_ep, 1); + not_found: spin_unlock_irqrestore(&priv_dev->lock, flags); return ret; From b3fdc02c61234860af03640182474c72a9768666 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Fri, 20 Jan 2023 13:27:14 +0100 Subject: [PATCH 461/543] usb: host: ehci-fsl: Fix module alias commit 5d3d01ae15d2f37ed0325c99ab47ef0ae5d05f3c upstream. Commit ca07e1c1e4a6 ("drivers:usb:fsl:Make fsl ehci drv an independent driver module") changed DRV_NAME which was used for MODULE_ALIAS as well. Starting from this the module alias didn't match the platform device name created in fsl-mph-dr-of.c Change DRV_NAME to match the driver name for host mode in fsl-mph-dr-of. This is needed for module autoloading on ls1021a. Fixes: ca07e1c1e4a6 ("drivers:usb:fsl:Make fsl ehci drv an independent driver module") Cc: stable Signed-off-by: Alexander Stein Link: https://lore.kernel.org/r/20230120122714.3848784-1-alexander.stein@ew.tq-group.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-fsl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/ehci-fsl.c b/drivers/usb/host/ehci-fsl.c index 9cea785934e5..38d06e5abfbb 100644 --- a/drivers/usb/host/ehci-fsl.c +++ b/drivers/usb/host/ehci-fsl.c @@ -29,7 +29,7 @@ #include "ehci-fsl.h" #define DRIVER_DESC "Freescale EHCI Host controller driver" -#define DRV_NAME "ehci-fsl" +#define DRV_NAME "fsl-ehci" static struct hc_driver __read_mostly fsl_ehci_hc_driver; From 421492f229f027840a7c468b10229e17d36ad653 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 30 Dec 2022 16:17:30 +0800 Subject: [PATCH 462/543] usb: musb: fix error return code in omap2430_probe() commit bd449ad8cee9d4b523abbdfa73e1a2a08333f331 upstream. Before calling platform_get_resource() in omap2430_probe(), the 'ret' is re-assgined to 0, it can't return an error code, if platform_get_resource fails. Set the error code to -EINVAL to fix this. Fixes: ffbe2feac59b ("usb: musb: omap2430: Fix probe regression for missing resources") Cc: stable Signed-off-by: Yang Yingliang Link: https://lore.kernel.org/r/20221230081730.1655616-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/omap2430.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index 476f55d1fec3..44a21ec865fb 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -411,8 +411,10 @@ static int omap2430_probe(struct platform_device *pdev) memset(musb_res, 0, sizeof(*musb_res) * ARRAY_SIZE(musb_res)); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) + if (!res) { + ret = -EINVAL; goto err2; + } musb_res[i].start = res->start; musb_res[i].end = res->end; From 9a027eeb9860edd36403d34ea23e9f6f7dd148d9 Mon Sep 17 00:00:00 2001 From: ChiYuan Huang Date: Mon, 9 Jan 2023 15:19:50 +0800 Subject: [PATCH 463/543] usb: typec: tcpm: Fix altmode re-registration causes sysfs create fail commit 36f78477ac2c89e9a2eed4a31404a291a3450b5d upstream. There's the altmode re-registeration issue after data role swap (DR_SWAP). Comparing to USBPD 2.0, in USBPD 3.0, it loose the limit that only DFP can initiate the VDM command to get partner identity information. For a USBPD 3.0 UFP device, it may already get the identity information from its port partner before DR_SWAP. If DR_SWAP send or receive at the mean time, 'send_discover' flag will be raised again. It causes discover identify action restart while entering ready state. And after all discover actions are done, the 'tcpm_register_altmodes' will be called. If old altmode is not unregistered, this sysfs create fail can be found. In 'DR_SWAP_CHANGE_DR' state case, only DFP will unregister altmodes. For UFP, the original altmodes keep registered. This patch fix the logic that after DR_SWAP, 'tcpm_unregister_altmodes' must be called whatever the current data role is. Reviewed-by: Macpaul Lin Fixes: ae8a2ca8a221 ("usb: typec: Group all TCPCI/TCPM code together") Reported-by: TommyYl Chen Cc: stable@vger.kernel.org Signed-off-by: ChiYuan Huang Acked-by: Heikki Krogerus Link: https://lore.kernel.org/r/1673248790-15794-1-git-send-email-cy_huang@richtek.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 904c7b4ce2f0..59b366b5c614 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -4594,14 +4594,13 @@ static void run_state_machine(struct tcpm_port *port) tcpm_set_state(port, ready_state(port), 0); break; case DR_SWAP_CHANGE_DR: - if (port->data_role == TYPEC_HOST) { - tcpm_unregister_altmodes(port); + tcpm_unregister_altmodes(port); + if (port->data_role == TYPEC_HOST) tcpm_set_roles(port, true, port->pwr_role, TYPEC_DEVICE); - } else { + else tcpm_set_roles(port, true, port->pwr_role, TYPEC_HOST); - } tcpm_ams_finish(port); tcpm_set_state(port, ready_state(port), 0); break; From 632b223797a94517e721723f222c09ded1e33221 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Wed, 11 Jan 2023 02:05:41 +0000 Subject: [PATCH 464/543] usb: typec: altmodes/displayport: Add pin assignment helper commit 582836e3cfab4faafbdc93bbec96fce036a08ee1 upstream. The code to extract a peripheral's currently supported Pin Assignments is repeated in a couple of locations. Factor it out into a separate function. This will also make it easier to add fixes (we only need to update 1 location instead of 2). Fixes: c1e5c2f0cb8a ("usb: typec: altmodes/displayport: correct pin assignment for UFP receptacles") Cc: stable@vger.kernel.org Cc: Heikki Krogerus Signed-off-by: Prashant Malani Reviewed-by: Benson Leung Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20230111020546.3384569-1-pmalani@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/altmodes/displayport.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c index de66a2949e33..5bc7a58f7f44 100644 --- a/drivers/usb/typec/altmodes/displayport.c +++ b/drivers/usb/typec/altmodes/displayport.c @@ -419,6 +419,18 @@ static const char * const pin_assignments[] = { [DP_PIN_ASSIGN_F] = "F", }; +/* + * Helper function to extract a peripheral's currently supported + * Pin Assignments from its DisplayPort alternate mode state. + */ +static u8 get_current_pin_assignments(struct dp_altmode *dp) +{ + if (DP_CONF_CURRENTLY(dp->data.conf) == DP_CONF_DFP_D) + return DP_CAP_UFP_D_PIN_ASSIGN(dp->alt->vdo); + else + return DP_CAP_DFP_D_PIN_ASSIGN(dp->alt->vdo); +} + static ssize_t pin_assignment_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) @@ -445,10 +457,7 @@ pin_assignment_store(struct device *dev, struct device_attribute *attr, goto out_unlock; } - if (DP_CONF_CURRENTLY(dp->data.conf) == DP_CONF_DFP_D) - assignments = DP_CAP_UFP_D_PIN_ASSIGN(dp->alt->vdo); - else - assignments = DP_CAP_DFP_D_PIN_ASSIGN(dp->alt->vdo); + assignments = get_current_pin_assignments(dp); if (!(DP_CONF_GET_PIN_ASSIGN(conf) & assignments)) { ret = -EINVAL; @@ -485,10 +494,7 @@ static ssize_t pin_assignment_show(struct device *dev, cur = get_count_order(DP_CONF_GET_PIN_ASSIGN(dp->data.conf)); - if (DP_CONF_CURRENTLY(dp->data.conf) == DP_CONF_DFP_D) - assignments = DP_CAP_UFP_D_PIN_ASSIGN(dp->alt->vdo); - else - assignments = DP_CAP_DFP_D_PIN_ASSIGN(dp->alt->vdo); + assignments = get_current_pin_assignments(dp); for (i = 0; assignments; assignments >>= 1, i++) { if (assignments & 1) { From 184cfc6afb5547223d5cce44014e6a04572bc9c3 Mon Sep 17 00:00:00 2001 From: Prashant Malani Date: Wed, 11 Jan 2023 02:05:42 +0000 Subject: [PATCH 465/543] usb: typec: altmodes/displayport: Fix pin assignment calculation commit 9682b41e52cc9f42f5c33caf410464392adaef04 upstream. Commit c1e5c2f0cb8a ("usb: typec: altmodes/displayport: correct pin assignment for UFP receptacles") fixed the pin assignment calculation to take into account whether the peripheral was a plug or a receptacle. But the "pin_assignments" sysfs logic was not updated. Address this by using the macros introduced in the aforementioned commit in the sysfs logic too. Fixes: c1e5c2f0cb8a ("usb: typec: altmodes/displayport: correct pin assignment for UFP receptacles") Cc: stable@vger.kernel.org Cc: Heikki Krogerus Signed-off-by: Prashant Malani Reviewed-by: Benson Leung Reviewed-by: Heikki Krogerus Link: https://lore.kernel.org/r/20230111020546.3384569-2-pmalani@chromium.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/altmodes/displayport.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c index 5bc7a58f7f44..80d8c6c3be36 100644 --- a/drivers/usb/typec/altmodes/displayport.c +++ b/drivers/usb/typec/altmodes/displayport.c @@ -426,9 +426,9 @@ static const char * const pin_assignments[] = { static u8 get_current_pin_assignments(struct dp_altmode *dp) { if (DP_CONF_CURRENTLY(dp->data.conf) == DP_CONF_DFP_D) - return DP_CAP_UFP_D_PIN_ASSIGN(dp->alt->vdo); + return DP_CAP_PIN_ASSIGN_DFP_D(dp->alt->vdo); else - return DP_CAP_DFP_D_PIN_ASSIGN(dp->alt->vdo); + return DP_CAP_PIN_ASSIGN_UFP_D(dp->alt->vdo); } static ssize_t From f9c8c6b34d5d6724c7be291ec4192030068f2251 Mon Sep 17 00:00:00 2001 From: Daniel Scally Date: Fri, 16 Dec 2022 16:05:28 +0000 Subject: [PATCH 466/543] usb: gadget: g_webcam: Send color matching descriptor per frame commit e95765e97d9cb93258a4840440d410fa6ff7e819 upstream. Currently the color matching descriptor is only sent across the wire a single time, following the descriptors for each format and frame. According to the UVC 1.5 Specification 3.9.2.6 ("Color Matching Descriptors"): "Only one instance is allowed for a given format and if present, the Color Matching descriptor shall be placed following the Video and Still Image Frame descriptors for that format". Add another reference to the color matching descriptor after the yuyv frames so that it's correctly transmitted for that format too. Fixes: a9914127e834 ("USB gadget: Webcam device") Cc: stable Signed-off-by: Daniel Scally Reviewed-by: Laurent Pinchart Reviewed-by: Kieran Bingham Link: https://lore.kernel.org/r/20221216160528.479094-1-dan.scally@ideasonboard.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/legacy/webcam.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/gadget/legacy/webcam.c b/drivers/usb/gadget/legacy/webcam.c index 94e22867da1d..e9b5846b2322 100644 --- a/drivers/usb/gadget/legacy/webcam.c +++ b/drivers/usb/gadget/legacy/webcam.c @@ -293,6 +293,7 @@ static const struct uvc_descriptor_header * const uvc_fs_streaming_cls[] = { (const struct uvc_descriptor_header *) &uvc_format_yuv, (const struct uvc_descriptor_header *) &uvc_frame_yuv_360p, (const struct uvc_descriptor_header *) &uvc_frame_yuv_720p, + (const struct uvc_descriptor_header *) &uvc_color_matching, (const struct uvc_descriptor_header *) &uvc_format_mjpg, (const struct uvc_descriptor_header *) &uvc_frame_mjpg_360p, (const struct uvc_descriptor_header *) &uvc_frame_mjpg_720p, @@ -305,6 +306,7 @@ static const struct uvc_descriptor_header * const uvc_hs_streaming_cls[] = { (const struct uvc_descriptor_header *) &uvc_format_yuv, (const struct uvc_descriptor_header *) &uvc_frame_yuv_360p, (const struct uvc_descriptor_header *) &uvc_frame_yuv_720p, + (const struct uvc_descriptor_header *) &uvc_color_matching, (const struct uvc_descriptor_header *) &uvc_format_mjpg, (const struct uvc_descriptor_header *) &uvc_frame_mjpg_360p, (const struct uvc_descriptor_header *) &uvc_frame_mjpg_720p, @@ -317,6 +319,7 @@ static const struct uvc_descriptor_header * const uvc_ss_streaming_cls[] = { (const struct uvc_descriptor_header *) &uvc_format_yuv, (const struct uvc_descriptor_header *) &uvc_frame_yuv_360p, (const struct uvc_descriptor_header *) &uvc_frame_yuv_720p, + (const struct uvc_descriptor_header *) &uvc_color_matching, (const struct uvc_descriptor_header *) &uvc_format_mjpg, (const struct uvc_descriptor_header *) &uvc_frame_mjpg_360p, (const struct uvc_descriptor_header *) &uvc_frame_mjpg_720p, From 96abcf7230d7b860c445e2ea8645524c31d0106c Mon Sep 17 00:00:00 2001 From: Chanh Nguyen Date: Wed, 11 Jan 2023 13:51:05 +0700 Subject: [PATCH 467/543] USB: gadget: Add ID numbers to configfs-gadget driver names commit 7c07553807c5125c89de242d35c10c206fd8e6bb upstream. It is unable to use configfs to attach more than one gadget. When attaching the second gadget, it always fails and the kernel message prints out: Error: Driver 'configfs-gadget' is already registered, aborting... UDC core: g1: driver registration failed: -16 This commit fixes the problem by using the gadget name as a suffix to each configfs_gadget's driver name, thus making the names distinct. Fixes: fc274c1e9973 ("USB: gadget: Add a new bus for gadgets") Cc: stable Signed-off-by: Chanh Nguyen Reviewed-by: Andrzej Pietrasiewicz Tested-by: Heikki Krogerus Reviewed-by: Frank Li Link: https://lore.kernel.org/r/20230111065105.29205-1-chanh@os.amperecomputing.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/configfs.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 3a6b4926193e..7bbc77618546 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -392,6 +392,7 @@ static void gadget_info_attr_release(struct config_item *item) WARN_ON(!list_empty(&gi->string_list)); WARN_ON(!list_empty(&gi->available_func)); kfree(gi->composite.gadget_driver.function); + kfree(gi->composite.gadget_driver.driver.name); kfree(gi); } @@ -1571,7 +1572,6 @@ static const struct usb_gadget_driver configfs_driver_template = { .max_speed = USB_SPEED_SUPER_PLUS, .driver = { .owner = THIS_MODULE, - .name = "configfs-gadget", }, .match_existing_only = 1, }; @@ -1622,13 +1622,21 @@ static struct config_group *gadgets_make( gi->composite.gadget_driver = configfs_driver_template; + gi->composite.gadget_driver.driver.name = kasprintf(GFP_KERNEL, + "configfs-gadget.%s", name); + if (!gi->composite.gadget_driver.driver.name) + goto err; + gi->composite.gadget_driver.function = kstrdup(name, GFP_KERNEL); gi->composite.name = gi->composite.gadget_driver.function; if (!gi->composite.gadget_driver.function) - goto err; + goto out_free_driver_name; return &gi->group; + +out_free_driver_name: + kfree(gi->composite.gadget_driver.driver.name); err: kfree(gi); return ERR_PTR(-ENOMEM); From 09e4507ec8ef2d44da6ba4092b8ee2d81f216497 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Tue, 17 Jan 2023 05:18:39 -0800 Subject: [PATCH 468/543] usb: gadget: f_ncm: fix potential NULL ptr deref in ncm_bitrate() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c6ec929595c7443250b2a4faea988c62019d5cd2 upstream. In Google internal bug 265639009 we've received an (as yet) unreproducible crash report from an aarch64 GKI 5.10.149-android13 running device. AFAICT the source code is at: https://android.googlesource.com/kernel/common/+/refs/tags/ASB-2022-12-05_13-5.10 The call stack is: ncm_close() -> ncm_notify() -> ncm_do_notify() with the crash at: ncm_do_notify+0x98/0x270 Code: 79000d0b b9000a6c f940012a f9400269 (b9405d4b) Which I believe disassembles to (I don't know ARM assembly, but it looks sane enough to me...): // halfword (16-bit) store presumably to event->wLength (at offset 6 of struct usb_cdc_notification) 0B 0D 00 79 strh w11, [x8, #6] // word (32-bit) store presumably to req->Length (at offset 8 of struct usb_request) 6C 0A 00 B9 str w12, [x19, #8] // x10 (NULL) was read here from offset 0 of valid pointer x9 // IMHO we're reading 'cdev->gadget' and getting NULL // gadget is indeed at offset 0 of struct usb_composite_dev 2A 01 40 F9 ldr x10, [x9] // loading req->buf pointer, which is at offset 0 of struct usb_request 69 02 40 F9 ldr x9, [x19] // x10 is null, crash, appears to be attempt to read cdev->gadget->max_speed 4B 5D 40 B9 ldr w11, [x10, #0x5c] which seems to line up with ncm_do_notify() case NCM_NOTIFY_SPEED code fragment: event->wLength = cpu_to_le16(8); req->length = NCM_STATUS_BYTECOUNT; /* SPEED_CHANGE data is up/down speeds in bits/sec */ data = req->buf + sizeof *event; data[0] = cpu_to_le32(ncm_bitrate(cdev->gadget)); My analysis of registers and NULL ptr deref crash offset (Unable to handle kernel NULL pointer dereference at virtual address 000000000000005c) heavily suggests that the crash is due to 'cdev->gadget' being NULL when executing: data[0] = cpu_to_le32(ncm_bitrate(cdev->gadget)); which calls: ncm_bitrate(NULL) which then calls: gadget_is_superspeed(NULL) which reads ((struct usb_gadget *)NULL)->max_speed and hits a panic. AFAICT, if I'm counting right, the offset of max_speed is indeed 0x5C. (remember there's a GKI KABI reservation of 16 bytes in struct work_struct) It's not at all clear to me how this is all supposed to work... but returning 0 seems much better than panic-ing... Cc: Felipe Balbi Cc: Lorenzo Colitti Cc: Carlos Llamas Cc: stable@vger.kernel.org Signed-off-by: Maciej Żenczykowski Cc: stable Link: https://lore.kernel.org/r/20230117131839.1138208-1-maze@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_ncm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index c36bcfa0e9b4..424bb3b666db 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -83,7 +83,9 @@ static inline struct f_ncm *func_to_ncm(struct usb_function *f) /* peak (theoretical) bulk transfer rate in bits-per-second */ static inline unsigned ncm_bitrate(struct usb_gadget *g) { - if (gadget_is_superspeed(g) && g->speed >= USB_SPEED_SUPER_PLUS) + if (!g) + return 0; + else if (gadget_is_superspeed(g) && g->speed >= USB_SPEED_SUPER_PLUS) return 4250000000U; else if (gadget_is_superspeed(g) && g->speed == USB_SPEED_SUPER) return 3750000000U; From 2cab24bd8f437ff006445276c77eee68ba9ee770 Mon Sep 17 00:00:00 2001 From: Juhyung Park Date: Tue, 17 Jan 2023 17:51:54 +0900 Subject: [PATCH 469/543] usb-storage: apply IGNORE_UAS only for HIKSEMI MD202 on RTL9210 commit dbd24ec17b85b45f4e823d1aa5607721920f2b05 upstream. The commit e00b488e813f ("usb-storage: Add Hiksemi USB3-FW to IGNORE_UAS") blacklists UAS for all of RTL9210 enclosures. The RTL9210 controller was advertised with UAS since its release back in 2019 and was shipped with a lot of enclosure products with different firmware combinations. Blacklist UAS only for HIKSEMI MD202. This should hopefully be replaced with more robust method than just comparing strings. But with limited information [1] provided thus far (dmesg when the device is plugged in, which includes manufacturer and product, but no lsusb -v to compare against), this is the best we can do for now. [1] https://lore.kernel.org/all/20230109115550.71688-1-qkrwngud825@gmail.com Fixes: e00b488e813f ("usb-storage: Add Hiksemi USB3-FW to IGNORE_UAS") Cc: Alan Stern Cc: Hongling Zeng Cc: stable@vger.kernel.org Signed-off-by: Juhyung Park Acked-by: Oliver Neukum Link: https://lore.kernel.org/r/20230117085154.123301-1-qkrwngud825@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/storage/uas-detect.h | 13 +++++++++++++ drivers/usb/storage/unusual_uas.h | 7 ------- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/usb/storage/uas-detect.h b/drivers/usb/storage/uas-detect.h index 3f720faa6f97..d73282c0ec50 100644 --- a/drivers/usb/storage/uas-detect.h +++ b/drivers/usb/storage/uas-detect.h @@ -116,6 +116,19 @@ static int uas_use_uas_driver(struct usb_interface *intf, if (le16_to_cpu(udev->descriptor.idVendor) == 0x0bc2) flags |= US_FL_NO_ATA_1X; + /* + * RTL9210-based enclosure from HIKSEMI, MD202 reportedly have issues + * with UAS. This isn't distinguishable with just idVendor and + * idProduct, use manufacturer and product too. + * + * Reported-by: Hongling Zeng + */ + if (le16_to_cpu(udev->descriptor.idVendor) == 0x0bda && + le16_to_cpu(udev->descriptor.idProduct) == 0x9210 && + (udev->manufacturer && !strcmp(udev->manufacturer, "HIKSEMI")) && + (udev->product && !strcmp(udev->product, "MD202"))) + flags |= US_FL_IGNORE_UAS; + usb_stor_adjust_quirks(udev, &flags); if (flags & US_FL_IGNORE_UAS) { diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index 251778d14e2d..c7b763d6d102 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -83,13 +83,6 @@ UNUSUAL_DEV(0x0bc2, 0x331a, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_REPORT_LUNS), -/* Reported-by: Hongling Zeng */ -UNUSUAL_DEV(0x0bda, 0x9210, 0x0000, 0x9999, - "Hiksemi", - "External HDD", - USB_SC_DEVICE, USB_PR_DEVICE, NULL, - US_FL_IGNORE_UAS), - /* Reported-by: Benjamin Tissoires */ UNUSUAL_DEV(0x13fd, 0x3940, 0x0000, 0x9999, "Initio Corporation", From 1f1007d1301bc2dc05c70ac9e9ed033ddd4d34b4 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Fri, 30 Sep 2022 22:54:23 +0800 Subject: [PATCH 470/543] arm64: dts: imx8mp: correct usb clocks commit 8a1ed98fe0f2e7669f0409de0f46f317b275f8be upstream. After commit cf7f3f4fa9e5 ("clk: imx8mp: fix usb_root_clk parent"), usb_root_clk is no longer for suspend clock so update dts accordingly to use right bus clock and suspend clock. Fixes: fb8587a2c165 ("arm64: dtsi: imx8mp: add usb nodes") Cc: stable@vger.kernel.org # ed1f4ccfe947: clk: imx: imx8mp: add shared clk gate for usb suspend clk Cc: stable@vger.kernel.org # v5.19+ Reviewed-by: Peng Fan Tested-by: Alexander Stein Signed-off-by: Li Jun Signed-off-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/freescale/imx8mp.dtsi | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index bb916a0948a8..d944ecca1b3c 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -1279,7 +1279,7 @@ reg = <0x32f10100 0x8>, <0x381f0000 0x20>; clocks = <&clk IMX8MP_CLK_HSIO_ROOT>, - <&clk IMX8MP_CLK_USB_ROOT>; + <&clk IMX8MP_CLK_USB_SUSP>; clock-names = "hsio", "suspend"; interrupts = ; power-domains = <&hsio_blk_ctrl IMX8MP_HSIOBLK_PD_USB>; @@ -1292,9 +1292,9 @@ usb_dwc3_0: usb@38100000 { compatible = "snps,dwc3"; reg = <0x38100000 0x10000>; - clocks = <&clk IMX8MP_CLK_HSIO_AXI>, + clocks = <&clk IMX8MP_CLK_USB_ROOT>, <&clk IMX8MP_CLK_USB_CORE_REF>, - <&clk IMX8MP_CLK_USB_ROOT>; + <&clk IMX8MP_CLK_USB_SUSP>; clock-names = "bus_early", "ref", "suspend"; interrupts = ; phys = <&usb3_phy0>, <&usb3_phy0>; @@ -1321,7 +1321,7 @@ reg = <0x32f10108 0x8>, <0x382f0000 0x20>; clocks = <&clk IMX8MP_CLK_HSIO_ROOT>, - <&clk IMX8MP_CLK_USB_ROOT>; + <&clk IMX8MP_CLK_USB_SUSP>; clock-names = "hsio", "suspend"; interrupts = ; power-domains = <&hsio_blk_ctrl IMX8MP_HSIOBLK_PD_USB>; @@ -1334,9 +1334,9 @@ usb_dwc3_1: usb@38200000 { compatible = "snps,dwc3"; reg = <0x38200000 0x10000>; - clocks = <&clk IMX8MP_CLK_HSIO_AXI>, + clocks = <&clk IMX8MP_CLK_USB_ROOT>, <&clk IMX8MP_CLK_USB_CORE_REF>, - <&clk IMX8MP_CLK_USB_ROOT>; + <&clk IMX8MP_CLK_USB_SUSP>; clock-names = "bus_early", "ref", "suspend"; interrupts = ; phys = <&usb3_phy1>, <&usb3_phy1>; From f1288e0549950033edd47b4dd9ff781cd8ff33b9 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 16 Jan 2023 21:17:39 +0100 Subject: [PATCH 471/543] dt-bindings: phy: g12a-usb2-phy: fix compatible string documentation commit c63835bf1c750c9b3aec1d5c23d811d6375fc23d upstream. The compatible strings in the driver don't have the meson prefix. Fix this in the documentation and rename the file accordingly. Fixes: da86d286cce8 ("dt-bindings: phy: meson-g12a-usb2-phy: convert to yaml") Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Reviewed-by: Martin Blumenstingl Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/8d960029-e94d-224b-911f-03e5deb47ebc@gmail.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- ...eson-g12a-usb2-phy.yaml => amlogic,g12a-usb2-phy.yaml} | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) rename Documentation/devicetree/bindings/phy/{amlogic,meson-g12a-usb2-phy.yaml => amlogic,g12a-usb2-phy.yaml} (85%) diff --git a/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb2-phy.yaml b/Documentation/devicetree/bindings/phy/amlogic,g12a-usb2-phy.yaml similarity index 85% rename from Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb2-phy.yaml rename to Documentation/devicetree/bindings/phy/amlogic,g12a-usb2-phy.yaml index f3a5fbabbbb5..bb01c6b34dab 100644 --- a/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb2-phy.yaml +++ b/Documentation/devicetree/bindings/phy/amlogic,g12a-usb2-phy.yaml @@ -2,7 +2,7 @@ # Copyright 2019 BayLibre, SAS %YAML 1.2 --- -$id: "http://devicetree.org/schemas/phy/amlogic,meson-g12a-usb2-phy.yaml#" +$id: "http://devicetree.org/schemas/phy/amlogic,g12a-usb2-phy.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" title: Amlogic G12A USB2 PHY @@ -13,8 +13,8 @@ maintainers: properties: compatible: enum: - - amlogic,meson-g12a-usb2-phy - - amlogic,meson-a1-usb2-phy + - amlogic,g12a-usb2-phy + - amlogic,a1-usb2-phy reg: maxItems: 1 @@ -68,7 +68,7 @@ additionalProperties: false examples: - | phy@36000 { - compatible = "amlogic,meson-g12a-usb2-phy"; + compatible = "amlogic,g12a-usb2-phy"; reg = <0x36000 0x2000>; clocks = <&xtal>; clock-names = "xtal"; From caf81ca977b36d0c4a290e7f6a8f5132063266ac Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 16 Jan 2023 21:19:03 +0100 Subject: [PATCH 472/543] dt-bindings: phy: g12a-usb3-pcie-phy: fix compatible string documentation commit e181119046a0ec16126b682163040e8e33f310c1 upstream. The compatible string in the driver doesn't have the meson prefix. Fix this in the documentation and rename the file accordingly. Fixes: 87a55485f2fc ("dt-bindings: phy: meson-g12a-usb3-pcie-phy: convert to yaml") Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Reviewed-by: Martin Blumenstingl Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/0a82be92-ce85-da34-9d6f-4b33034473e5@gmail.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- ...a-usb3-pcie-phy.yaml => amlogic,g12a-usb3-pcie-phy.yaml} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename Documentation/devicetree/bindings/phy/{amlogic,meson-g12a-usb3-pcie-phy.yaml => amlogic,g12a-usb3-pcie-phy.yaml} (82%) diff --git a/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb3-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/amlogic,g12a-usb3-pcie-phy.yaml similarity index 82% rename from Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb3-pcie-phy.yaml rename to Documentation/devicetree/bindings/phy/amlogic,g12a-usb3-pcie-phy.yaml index 868b4e6fde71..129d26e99776 100644 --- a/Documentation/devicetree/bindings/phy/amlogic,meson-g12a-usb3-pcie-phy.yaml +++ b/Documentation/devicetree/bindings/phy/amlogic,g12a-usb3-pcie-phy.yaml @@ -2,7 +2,7 @@ # Copyright 2019 BayLibre, SAS %YAML 1.2 --- -$id: "http://devicetree.org/schemas/phy/amlogic,meson-g12a-usb3-pcie-phy.yaml#" +$id: "http://devicetree.org/schemas/phy/amlogic,g12a-usb3-pcie-phy.yaml#" $schema: "http://devicetree.org/meta-schemas/core.yaml#" title: Amlogic G12A USB3 + PCIE Combo PHY @@ -13,7 +13,7 @@ maintainers: properties: compatible: enum: - - amlogic,meson-g12a-usb3-pcie-phy + - amlogic,g12a-usb3-pcie-phy reg: maxItems: 1 @@ -49,7 +49,7 @@ additionalProperties: false examples: - | phy@46000 { - compatible = "amlogic,meson-g12a-usb3-pcie-phy"; + compatible = "amlogic,g12a-usb3-pcie-phy"; reg = <0x46000 0x2000>; clocks = <&ref_clk>; clock-names = "ref_clk"; From 16b18ee00c406c6dc581c43e73ff71fe034506f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Tue, 3 Jan 2023 11:34:35 +0200 Subject: [PATCH 473/543] serial: pch_uart: Pass correct sg to dma_unmap_sg() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e8914b52e5b024e4af3d810a935fe0805eee8a36 upstream. A local variable sg is used to store scatterlist pointer in pch_dma_tx_complete(). The for loop doing Tx byte accounting before dma_unmap_sg() alters sg in its increment statement. Therefore, the pointer passed into dma_unmap_sg() won't match to the one given to dma_map_sg(). To fix the problem, use priv->sg_tx_p directly in dma_unmap_sg() instead of the local variable. Fixes: da3564ee027e ("pch_uart: add multi-scatter processing") Cc: stable@vger.kernel.org Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20230103093435.4396-1-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/pch_uart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c index b17788cf309b..83f35b7b0897 100644 --- a/drivers/tty/serial/pch_uart.c +++ b/drivers/tty/serial/pch_uart.c @@ -752,7 +752,7 @@ static void pch_dma_tx_complete(void *arg) } xmit->tail &= UART_XMIT_SIZE - 1; async_tx_ack(priv->desc_tx); - dma_unmap_sg(port->dev, sg, priv->orig_nent, DMA_TO_DEVICE); + dma_unmap_sg(port->dev, priv->sg_tx_p, priv->orig_nent, DMA_TO_DEVICE); priv->tx_dma_use = 0; priv->nent = 0; priv->orig_nent = 0; From 16848cad1a48e25c6aad68b7ba21a1aa43826945 Mon Sep 17 00:00:00 2001 From: Peter Harliman Liem Date: Thu, 5 Jan 2023 11:05:51 +0800 Subject: [PATCH 474/543] dmaengine: lgm: Move DT parsing after initialization commit 96b3bb18f6cbe259ef4e0bed3135911b7e8d2af5 upstream. ldma_cfg_init() will parse DT to retrieve certain configs. However, that is called before ldma_dma_init_vXX(), which will make some initialization to channel configs. It will thus incorrectly overwrite certain configs that are declared in DT. To fix that, we move DT parsing after initialization. Function name is renamed to better represent what it does. Fixes: 32d31c79a1a4 ("dmaengine: Add Intel LGM SoC DMA support.") Signed-off-by: Peter Harliman Liem Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/afef6fc1ed20098b684e0d53737d69faf63c125f.1672887183.git.pliem@maxlinear.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/lgm/lgm-dma.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/dma/lgm/lgm-dma.c b/drivers/dma/lgm/lgm-dma.c index 9b9184f964be..1709d159af7e 100644 --- a/drivers/dma/lgm/lgm-dma.c +++ b/drivers/dma/lgm/lgm-dma.c @@ -914,7 +914,7 @@ static void ldma_dev_init(struct ldma_dev *d) } } -static int ldma_cfg_init(struct ldma_dev *d) +static int ldma_parse_dt(struct ldma_dev *d) { struct fwnode_handle *fwnode = dev_fwnode(d->dev); struct ldma_port *p; @@ -1661,10 +1661,6 @@ static int intel_ldma_probe(struct platform_device *pdev) p->ldev = d; } - ret = ldma_cfg_init(d); - if (ret) - return ret; - dma_dev->dev = &pdev->dev; ch_mask = (unsigned long)d->channels_mask; @@ -1675,6 +1671,10 @@ static int intel_ldma_probe(struct platform_device *pdev) ldma_dma_init_v3X(j, d); } + ret = ldma_parse_dt(d); + if (ret) + return ret; + dma_dev->device_alloc_chan_resources = ldma_alloc_chan_resources; dma_dev->device_free_chan_resources = ldma_free_chan_resources; dma_dev->device_terminate_all = ldma_terminate_all; From 8802fea8dd7f916682dc6448ee323aa1cdf3d3fe Mon Sep 17 00:00:00 2001 From: Mohan Kumar Date: Mon, 2 Jan 2023 12:18:44 +0530 Subject: [PATCH 475/543] dmaengine: tegra210-adma: fix global intr clear commit 9c7e355ccbb33d239360c876dbe49ad5ade65b47 upstream. The current global interrupt clear programming register offset was not correct. Fix the programming with right offset Fixes: ded1f3db4cd6 ("dmaengine: tegra210-adma: prepare for supporting newer Tegra chips") Cc: stable@vger.kernel.org Signed-off-by: Mohan Kumar Link: https://lore.kernel.org/r/20230102064844.31306-1-mkumard@nvidia.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/tegra210-adma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c index ae39b52012b2..79da93cc77b6 100644 --- a/drivers/dma/tegra210-adma.c +++ b/drivers/dma/tegra210-adma.c @@ -221,7 +221,7 @@ static int tegra_adma_init(struct tegra_adma *tdma) int ret; /* Clear any interrupts */ - tdma_write(tdma, tdma->cdata->global_int_clear, 0x1); + tdma_write(tdma, tdma->cdata->ch_base_offset + tdma->cdata->global_int_clear, 0x1); /* Assert soft reset */ tdma_write(tdma, ADMA_GLOBAL_SOFT_RESET, 0x1); From 99dc4520b74e7ca8e9dc9abe37a0b10b49467960 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Wed, 7 Dec 2022 14:52:20 -0800 Subject: [PATCH 476/543] dmaengine: idxd: Let probe fail when workqueue cannot be enabled commit b51b75f0604f17c0f6f3b6f68f1a521a5cc6b04f upstream. The workqueue is enabled when the appropriate driver is loaded and disabled when the driver is removed. When the driver is removed it assumes that the workqueue was enabled successfully and proceeds to free allocations made during workqueue enabling. Failure during workqueue enabling does not prevent the driver from being loaded. This is because the error path within drv_enable_wq() returns success unless a second failure is encountered during the error path. By returning success it is possible to load the driver even if the workqueue cannot be enabled and allocations that do not exist are attempted to be freed during driver remove. Some examples of problematic flows: (a) idxd_dmaengine_drv_probe() -> drv_enable_wq() -> idxd_wq_request_irq(): In above flow, if idxd_wq_request_irq() fails then idxd_wq_unmap_portal() is called on error exit path, but drv_enable_wq() returns 0 because idxd_wq_disable() succeeds. The driver is thus loaded successfully. idxd_dmaengine_drv_remove()->drv_disable_wq()->idxd_wq_unmap_portal() Above flow on driver unload triggers the WARN in devm_iounmap() because the device resource has already been removed during error path of drv_enable_wq(). (b) idxd_dmaengine_drv_probe() -> drv_enable_wq() -> idxd_wq_request_irq(): In above flow, if idxd_wq_request_irq() fails then idxd_wq_init_percpu_ref() is never called to initialize the percpu counter, yet the driver loads successfully because drv_enable_wq() returns 0. idxd_dmaengine_drv_remove()->__idxd_wq_quiesce()->percpu_ref_kill(): Above flow on driver unload triggers a BUG when attempting to drop the initial ref of the uninitialized percpu ref: BUG: kernel NULL pointer dereference, address: 0000000000000010 Fix the drv_enable_wq() error path by returning the original error that indicates failure of workqueue enabling. This ensures that the probe fails when an error is encountered and the driver remove paths are only attempted when the workqueue was enabled successfully. Fixes: 1f2bb40337f0 ("dmaengine: idxd: move wq_enable() to device.c") Signed-off-by: Reinette Chatre Reviewed-by: Dave Jiang Reviewed-by: Fenghua Yu Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/e8d8116e5efa0fd14fadc5adae6ffd319f0e5ff1.1670452419.git.reinette.chatre@intel.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/idxd/device.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 6f44fa8f78a5..fcd03d29a941 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1391,8 +1391,7 @@ err_res_alloc: err_irq: idxd_wq_unmap_portal(wq); err_map_portal: - rc = idxd_wq_disable(wq, false); - if (rc < 0) + if (idxd_wq_disable(wq, false)) dev_dbg(dev, "wq %s disable failed\n", dev_name(wq_confdev(wq))); err: return rc; From b9e8e3fcfec625fc1c2f68f684448aeeb882625b Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Wed, 7 Dec 2022 14:52:21 -0800 Subject: [PATCH 477/543] dmaengine: idxd: Prevent use after free on completion memory commit 1beeec45f9ac31eba52478379f70a5fa9c2ad005 upstream. On driver unload any pending descriptors are flushed at the time the interrupt is freed: idxd_dmaengine_drv_remove() -> drv_disable_wq() -> idxd_wq_free_irq() -> idxd_flush_pending_descs(). If there are any descriptors present that need to be flushed this flow triggers a "not present" page fault as below: BUG: unable to handle page fault for address: ff391c97c70c9040 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page The address that triggers the fault is the address of the descriptor that was freed moments earlier via: drv_disable_wq()->idxd_wq_free_resources() Fix the use after free by freeing the descriptors after any possible usage. This is done after idxd_wq_reset() to ensure that the memory remains accessible during possible completion writes by the device. Fixes: 63c14ae6c161 ("dmaengine: idxd: refactor wq driver enable/disable operations") Suggested-by: Dave Jiang Signed-off-by: Reinette Chatre Reviewed-by: Dave Jiang Reviewed-by: Fenghua Yu Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/6c4657d9cff0a0a00501a7b928297ac966e9ec9d.1670452419.git.reinette.chatre@intel.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/idxd/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index fcd03d29a941..b4d7bb923a40 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1408,11 +1408,11 @@ void drv_disable_wq(struct idxd_wq *wq) dev_warn(dev, "Clients has claim on wq %d: %d\n", wq->id, idxd_wq_refcount(wq)); - idxd_wq_free_resources(wq); idxd_wq_unmap_portal(wq); idxd_wq_drain(wq); idxd_wq_free_irq(wq); idxd_wq_reset(wq); + idxd_wq_free_resources(wq); percpu_ref_exit(&wq->wq_active); wq->type = IDXD_WQT_NONE; wq->client_count = 0; From e7c07deaa1937a3c38d631b9f8d1c37b9f70dc42 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Wed, 7 Dec 2022 14:52:22 -0800 Subject: [PATCH 478/543] dmaengine: idxd: Do not call DMX TX callbacks during workqueue disable commit 6744a030d81e456883bfbb627ac1f30465c1a989 upstream. On driver unload any pending descriptors are flushed and pending DMA descriptors are explicitly completed: idxd_dmaengine_drv_remove() -> drv_disable_wq() -> idxd_wq_free_irq() -> idxd_flush_pending_descs() -> idxd_dma_complete_txd() With this done during driver unload any remaining descriptor is likely stuck and can be dropped. Even so, the descriptor may still have a callback set that could no longer be accessible. An example of such a problem is when the dmatest fails and the dmatest module is unloaded. The failure of dmatest leaves descriptors with dma_async_tx_descriptor::callback pointing to code that no longer exist. This causes a page fault as below at the time the IDXD driver is unloaded when it attempts to run the callback: BUG: unable to handle page fault for address: ffffffffc0665190 #PF: supervisor instruction fetch in kernel mode #PF: error_code(0x0010) - not-present page Fix this by clearing the callback pointers on the transmit descriptors only when workqueue is disabled. Fixes: 403a2e236538 ("dmaengine: idxd: change MSIX allocation based on per wq activation") Signed-off-by: Reinette Chatre Reviewed-by: Dave Jiang Reviewed-by: Fenghua Yu Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/37d06b772aa7f8863ca50f90930ea2fd80b38fc3.1670452419.git.reinette.chatre@intel.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/idxd/device.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index b4d7bb923a40..6d8ff664fdfb 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -1173,8 +1173,19 @@ static void idxd_flush_pending_descs(struct idxd_irq_entry *ie) spin_unlock(&ie->list_lock); list_for_each_entry_safe(desc, itr, &flist, list) { + struct dma_async_tx_descriptor *tx; + list_del(&desc->list); ctype = desc->completion->status ? IDXD_COMPLETE_NORMAL : IDXD_COMPLETE_ABORT; + /* + * wq is being disabled. Any remaining descriptors are + * likely to be stuck and can be dropped. callback could + * point to code that is no longer accessible, for example + * if dmatest module has been unloaded. + */ + tx = &desc->txd; + tx->callback = NULL; + tx->callback_result = NULL; idxd_dma_complete_txd(desc, ctype, true); } } From ab47d385da12bf715e686c431e687ec691b9c727 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Sun, 8 Jan 2023 19:17:35 +0100 Subject: [PATCH 479/543] serial: amba-pl011: fix high priority character transmission in rs486 mode commit 4f39aca2360c82dccd2f5179d77e94aab665bea6 upstream. In RS485 mode the transmission of a high priority character fails since it is written to the data register before the transmitter is enabled. Fix this in pl011_tx_chars() by enabling RS485 transmission before writing the character. Fixes: 8d479237727c ("serial: amba-pl011: add RS485 support") Cc: stable@vger.kernel.org Signed-off-by: Lino Sanfilippo Link: https://lore.kernel.org/r/20230108181735.10937-1-LinoSanfilippo@gmx.de Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/amba-pl011.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index aa0bbb7abeac..0a1cc36f93aa 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -1467,6 +1467,10 @@ static bool pl011_tx_chars(struct uart_amba_port *uap, bool from_irq) struct circ_buf *xmit = &uap->port.state->xmit; int count = uap->fifosize >> 1; + if ((uap->port.rs485.flags & SER_RS485_ENABLED) && + !uap->rs485_tx_started) + pl011_rs485_tx_start(uap); + if (uap->port.x_char) { if (!pl011_tx_char(uap, uap->port.x_char, from_irq)) return true; @@ -1478,10 +1482,6 @@ static bool pl011_tx_chars(struct uart_amba_port *uap, bool from_irq) return false; } - if ((uap->port.rs485.flags & SER_RS485_ENABLED) && - !uap->rs485_tx_started) - pl011_rs485_tx_start(uap); - /* If we are using DMA mode, try to send some characters. */ if (pl011_dma_tx_irq(uap)) return true; From 13efa6b1a304b2edd1ebe48c7eebc49dcc28e1f1 Mon Sep 17 00:00:00 2001 From: Tobias Schramm Date: Mon, 9 Jan 2023 08:29:40 +0100 Subject: [PATCH 480/543] serial: atmel: fix incorrect baudrate setup commit 5bfdd3c654bd879bff50c2e85e42f85ae698b42f upstream. Commit ba47f97a18f2 ("serial: core: remove baud_rates when serial console setup") changed uart_set_options to select the correct baudrate configuration based on the absolute error between requested baudrate and available standard baudrate settings. Prior to that commit the baudrate was selected based on which predefined standard baudrate did not exceed the requested baudrate. This change of selection logic was never reflected in the atmel serial driver. Thus the comment left in the atmel serial driver is no longer accurate. Additionally the manual rounding up described in that comment and applied via (quot - 1) requests an incorrect baudrate. Since uart_set_options uses tty_termios_encode_baud_rate to determine the appropriate baudrate flags this can cause baudrate selection to fail entirely because tty_termios_encode_baud_rate will only select a baudrate if relative error between requested and selected baudrate does not exceed +/-2%. Fix that by requesting actual, exact baudrate used by the serial. Fixes: ba47f97a18f2 ("serial: core: remove baud_rates when serial console setup") Cc: stable Signed-off-by: Tobias Schramm Acked-by: Richard Genoud Link: https://lore.kernel.org/r/20230109072940.202936-1-t.schramm@manjaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index bd07f79a2df9..cff64e5edee2 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -2673,13 +2673,7 @@ static void __init atmel_console_get_options(struct uart_port *port, int *baud, else if (mr == ATMEL_US_PAR_ODD) *parity = 'o'; - /* - * The serial core only rounds down when matching this to a - * supported baud rate. Make sure we don't end up slightly - * lower than one of those, as it would make us fall through - * to a much lower baud rate than we really want. - */ - *baud = port->uartclk / (16 * (quot - 1)); + *baud = port->uartclk / (16 * quot); } static int __init atmel_console_setup(struct console *co, char *options) From 1c5aaff754f3d916e49ea8d8539642e4771323c4 Mon Sep 17 00:00:00 2001 From: Matthew Howell Date: Thu, 19 Jan 2023 14:40:29 -0500 Subject: [PATCH 481/543] serial: exar: Add support for Sealevel 7xxxC serial cards commit 14ee78d5932afeb710c8305196a676a715bfdea8 upstream. Add support for Sealevel 7xxxC serial cards. This patch: * Adds IDs to recognize 7xxxC cards from Sealevel Systems. * Updates exar_pci_probe() to set nr_ports to last two bytes of primary dev ID for these cards. Signed-off-by: Matthew Howell Cc: stable Link: https://lore.kernel.org/r/alpine.DEB.2.21.2301191440010.22558@tstest-VirtualBox Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_exar.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c index 314a05e009df..64770c62bbec 100644 --- a/drivers/tty/serial/8250/8250_exar.c +++ b/drivers/tty/serial/8250/8250_exar.c @@ -43,6 +43,12 @@ #define PCI_DEVICE_ID_EXAR_XR17V4358 0x4358 #define PCI_DEVICE_ID_EXAR_XR17V8358 0x8358 +#define PCI_DEVICE_ID_SEALEVEL_710xC 0x1001 +#define PCI_DEVICE_ID_SEALEVEL_720xC 0x1002 +#define PCI_DEVICE_ID_SEALEVEL_740xC 0x1004 +#define PCI_DEVICE_ID_SEALEVEL_780xC 0x1008 +#define PCI_DEVICE_ID_SEALEVEL_716xC 0x1010 + #define UART_EXAR_INT0 0x80 #define UART_EXAR_8XMODE 0x88 /* 8X sampling rate select */ #define UART_EXAR_SLEEP 0x8b /* Sleep mode */ @@ -638,6 +644,8 @@ exar_pci_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) nr_ports = BIT(((pcidev->device & 0x38) >> 3) - 1); else if (board->num_ports) nr_ports = board->num_ports; + else if (pcidev->vendor == PCI_VENDOR_ID_SEALEVEL) + nr_ports = pcidev->device & 0xff; else nr_ports = pcidev->device & 0x0f; @@ -864,6 +872,12 @@ static const struct pci_device_id exar_pci_tbl[] = { EXAR_DEVICE(COMMTECH, 4224PCI335, pbn_fastcom335_4), EXAR_DEVICE(COMMTECH, 2324PCI335, pbn_fastcom335_4), EXAR_DEVICE(COMMTECH, 2328PCI335, pbn_fastcom335_8), + + EXAR_DEVICE(SEALEVEL, 710xC, pbn_exar_XR17V35x), + EXAR_DEVICE(SEALEVEL, 720xC, pbn_exar_XR17V35x), + EXAR_DEVICE(SEALEVEL, 740xC, pbn_exar_XR17V35x), + EXAR_DEVICE(SEALEVEL, 780xC, pbn_exar_XR17V35x), + EXAR_DEVICE(SEALEVEL, 716xC, pbn_exar_XR17V35x), { 0, } }; MODULE_DEVICE_TABLE(pci, exar_pci_tbl); From 6646d769fdb0ce4318ef9afd127f8526d1ca8393 Mon Sep 17 00:00:00 2001 From: Khazhismel Kumykov Date: Tue, 17 Jan 2023 17:02:12 -0800 Subject: [PATCH 482/543] gsmi: fix null-deref in gsmi_get_variable commit a769b05eeed7accc4019a1ed9799dd72067f1ce8 upstream. We can get EFI variables without fetching the attribute, so we must allow for that in gsmi. commit 859748255b43 ("efi: pstore: Omit efivars caching EFI varstore access layer") added a new get_variable call with attr=NULL, which triggers panic in gsmi. Fixes: 74c5b31c6618 ("driver: Google EFI SMI") Cc: stable Signed-off-by: Khazhismel Kumykov Link: https://lore.kernel.org/r/20230118010212.1268474-1-khazhy@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/google/gsmi.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/google/gsmi.c b/drivers/firmware/google/gsmi.c index 4e2575dfeb90..871bedf533a8 100644 --- a/drivers/firmware/google/gsmi.c +++ b/drivers/firmware/google/gsmi.c @@ -361,9 +361,10 @@ static efi_status_t gsmi_get_variable(efi_char16_t *name, memcpy(data, gsmi_dev.data_buf->start, *data_size); /* All variables are have the following attributes */ - *attr = EFI_VARIABLE_NON_VOLATILE | - EFI_VARIABLE_BOOTSERVICE_ACCESS | - EFI_VARIABLE_RUNTIME_ACCESS; + if (attr) + *attr = EFI_VARIABLE_NON_VOLATILE | + EFI_VARIABLE_BOOTSERVICE_ACCESS | + EFI_VARIABLE_RUNTIME_ACCESS; } spin_unlock_irqrestore(&gsmi_dev.lock, flags); From a63171d0979ce13b82645b26e24c8982ff523b70 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 13 Dec 2022 00:02:46 +0200 Subject: [PATCH 483/543] mei: bus: fix unlink on bus in error path commit a43866856125c3c432e2fbb6cc63cee1539ec4a7 upstream. Unconditional call to mei_cl_unlink in mei_cl_bus_dev_release leads to call of the mei_cl_unlink without corresponding mei_cl_link. This leads to miscalculation of open_handle_count (decrease without increase). Call unlink in mei_cldev_enable fail path and remove blanket unlink from mei_cl_bus_dev_release. Fixes: 34f1166afd67 ("mei: bus: need to unlink client before freeing") Cc: Signed-off-by: Alexander Usyskin Reviewed-by: Tomas Winkler Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20221212220247.286019-1-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/bus.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 46aa3554e97b..7b7f4190cd02 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -665,13 +665,15 @@ void *mei_cldev_dma_map(struct mei_cl_device *cldev, u8 buffer_id, size_t size) if (cl->state == MEI_FILE_UNINITIALIZED) { ret = mei_cl_link(cl); if (ret) - goto out; + goto notlinked; /* update pointers */ cl->cldev = cldev; } ret = mei_cl_dma_alloc_and_map(cl, NULL, buffer_id, size); -out: + if (ret) + mei_cl_unlink(cl); +notlinked: mutex_unlock(&bus->device_lock); if (ret) return ERR_PTR(ret); @@ -721,7 +723,7 @@ int mei_cldev_enable(struct mei_cl_device *cldev) if (cl->state == MEI_FILE_UNINITIALIZED) { ret = mei_cl_link(cl); if (ret) - goto out; + goto notlinked; /* update pointers */ cl->cldev = cldev; } @@ -748,6 +750,9 @@ int mei_cldev_enable(struct mei_cl_device *cldev) } out: + if (ret) + mei_cl_unlink(cl); +notlinked: mutex_unlock(&bus->device_lock); return ret; @@ -1115,7 +1120,6 @@ static void mei_cl_bus_dev_release(struct device *dev) mei_cl_flush_queues(cldev->cl, NULL); mei_me_cl_put(cldev->me_cl); mei_dev_bus_put(cldev->bus); - mei_cl_unlink(cldev->cl); kfree(cldev->cl); kfree(cldev); } From 32b39dccd69bcb09619ac60d71ea2ccdcc0758eb Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Tue, 13 Dec 2022 00:02:47 +0200 Subject: [PATCH 484/543] mei: me: add meteor lake point M DID commit 0c4d68261717f89fa8c4f98a6967c3832fcb3ad0 upstream. Add Meteor Lake Point M device id. Cc: Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Link: https://lore.kernel.org/r/20221212220247.286019-2-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-me-regs.h | 2 ++ drivers/misc/mei/pci-me.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index 99966cd3e7d8..bdc65d50b945 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -111,6 +111,8 @@ #define MEI_DEV_ID_RPL_S 0x7A68 /* Raptor Lake Point S */ +#define MEI_DEV_ID_MTL_M 0x7E70 /* Meteor Lake Point M */ + /* * MEI HW Section */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 704cd0caa172..5bf0d50d55a0 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -118,6 +118,8 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_MTL_M, MEI_ME_PCH15_CFG)}, + /* required last entry */ {0, } }; From 548ea9dd5e01b0ecf53d2563004c80abd636743d Mon Sep 17 00:00:00 2001 From: Vishnu Dasa Date: Tue, 29 Nov 2022 23:05:11 -0800 Subject: [PATCH 485/543] VMCI: Use threaded irqs instead of tasklets commit 3daed6345d5880464f46adab871d208e1baa2f3a upstream. The vmci_dispatch_dgs() tasklet function calls vmci_read_data() which uses wait_event() resulting in invalid sleep in an atomic context (and therefore potentially in a deadlock). Use threaded irqs to fix this issue and completely remove usage of tasklets. [ 20.264639] BUG: sleeping function called from invalid context at drivers/misc/vmw_vmci/vmci_guest.c:145 [ 20.264643] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 762, name: vmtoolsd [ 20.264645] preempt_count: 101, expected: 0 [ 20.264646] RCU nest depth: 0, expected: 0 [ 20.264647] 1 lock held by vmtoolsd/762: [ 20.264648] #0: ffff0000874ae440 (sk_lock-AF_VSOCK){+.+.}-{0:0}, at: vsock_connect+0x60/0x330 [vsock] [ 20.264658] Preemption disabled at: [ 20.264659] [] vmci_send_datagram+0x44/0xa0 [vmw_vmci] [ 20.264665] CPU: 0 PID: 762 Comm: vmtoolsd Not tainted 5.19.0-0.rc8.20220727git39c3c396f813.60.fc37.aarch64 #1 [ 20.264667] Hardware name: VMware, Inc. VBSA/VBSA, BIOS VEFI 12/31/2020 [ 20.264668] Call trace: [ 20.264669] dump_backtrace+0xc4/0x130 [ 20.264672] show_stack+0x24/0x80 [ 20.264673] dump_stack_lvl+0x88/0xb4 [ 20.264676] dump_stack+0x18/0x34 [ 20.264677] __might_resched+0x1a0/0x280 [ 20.264679] __might_sleep+0x58/0x90 [ 20.264681] vmci_read_data+0x74/0x120 [vmw_vmci] [ 20.264683] vmci_dispatch_dgs+0x64/0x204 [vmw_vmci] [ 20.264686] tasklet_action_common.constprop.0+0x13c/0x150 [ 20.264688] tasklet_action+0x40/0x50 [ 20.264689] __do_softirq+0x23c/0x6b4 [ 20.264690] __irq_exit_rcu+0x104/0x214 [ 20.264691] irq_exit_rcu+0x1c/0x50 [ 20.264693] el1_interrupt+0x38/0x6c [ 20.264695] el1h_64_irq_handler+0x18/0x24 [ 20.264696] el1h_64_irq+0x68/0x6c [ 20.264697] preempt_count_sub+0xa4/0xe0 [ 20.264698] _raw_spin_unlock_irqrestore+0x64/0xb0 [ 20.264701] vmci_send_datagram+0x7c/0xa0 [vmw_vmci] [ 20.264703] vmci_datagram_dispatch+0x84/0x100 [vmw_vmci] [ 20.264706] vmci_datagram_send+0x2c/0x40 [vmw_vmci] [ 20.264709] vmci_transport_send_control_pkt+0xb8/0x120 [vmw_vsock_vmci_transport] [ 20.264711] vmci_transport_connect+0x40/0x7c [vmw_vsock_vmci_transport] [ 20.264713] vsock_connect+0x278/0x330 [vsock] [ 20.264715] __sys_connect_file+0x8c/0xc0 [ 20.264718] __sys_connect+0x84/0xb4 [ 20.264720] __arm64_sys_connect+0x2c/0x3c [ 20.264721] invoke_syscall+0x78/0x100 [ 20.264723] el0_svc_common.constprop.0+0x68/0x124 [ 20.264724] do_el0_svc+0x38/0x4c [ 20.264725] el0_svc+0x60/0x180 [ 20.264726] el0t_64_sync_handler+0x11c/0x150 [ 20.264728] el0t_64_sync+0x190/0x194 Signed-off-by: Vishnu Dasa Suggested-by: Zack Rusin Reported-by: Nadav Amit Reported-by: Nathan Chancellor Tested-by: Nathan Chancellor Fixes: 463713eb6164 ("VMCI: dma dg: add support for DMA datagrams receive") Cc: # v5.18+ Cc: VMware PV-Drivers Reviewers Cc: Greg Kroah-Hartman Cc: Bryan Tan Reviewed-by: Bryan Tan Reviewed-by: Zack Rusin Link: https://lore.kernel.org/r/20221130070511.46558-1-vdasa@vmware.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_vmci/vmci_guest.c | 49 ++++++++++++------------------ 1 file changed, 19 insertions(+), 30 deletions(-) diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c index aa7b05de97dd..4f8d962bb5b2 100644 --- a/drivers/misc/vmw_vmci/vmci_guest.c +++ b/drivers/misc/vmw_vmci/vmci_guest.c @@ -56,8 +56,6 @@ struct vmci_guest_device { bool exclusive_vectors; - struct tasklet_struct datagram_tasklet; - struct tasklet_struct bm_tasklet; struct wait_queue_head inout_wq; void *data_buffer; @@ -304,9 +302,8 @@ static int vmci_check_host_caps(struct pci_dev *pdev) * This function assumes that it has exclusive access to the data * in register(s) for the duration of the call. */ -static void vmci_dispatch_dgs(unsigned long data) +static void vmci_dispatch_dgs(struct vmci_guest_device *vmci_dev) { - struct vmci_guest_device *vmci_dev = (struct vmci_guest_device *)data; u8 *dg_in_buffer = vmci_dev->data_buffer; struct vmci_datagram *dg; size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE; @@ -465,10 +462,8 @@ static void vmci_dispatch_dgs(unsigned long data) * Scans the notification bitmap for raised flags, clears them * and handles the notifications. */ -static void vmci_process_bitmap(unsigned long data) +static void vmci_process_bitmap(struct vmci_guest_device *dev) { - struct vmci_guest_device *dev = (struct vmci_guest_device *)data; - if (!dev->notification_bitmap) { dev_dbg(dev->dev, "No bitmap present in %s\n", __func__); return; @@ -486,13 +481,13 @@ static irqreturn_t vmci_interrupt(int irq, void *_dev) struct vmci_guest_device *dev = _dev; /* - * If we are using MSI-X with exclusive vectors then we simply schedule - * the datagram tasklet, since we know the interrupt was meant for us. + * If we are using MSI-X with exclusive vectors then we simply call + * vmci_dispatch_dgs(), since we know the interrupt was meant for us. * Otherwise we must read the ICR to determine what to do. */ if (dev->exclusive_vectors) { - tasklet_schedule(&dev->datagram_tasklet); + vmci_dispatch_dgs(dev); } else { unsigned int icr; @@ -502,12 +497,12 @@ static irqreturn_t vmci_interrupt(int irq, void *_dev) return IRQ_NONE; if (icr & VMCI_ICR_DATAGRAM) { - tasklet_schedule(&dev->datagram_tasklet); + vmci_dispatch_dgs(dev); icr &= ~VMCI_ICR_DATAGRAM; } if (icr & VMCI_ICR_NOTIFICATION) { - tasklet_schedule(&dev->bm_tasklet); + vmci_process_bitmap(dev); icr &= ~VMCI_ICR_NOTIFICATION; } @@ -536,7 +531,7 @@ static irqreturn_t vmci_interrupt_bm(int irq, void *_dev) struct vmci_guest_device *dev = _dev; /* For MSI-X we can just assume it was meant for us. */ - tasklet_schedule(&dev->bm_tasklet); + vmci_process_bitmap(dev); return IRQ_HANDLED; } @@ -638,10 +633,6 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, vmci_dev->iobase = iobase; vmci_dev->mmio_base = mmio_base; - tasklet_init(&vmci_dev->datagram_tasklet, - vmci_dispatch_dgs, (unsigned long)vmci_dev); - tasklet_init(&vmci_dev->bm_tasklet, - vmci_process_bitmap, (unsigned long)vmci_dev); init_waitqueue_head(&vmci_dev->inout_wq); if (mmio_base != NULL) { @@ -808,8 +799,9 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, * Request IRQ for legacy or MSI interrupts, or for first * MSI-X vector. */ - error = request_irq(pci_irq_vector(pdev, 0), vmci_interrupt, - IRQF_SHARED, KBUILD_MODNAME, vmci_dev); + error = request_threaded_irq(pci_irq_vector(pdev, 0), NULL, + vmci_interrupt, IRQF_SHARED, + KBUILD_MODNAME, vmci_dev); if (error) { dev_err(&pdev->dev, "Irq %u in use: %d\n", pci_irq_vector(pdev, 0), error); @@ -823,9 +815,9 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, * between the vectors. */ if (vmci_dev->exclusive_vectors) { - error = request_irq(pci_irq_vector(pdev, 1), - vmci_interrupt_bm, 0, KBUILD_MODNAME, - vmci_dev); + error = request_threaded_irq(pci_irq_vector(pdev, 1), NULL, + vmci_interrupt_bm, 0, + KBUILD_MODNAME, vmci_dev); if (error) { dev_err(&pdev->dev, "Failed to allocate irq %u: %d\n", @@ -833,9 +825,11 @@ static int vmci_guest_probe_device(struct pci_dev *pdev, goto err_free_irq; } if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) { - error = request_irq(pci_irq_vector(pdev, 2), - vmci_interrupt_dma_datagram, - 0, KBUILD_MODNAME, vmci_dev); + error = request_threaded_irq(pci_irq_vector(pdev, 2), + NULL, + vmci_interrupt_dma_datagram, + 0, KBUILD_MODNAME, + vmci_dev); if (error) { dev_err(&pdev->dev, "Failed to allocate irq %u: %d\n", @@ -871,8 +865,6 @@ err_free_bm_irq: err_free_irq: free_irq(pci_irq_vector(pdev, 0), vmci_dev); - tasklet_kill(&vmci_dev->datagram_tasklet); - tasklet_kill(&vmci_dev->bm_tasklet); err_disable_msi: pci_free_irq_vectors(pdev); @@ -943,9 +935,6 @@ static void vmci_guest_remove_device(struct pci_dev *pdev) free_irq(pci_irq_vector(pdev, 0), vmci_dev); pci_free_irq_vectors(pdev); - tasklet_kill(&vmci_dev->datagram_tasklet); - tasklet_kill(&vmci_dev->bm_tasklet); - if (vmci_dev->notification_bitmap) { /* * The device reset above cleared the bitmap state of the From 4cc5622b2e9acd9712bf5370819a9fa572435883 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Sun, 4 Dec 2022 09:46:14 +0100 Subject: [PATCH 486/543] ARM: dts: qcom: apq8084-ifc6540: fix overriding SDHCI commit 0154252a3b87f77db1e44516d1ed2e82e2d29c30 upstream. While changing node names of APQ8084 SDHCI, the ones in IFC6540 board were not updated leading to disabled and misconfigured SDHCI. Cc: Fixes: 2477d81901a2 ("ARM: dts: qcom: Fix sdhci node names - use 'mmc@'") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Dmitry Baryshkov Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221204084614.12193-1-krzysztof.kozlowski@linaro.org Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/qcom-apq8084-ifc6540.dts | 22 +++++++++++----------- arch/arm/boot/dts/qcom-apq8084.dtsi | 4 ++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/arm/boot/dts/qcom-apq8084-ifc6540.dts b/arch/arm/boot/dts/qcom-apq8084-ifc6540.dts index 44cd72f1b1be..116e59a3b76d 100644 --- a/arch/arm/boot/dts/qcom-apq8084-ifc6540.dts +++ b/arch/arm/boot/dts/qcom-apq8084-ifc6540.dts @@ -19,16 +19,16 @@ serial@f995e000 { status = "okay"; }; - - sdhci@f9824900 { - bus-width = <8>; - non-removable; - status = "okay"; - }; - - sdhci@f98a4900 { - cd-gpios = <&tlmm 122 GPIO_ACTIVE_LOW>; - bus-width = <4>; - }; }; }; + +&sdhc_1 { + bus-width = <8>; + non-removable; + status = "okay"; +}; + +&sdhc_2 { + cd-gpios = <&tlmm 122 GPIO_ACTIVE_LOW>; + bus-width = <4>; +}; diff --git a/arch/arm/boot/dts/qcom-apq8084.dtsi b/arch/arm/boot/dts/qcom-apq8084.dtsi index f2fb7c975af8..69da87577ad0 100644 --- a/arch/arm/boot/dts/qcom-apq8084.dtsi +++ b/arch/arm/boot/dts/qcom-apq8084.dtsi @@ -419,7 +419,7 @@ status = "disabled"; }; - mmc@f9824900 { + sdhc_1: mmc@f9824900 { compatible = "qcom,apq8084-sdhci", "qcom,sdhci-msm-v4"; reg = <0xf9824900 0x11c>, <0xf9824000 0x800>; reg-names = "hc", "core"; @@ -432,7 +432,7 @@ status = "disabled"; }; - mmc@f98a4900 { + sdhc_2: mmc@f98a4900 { compatible = "qcom,apq8084-sdhci", "qcom,sdhci-msm-v4"; reg = <0xf98a4900 0x11c>, <0xf98a4000 0x800>; reg-names = "hc", "core"; From cfb7a66c99adcec285eff9546f998c9c2da8bfe8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 4 Jan 2023 09:35:09 +0100 Subject: [PATCH 487/543] ARM: omap1: fix !ARCH_OMAP1_ANY link failures commit 980a637d11fe8dfc734f508a422185c2de55e669 upstream. While compile-testing randconfig builds for the upcoming boardfile removal, I noticed that an earlier patch of mine was completely broken, and the introduction of CONFIG_ARCH_OMAP1_ANY only replaced one set of build failures with another one, now resulting in link failures like ld: drivers/video/fbdev/omap/omapfb_main.o: in function `omapfb_do_probe': drivers/video/fbdev/omap/omapfb_main.c:1703: undefined reference to `omap_set_dma_priority' ld: drivers/dma/ti/omap-dma.o: in function `omap_dma_free_chan_resources': drivers/dma/ti/omap-dma.c:777: undefined reference to `omap_free_dma' drivers/dma/ti/omap-dma.c:1685: undefined reference to `omap_get_plat_info' ld: drivers/usb/gadget/udc/omap_udc.o: in function `next_in_dma': drivers/usb/gadget/udc/omap_udc.c:820: undefined reference to `omap_get_dma_active_status' I tried reworking it, but the resulting patch ended up much bigger than simply avoiding the original problem of unused-function warnings like arch/arm/mach-omap1/mcbsp.c:76:30: error: unused variable 'omap1_mcbsp_ops' [-Werror,-Wunused-variable] As a result, revert the previous fix, and rearrange the code that produces warnings to hide them. For mcbsp, the #ifdef check can simply be removed as the cpu_is_omapxxx() checks already achieve the same result, while in the io.c the easiest solution appears to be to merge the common map bits into each soc specific portion. This gets cleaned in a nicer way after omap7xx support gets dropped, as the remaining SoCs all have the exact same I/O map. Fixes: 615dce5bf736 ("ARM: omap1: fix build with no SoC selected") Cc: stable@vger.kernel.org Acked-by: Aaro Koskinen Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-omap1/Kconfig | 5 +---- arch/arm/mach-omap1/Makefile | 4 ---- arch/arm/mach-omap1/io.c | 32 +++++++++++++++----------------- arch/arm/mach-omap1/mcbsp.c | 21 --------------------- arch/arm/mach-omap1/pm.h | 7 ------- include/linux/soc/ti/omap1-io.h | 4 ++-- 6 files changed, 18 insertions(+), 55 deletions(-) diff --git a/arch/arm/mach-omap1/Kconfig b/arch/arm/mach-omap1/Kconfig index 538a960257cc..7ec7ada287e0 100644 --- a/arch/arm/mach-omap1/Kconfig +++ b/arch/arm/mach-omap1/Kconfig @@ -4,6 +4,7 @@ menuconfig ARCH_OMAP1 depends on ARCH_MULTI_V4T || ARCH_MULTI_V5 depends on CPU_LITTLE_ENDIAN depends on ATAGS + select ARCH_OMAP select ARCH_HAS_HOLES_MEMORYMODEL select ARCH_OMAP select CLKSRC_MMIO @@ -45,10 +46,6 @@ config ARCH_OMAP16XX select CPU_ARM926T select OMAP_DM_TIMER -config ARCH_OMAP1_ANY - select ARCH_OMAP - def_bool ARCH_OMAP730 || ARCH_OMAP850 || ARCH_OMAP15XX || ARCH_OMAP16XX - config ARCH_OMAP bool diff --git a/arch/arm/mach-omap1/Makefile b/arch/arm/mach-omap1/Makefile index 506074b86333..0615cb0ba580 100644 --- a/arch/arm/mach-omap1/Makefile +++ b/arch/arm/mach-omap1/Makefile @@ -3,8 +3,6 @@ # Makefile for the linux kernel. # -ifdef CONFIG_ARCH_OMAP1_ANY - # Common support obj-y := io.o id.o sram-init.o sram.o time.o irq.o mux.o flash.o \ serial.o devices.o dma.o omap-dma.o fb.o @@ -59,5 +57,3 @@ obj-$(CONFIG_ARCH_OMAP730) += gpio7xx.o obj-$(CONFIG_ARCH_OMAP850) += gpio7xx.o obj-$(CONFIG_ARCH_OMAP15XX) += gpio15xx.o obj-$(CONFIG_ARCH_OMAP16XX) += gpio16xx.o - -endif diff --git a/arch/arm/mach-omap1/io.c b/arch/arm/mach-omap1/io.c index d2db9b8aed3f..0074b011a05a 100644 --- a/arch/arm/mach-omap1/io.c +++ b/arch/arm/mach-omap1/io.c @@ -22,17 +22,14 @@ * The machine specific code may provide the extra mapping besides the * default mapping provided here. */ -static struct map_desc omap_io_desc[] __initdata = { +#if defined (CONFIG_ARCH_OMAP730) || defined (CONFIG_ARCH_OMAP850) +static struct map_desc omap7xx_io_desc[] __initdata = { { .virtual = OMAP1_IO_VIRT, .pfn = __phys_to_pfn(OMAP1_IO_PHYS), .length = OMAP1_IO_SIZE, .type = MT_DEVICE - } -}; - -#if defined (CONFIG_ARCH_OMAP730) || defined (CONFIG_ARCH_OMAP850) -static struct map_desc omap7xx_io_desc[] __initdata = { + }, { .virtual = OMAP7XX_DSP_BASE, .pfn = __phys_to_pfn(OMAP7XX_DSP_START), @@ -49,6 +46,12 @@ static struct map_desc omap7xx_io_desc[] __initdata = { #ifdef CONFIG_ARCH_OMAP15XX static struct map_desc omap1510_io_desc[] __initdata = { + { + .virtual = OMAP1_IO_VIRT, + .pfn = __phys_to_pfn(OMAP1_IO_PHYS), + .length = OMAP1_IO_SIZE, + .type = MT_DEVICE + }, { .virtual = OMAP1510_DSP_BASE, .pfn = __phys_to_pfn(OMAP1510_DSP_START), @@ -65,6 +68,12 @@ static struct map_desc omap1510_io_desc[] __initdata = { #if defined(CONFIG_ARCH_OMAP16XX) static struct map_desc omap16xx_io_desc[] __initdata = { + { + .virtual = OMAP1_IO_VIRT, + .pfn = __phys_to_pfn(OMAP1_IO_PHYS), + .length = OMAP1_IO_SIZE, + .type = MT_DEVICE + }, { .virtual = OMAP16XX_DSP_BASE, .pfn = __phys_to_pfn(OMAP16XX_DSP_START), @@ -79,18 +88,9 @@ static struct map_desc omap16xx_io_desc[] __initdata = { }; #endif -/* - * Maps common IO regions for omap1 - */ -static void __init omap1_map_common_io(void) -{ - iotable_init(omap_io_desc, ARRAY_SIZE(omap_io_desc)); -} - #if defined (CONFIG_ARCH_OMAP730) || defined (CONFIG_ARCH_OMAP850) void __init omap7xx_map_io(void) { - omap1_map_common_io(); iotable_init(omap7xx_io_desc, ARRAY_SIZE(omap7xx_io_desc)); } #endif @@ -98,7 +98,6 @@ void __init omap7xx_map_io(void) #ifdef CONFIG_ARCH_OMAP15XX void __init omap15xx_map_io(void) { - omap1_map_common_io(); iotable_init(omap1510_io_desc, ARRAY_SIZE(omap1510_io_desc)); } #endif @@ -106,7 +105,6 @@ void __init omap15xx_map_io(void) #if defined(CONFIG_ARCH_OMAP16XX) void __init omap16xx_map_io(void) { - omap1_map_common_io(); iotable_init(omap16xx_io_desc, ARRAY_SIZE(omap16xx_io_desc)); } #endif diff --git a/arch/arm/mach-omap1/mcbsp.c b/arch/arm/mach-omap1/mcbsp.c index 05c25c432449..b1632cbe37e6 100644 --- a/arch/arm/mach-omap1/mcbsp.c +++ b/arch/arm/mach-omap1/mcbsp.c @@ -89,7 +89,6 @@ static struct omap_mcbsp_ops omap1_mcbsp_ops = { #define OMAP1610_MCBSP2_BASE 0xfffb1000 #define OMAP1610_MCBSP3_BASE 0xe1017000 -#if defined(CONFIG_ARCH_OMAP730) || defined(CONFIG_ARCH_OMAP850) struct resource omap7xx_mcbsp_res[][6] = { { { @@ -159,14 +158,7 @@ static struct omap_mcbsp_platform_data omap7xx_mcbsp_pdata[] = { }; #define OMAP7XX_MCBSP_RES_SZ ARRAY_SIZE(omap7xx_mcbsp_res[1]) #define OMAP7XX_MCBSP_COUNT ARRAY_SIZE(omap7xx_mcbsp_res) -#else -#define omap7xx_mcbsp_res_0 NULL -#define omap7xx_mcbsp_pdata NULL -#define OMAP7XX_MCBSP_RES_SZ 0 -#define OMAP7XX_MCBSP_COUNT 0 -#endif -#ifdef CONFIG_ARCH_OMAP15XX struct resource omap15xx_mcbsp_res[][6] = { { { @@ -266,14 +258,7 @@ static struct omap_mcbsp_platform_data omap15xx_mcbsp_pdata[] = { }; #define OMAP15XX_MCBSP_RES_SZ ARRAY_SIZE(omap15xx_mcbsp_res[1]) #define OMAP15XX_MCBSP_COUNT ARRAY_SIZE(omap15xx_mcbsp_res) -#else -#define omap15xx_mcbsp_res_0 NULL -#define omap15xx_mcbsp_pdata NULL -#define OMAP15XX_MCBSP_RES_SZ 0 -#define OMAP15XX_MCBSP_COUNT 0 -#endif -#ifdef CONFIG_ARCH_OMAP16XX struct resource omap16xx_mcbsp_res[][6] = { { { @@ -373,12 +358,6 @@ static struct omap_mcbsp_platform_data omap16xx_mcbsp_pdata[] = { }; #define OMAP16XX_MCBSP_RES_SZ ARRAY_SIZE(omap16xx_mcbsp_res[1]) #define OMAP16XX_MCBSP_COUNT ARRAY_SIZE(omap16xx_mcbsp_res) -#else -#define omap16xx_mcbsp_res_0 NULL -#define omap16xx_mcbsp_pdata NULL -#define OMAP16XX_MCBSP_RES_SZ 0 -#define OMAP16XX_MCBSP_COUNT 0 -#endif static void omap_mcbsp_register_board_cfg(struct resource *res, int res_count, struct omap_mcbsp_platform_data *config, int size) diff --git a/arch/arm/mach-omap1/pm.h b/arch/arm/mach-omap1/pm.h index d9165709c532..0d1f092821ff 100644 --- a/arch/arm/mach-omap1/pm.h +++ b/arch/arm/mach-omap1/pm.h @@ -106,13 +106,6 @@ #define OMAP7XX_IDLECT3 0xfffece24 #define OMAP7XX_IDLE_LOOP_REQUEST 0x0C00 -#if !defined(CONFIG_ARCH_OMAP730) && \ - !defined(CONFIG_ARCH_OMAP850) && \ - !defined(CONFIG_ARCH_OMAP15XX) && \ - !defined(CONFIG_ARCH_OMAP16XX) -#warning "Power management for this processor not implemented yet" -#endif - #ifndef __ASSEMBLER__ #include diff --git a/include/linux/soc/ti/omap1-io.h b/include/linux/soc/ti/omap1-io.h index f7f12728d4a6..9a60f45899d3 100644 --- a/include/linux/soc/ti/omap1-io.h +++ b/include/linux/soc/ti/omap1-io.h @@ -5,7 +5,7 @@ #ifndef __ASSEMBLER__ #include -#ifdef CONFIG_ARCH_OMAP1_ANY +#ifdef CONFIG_ARCH_OMAP1 /* * NOTE: Please use ioremap + __raw_read/write where possible instead of these */ @@ -15,7 +15,7 @@ extern u32 omap_readl(u32 pa); extern void omap_writeb(u8 v, u32 pa); extern void omap_writew(u16 v, u32 pa); extern void omap_writel(u32 v, u32 pa); -#else +#elif defined(CONFIG_COMPILE_TEST) static inline u8 omap_readb(u32 pa) { return 0; } static inline u16 omap_readw(u32 pa) { return 0; } static inline u32 omap_readl(u32 pa) { return 0; } From af4a3c274a92ee0545d65b9102b398fb75d7cc9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Thu, 12 Jan 2023 14:46:00 +0100 Subject: [PATCH 488/543] drm/amdgpu: fix amdgpu_job_free_resources v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 74ea8e78ab349514c9f4df0be1189d91267d750d upstream. It can be that neither fence were initialized when we run out of UVD streams for example. v2: fix typo breaking compile Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2324 Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index adac650cf544..3bf0e893c07d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -154,8 +154,14 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) struct dma_fence *f; unsigned i; - /* use sched fence if available */ - f = job->base.s_fence ? &job->base.s_fence->finished : &job->hw_fence; + /* Check if any fences where initialized */ + if (job->base.s_fence && job->base.s_fence->finished.ops) + f = &job->base.s_fence->finished; + else if (job->hw_fence.ops) + f = &job->hw_fence; + else + f = NULL; + for (i = 0; i < job->num_ibs; ++i) amdgpu_ib_free(ring->adev, &job->ibs[i], f); } From d693fdf73eb296081523e18d1ac3bc38787e7cab Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Wed, 11 Jan 2023 09:32:15 +0800 Subject: [PATCH 489/543] drm/amdgpu: allow multipipe policy on ASICs with one MEC commit dc88063b87775971be564d79dc1b05f7b8b5c135 upstream. Always enable multipipe policy on ASICs with GC VERSION > 9.0.0 instead of MEC number > 1. This will allow multipipe policy on ASICs with one MEC, e.g., gfx11 APUs. Signed-off-by: Lang Yu Reviewed-by: Aaron Liu Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 9546adc8a76f..99f5e38c4835 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -156,6 +156,9 @@ static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev) return amdgpu_compute_multipipe == 1; } + if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0)) + return true; + /* FIXME: spreading the queues across pipes causes perf regressions * on POLARIS11 compute workloads */ if (adev->asic_type == CHIP_POLARIS11) From 8dabe4e0daa999b2f11e354a0f67934420e095b4 Mon Sep 17 00:00:00 2001 From: jie1zhan Date: Fri, 13 Jan 2023 10:39:13 +0800 Subject: [PATCH 490/543] drm/amdgpu: Correct the power calcultion for Renior/Cezanne. commit c7bae4aaa5609c1fa9761c35dbcc5fcc92915222 upstream. From smu firmware,the value of power is transferred in units of watts. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2321 Fixes: 137aac26a2ed ("drm/amdgpu/smu12: fix power reporting on renoir") Acked-by: Alex Deucher Signed-off-by: Jesse Zhang Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index 85e22210963f..5cdc07165480 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -1171,6 +1171,7 @@ static int renoir_get_smu_metrics_data(struct smu_context *smu, int ret = 0; uint32_t apu_percent = 0; uint32_t dgpu_percent = 0; + struct amdgpu_device *adev = smu->adev; ret = smu_cmn_get_metrics_table(smu, @@ -1196,7 +1197,11 @@ static int renoir_get_smu_metrics_data(struct smu_context *smu, *value = metrics->AverageUvdActivity / 100; break; case METRICS_AVERAGE_SOCKETPOWER: - *value = (metrics->CurrentSocketPower << 8) / 1000; + if (((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 1)) && (adev->pm.fw_version >= 0x40000f)) || + ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 0)) && (adev->pm.fw_version >= 0x373200))) + *value = metrics->CurrentSocketPower << 8; + else + *value = (metrics->CurrentSocketPower << 8) / 1000; break; case METRICS_TEMPERATURE_EDGE: *value = (metrics->GfxTemperature / 100) * From 38a9b17d3b58a3163bc031e96c94fc2d800e02b7 Mon Sep 17 00:00:00 2001 From: Sasa Dragic Date: Mon, 19 Dec 2022 18:29:27 +0100 Subject: [PATCH 491/543] drm/i915: re-disable RC6p on Sandy Bridge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 67b0b4ed259e425b7eed09da75b42c80682ca003 upstream. RC6p on Sandy Bridge got re-enabled over time, causing visual glitches and GPU hangs. Disabled originally in commit 1c8ecf80fdee ("drm/i915: do not enable RC6p on Sandy Bridge"). Signed-off-by: Sasa Dragic Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20221219172927.9603-2-sasa.dragic@gmail.com Fixes: fb6db0f5bf1d ("drm/i915: Remove unsafe i915.enable_rc6") Fixes: 13c5a577b342 ("drm/i915/gt: Select the deepest available parking mode for rc6") Cc: stable@vger.kernel.org Reviewed-by: Rodrigo Vivi (cherry picked from commit 0c8a6e9ea232c221976a0670256bd861408d9917) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index cd4487a1d3be..34f2d9da201e 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -423,7 +423,8 @@ static const struct intel_device_info ilk_m_info = { .has_coherent_ggtt = true, \ .has_llc = 1, \ .has_rc6 = 1, \ - .has_rc6p = 1, \ + /* snb does support rc6p, but enabling it causes various issues */ \ + .has_rc6p = 0, \ .has_rps = true, \ .dma_mask_size = 40, \ .__runtime.ppgtt_type = INTEL_PPGTT_ALIASING, \ From 597747eb58a7c8deb131f89aad7e136dbb63ef7a Mon Sep 17 00:00:00 2001 From: Drew Davenport Date: Mon, 26 Dec 2022 22:53:24 -0700 Subject: [PATCH 492/543] drm/i915/display: Check source height is > 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8565c502e7c156d190d8e6d36e443f51b257f165 upstream. The error message suggests that the height of the src rect must be at least 1. Reject source with height of 0. Cc: stable@vger.kernel.org Signed-off-by: Drew Davenport Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20221226225246.1.I15dff7bb5a0e485c862eae61a69096caf12ef29f@changeid (cherry picked from commit 0fe76b198d482b41771a8d17b45fb726d13083cf) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/display/skl_universal_plane.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 7cb713043408..bc523a3d1d42 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -1620,7 +1620,7 @@ static int skl_check_main_surface(struct intel_plane_state *plane_state) u32 offset; int ret; - if (w > max_width || w < min_width || h > max_height) { + if (w > max_width || w < min_width || h > max_height || h < 1) { drm_dbg_kms(&dev_priv->drm, "requested Y/RGB source size %dx%d outside limits (min: %dx1 max: %dx%d)\n", w, h, min_width, max_width, max_height); From 4397bcbd729dd79af4cc53615bdca59f80e74643 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 16 Jan 2023 12:54:23 +0100 Subject: [PATCH 493/543] drm/i915: Allow switching away via vga-switcheroo if uninitialized MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a273e95721e96885971a05f1b34cb6d093904d9d upstream. Always allow switching away via vga-switcheroo if the display is uninitalized. Instead prevent switching to i915 if the device has not been initialized. This issue was introduced by commit 5df7bd130818 ("drm/i915: skip display initialization when there is no display") protected, which protects code paths from being executed on uninitialized devices. In the case of vga-switcheroo, we want to allow a switch away from i915's device. So run vga_switcheroo_process_delayed_switch() and test in the switcheroo callbacks if the i915 device is available. Fixes: 5df7bd130818 ("drm/i915: skip display initialization when there is no display") Signed-off-by: Thomas Zimmermann Reviewed-by: Alex Deucher Cc: Radhakrishna Sripada Cc: Lucas De Marchi Cc: José Roberto de Souza Cc: Jani Nikula Cc: Ville Syrjälä Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Cc: "Ville Syrjälä" Cc: Manasi Navare Cc: Stanislav Lisovskiy Cc: Imre Deak Cc: "Jouni Högander" Cc: Uma Shankar Cc: Ankit Nautiyal Cc: "Jason A. Donenfeld" Cc: Matt Roper Cc: Ramalingam C Cc: Thomas Zimmermann Cc: Andi Shyti Cc: Andrzej Hajda Cc: "José Roberto de Souza" Cc: Julia Lawall Cc: intel-gfx@lists.freedesktop.org Cc: # v5.14+ Link: https://patchwork.freedesktop.org/patch/msgid/20230116115425.13484-2-tzimmermann@suse.de Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_driver.c | 3 +-- drivers/gpu/drm/i915/i915_switcheroo.c | 6 +++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 2ce30cff461a..2b2a9d4b3059 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -1074,8 +1074,7 @@ static void i915_driver_lastclose(struct drm_device *dev) intel_fbdev_restore_mode(dev); - if (HAS_DISPLAY(i915)) - vga_switcheroo_process_delayed_switch(); + vga_switcheroo_process_delayed_switch(); } static void i915_driver_postclose(struct drm_device *dev, struct drm_file *file) diff --git a/drivers/gpu/drm/i915/i915_switcheroo.c b/drivers/gpu/drm/i915/i915_switcheroo.c index 23777d500cdf..f45bd6b6cede 100644 --- a/drivers/gpu/drm/i915/i915_switcheroo.c +++ b/drivers/gpu/drm/i915/i915_switcheroo.c @@ -19,6 +19,10 @@ static void i915_switcheroo_set_state(struct pci_dev *pdev, dev_err(&pdev->dev, "DRM not initialized, aborting switch.\n"); return; } + if (!HAS_DISPLAY(i915)) { + dev_err(&pdev->dev, "Device state not initialized, aborting switch.\n"); + return; + } if (state == VGA_SWITCHEROO_ON) { drm_info(&i915->drm, "switched on\n"); @@ -44,7 +48,7 @@ static bool i915_switcheroo_can_switch(struct pci_dev *pdev) * locking inversion with the driver load path. And the access here is * completely racy anyway. So don't bother with locking for now. */ - return i915 && atomic_read(&i915->drm.open_count) == 0; + return i915 && HAS_DISPLAY(i915) && atomic_read(&i915->drm.open_count) == 0; } static const struct vga_switcheroo_client_ops i915_switcheroo_ops = { From 92c39d6995923103f952c94ba0e9b859e182d2a8 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 18 Jan 2023 18:06:24 +0100 Subject: [PATCH 494/543] drm/i915: Remove unused variable commit 2293a73ad4f3b6c37c06713ff1b67659d92ef43d upstream. Removed unused i915 var. Fixes: a273e95721e9 ("drm/i915: Allow switching away via vga-switcheroo if uninitialized") Signed-off-by: Nirmoy Das Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20230118170624.9326-1-nirmoy.das@intel.com Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_driver.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 2b2a9d4b3059..35bc2a3fa811 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -1070,8 +1070,6 @@ static int i915_driver_open(struct drm_device *dev, struct drm_file *file) */ static void i915_driver_lastclose(struct drm_device *dev) { - struct drm_i915_private *i915 = to_i915(dev); - intel_fbdev_restore_mode(dev); vga_switcheroo_process_delayed_switch(); From 032cebdff71e92d6aa6fd2a8550eb7c9898553e0 Mon Sep 17 00:00:00 2001 From: hongao Date: Tue, 22 Nov 2022 19:20:34 +0800 Subject: [PATCH 495/543] drm/amd/display: Fix set scaling doesn's work commit 040625ab82ce6dca7772cb3867fe5c9eb279a344 upstream. [Why] Setting scaling does not correctly update CRTC state. As a result dc stream state's src (composition area) && dest (addressable area) was not calculated as expected. This causes set scaling doesn's work. [How] Correctly update CRTC state when setting scaling property. Reviewed-by: Harry Wentland Tested-by: Rodrigo Siqueira Signed-off-by: hongao Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index dacad8b85963..855d89eee472 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9433,8 +9433,8 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, goto fail; } - if (dm_old_con_state->abm_level != - dm_new_con_state->abm_level) + if (dm_old_con_state->abm_level != dm_new_con_state->abm_level || + dm_old_con_state->scaling != dm_new_con_state->scaling) new_crtc_state->connectors_changed = true; } From 87e84d0ff97cf8d508910d793866083a918d8cab Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Tue, 10 Jan 2023 20:12:21 +0000 Subject: [PATCH 496/543] drm/amd/display: Calculate output_color_space after pixel encoding adjustment commit 79601b894849cb6f6d6122e6590f1887ac4a66b3 upstream. Code in get_output_color_space depends on knowing the pixel encoding to determine whether to pick between eg. COLOR_SPACE_SRGB or COLOR_SPACE_YCBCR709 for transparent RGB -> YCbCr 4:4:4 in the driver. v2: Fixed patch being accidentally based on a personal feature branch, oops! Fixes: ea117312ea9f ("drm/amd/display: Reduce HDMI pixel encoding if max clock is exceeded") Reviewed-by: Melissa Wen Signed-off-by: Joshua Ashton Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 855d89eee472..958dbeac1eaf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5283,8 +5283,6 @@ static void fill_stream_properties_from_drm_display_mode( timing_out->aspect_ratio = get_aspect_ratio(mode_in); - stream->output_color_space = get_output_color_space(timing_out); - stream->out_transfer_func->type = TF_TYPE_PREDEFINED; stream->out_transfer_func->tf = TRANSFER_FUNCTION_SRGB; if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) { @@ -5295,6 +5293,8 @@ static void fill_stream_properties_from_drm_display_mode( adjust_colour_depth_from_display_info(timing_out, info); } } + + stream->output_color_space = get_output_color_space(timing_out); } static void fill_audio_info(struct audio_info *audio_info, From b105b79ea05029ba7f55d826e6c0493a45620573 Mon Sep 17 00:00:00 2001 From: Joshua Ashton Date: Tue, 10 Jan 2023 22:50:42 +0000 Subject: [PATCH 497/543] drm/amd/display: Fix COLOR_SPACE_YCBCR2020_TYPE matrix commit 973a9c810c785ac270a6d50d8cf862b0c1643a10 upstream. The YCC conversion matrix for RGB -> COLOR_SPACE_YCBCR2020_TYPE is missing the values for the fourth column of the matrix. The fourth column of the matrix is essentially just a value that is added given that the color is 3 components in size. These values are needed to bias the chroma from the [-1, 1] -> [0, 1] range. This fixes color being very green when using Gamescope HDR on HDMI output which prefers YCC 4:4:4. Fixes: 40df2f809e8f ("drm/amd/display: color space ycbcr709 support") Reviewed-by: Melissa Wen Signed-off-by: Joshua Ashton Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c index 7c2e3b8dc26a..95562efad651 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c @@ -90,8 +90,8 @@ static const struct out_csc_color_matrix_type output_csc_matrix[] = { { 0xE00, 0xF349, 0xFEB7, 0x1000, 0x6CE, 0x16E3, 0x24F, 0x200, 0xFCCB, 0xF535, 0xE00, 0x1000} }, { COLOR_SPACE_YCBCR2020_TYPE, - { 0x1000, 0xF149, 0xFEB7, 0x0000, 0x0868, 0x15B2, - 0x01E6, 0x0000, 0xFB88, 0xF478, 0x1000, 0x0000} }, + { 0x1000, 0xF149, 0xFEB7, 0x1004, 0x0868, 0x15B2, + 0x01E6, 0x201, 0xFB88, 0xF478, 0x1000, 0x1004} }, { COLOR_SPACE_YCBCR709_BLACK_TYPE, { 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x0000, 0x0000, 0x0200, 0x0000, 0x0000, 0x0000, 0x1000} }, From 3ad10fc4ad37be5adfb02f6d493d092dec9b4c55 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 18 Jan 2023 09:19:21 -0500 Subject: [PATCH 498/543] drm/amd/display: disable S/G display on DCN 3.1.5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e78cc6a4c7486f50c2786d91dd7d9649a87d1dcb upstream. Causes flickering or white screens in some configurations. Disable it for now until we can fix the issue. Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2354 Cc: roman.li@amd.com Cc: yifan1.zhang@amd.com Acked-by: Christian König Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 958dbeac1eaf..0d876a196bd3 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1513,7 +1513,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) case IP_VERSION(3, 1, 2): case IP_VERSION(3, 1, 3): case IP_VERSION(3, 1, 4): - case IP_VERSION(3, 1, 5): case IP_VERSION(3, 1, 6): init_data.flags.gpu_vm_support = true; break; From bf9261e4536f1165e67572ffbdd768c90f8eebef Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 18 Jan 2023 09:21:22 -0500 Subject: [PATCH 499/543] drm/amd/display: disable S/G display on DCN 3.1.4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a52287d66dfa1cca32e6273623b63ba39d87f126 upstream. Causes flickering or white screens in some configurations. Disable it for now until we can fix the issue. Cc: roman.li@amd.com Cc: yifan1.zhang@amd.com Acked-by: Christian König Reviewed-by: Roman Li Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0d876a196bd3..e10f1f15c9c4 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1512,7 +1512,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) case IP_VERSION(3, 0, 1): case IP_VERSION(3, 1, 2): case IP_VERSION(3, 1, 3): - case IP_VERSION(3, 1, 4): case IP_VERSION(3, 1, 6): init_data.flags.gpu_vm_support = true; break; From b542cb024a5d981b4385e03bdeeaabcdd725c9f3 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Mon, 12 Dec 2022 23:39:37 -0300 Subject: [PATCH 500/543] cifs: reduce roundtrips on create/qinfo requests commit c877ce47e1378dbafa6f1bf84c0c83a05ca8972a upstream. To work around some Window servers that return STATUS_OBJECT_NAME_INVALID on query infos under DFS namespaces that contain non-ASCII characters, we started checking for -ENOENT on every file open, and if so, then send additional requests to figure out whether it is a DFS link or not. It means that all those requests will be sent to every non-existing file. So, in order to reduce the number of roundtrips, check earlier whether status code is STATUS_OBJECT_NAME_INVALID and tcon supports dfs, and if so, then map -ENOENT to -EREMOTE so mount or automount will take care of chasing the DFS link -- if it isn't an DFS link, then -ENOENT will be returned appropriately. Before patch SMB2 438 Create Request File: ada.test\dfs\foo;GetInfo Request... SMB2 310 Create Response, Error: STATUS_OBJECT_NAME_NOT_FOUND;... SMB2 228 Ioctl Request FSCTL_DFS_GET_REFERRALS, File: \ada.test\dfs\foo SMB2 143 Ioctl Response, Error: STATUS_OBJECT_PATH_NOT_FOUND SMB2 438 Create Request File: ada.test\dfs\foo;GetInfo Request... SMB2 310 Create Response, Error: STATUS_OBJECT_NAME_NOT_FOUND;... SMB2 228 Ioctl Request FSCTL_DFS_GET_REFERRALS, File: \ada.test\dfs\foo SMB2 143 Ioctl Response, Error: STATUS_OBJECT_PATH_NOT_FOUND After patch SMB2 438 Create Request File: ada.test\dfs\foo;GetInfo Request... SMB2 310 Create Response, Error: STATUS_OBJECT_NAME_NOT_FOUND;... SMB2 438 Create Request File: ada.test\dfs\foo;GetInfo Request... SMB2 310 Create Response, Error: STATUS_OBJECT_NAME_NOT_FOUND;... Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/connect.c | 16 ---------------- fs/cifs/inode.c | 6 ------ fs/cifs/misc.c | 45 --------------------------------------------- fs/cifs/smb2inode.c | 45 ++++++++++++++++++++++++++++++++------------- fs/cifs/smb2ops.c | 28 ++++++++++++++++++++++++---- 5 files changed, 56 insertions(+), 84 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index fde1c371605a..eab36e4ea130 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3554,9 +3554,6 @@ static int is_path_remote(struct mount_ctx *mnt_ctx) struct cifs_tcon *tcon = mnt_ctx->tcon; struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; char *full_path; -#ifdef CONFIG_CIFS_DFS_UPCALL - bool nodfs = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS; -#endif if (!server->ops->is_path_accessible) return -EOPNOTSUPP; @@ -3573,19 +3570,6 @@ static int is_path_remote(struct mount_ctx *mnt_ctx) rc = server->ops->is_path_accessible(xid, tcon, cifs_sb, full_path); -#ifdef CONFIG_CIFS_DFS_UPCALL - if (nodfs) { - if (rc == -EREMOTE) - rc = -EOPNOTSUPP; - goto out; - } - - /* path *might* exist with non-ASCII characters in DFS root - * try again with full path (only if nodfs is not set) */ - if (rc == -ENOENT && is_tcon_dfs(tcon)) - rc = cifs_dfs_query_info_nonascii_quirk(xid, tcon, cifs_sb, - full_path); -#endif if (rc != 0 && rc != -EREMOTE) goto out; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 4e2ca3c6e5c0..0c9b619e4386 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -991,12 +991,6 @@ int cifs_get_inode_info(struct inode **inode, const char *full_path, } rc = server->ops->query_path_info(xid, tcon, cifs_sb, full_path, &tmp_data, &adjust_tz, &is_reparse_point); -#ifdef CONFIG_CIFS_DFS_UPCALL - if (rc == -ENOENT && is_tcon_dfs(tcon)) - rc = cifs_dfs_query_info_nonascii_quirk(xid, tcon, - cifs_sb, - full_path); -#endif data = &tmp_data; } diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 1cbecd64d697..062175994e87 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -1314,49 +1314,4 @@ int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix) cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; return 0; } - -/** cifs_dfs_query_info_nonascii_quirk - * Handle weird Windows SMB server behaviour. It responds with - * STATUS_OBJECT_NAME_INVALID code to SMB2 QUERY_INFO request - * for "\\\" DFS reference, - * where contains non-ASCII unicode symbols. - * - * Check such DFS reference. - */ -int cifs_dfs_query_info_nonascii_quirk(const unsigned int xid, - struct cifs_tcon *tcon, - struct cifs_sb_info *cifs_sb, - const char *linkpath) -{ - char *treename, *dfspath, sep; - int treenamelen, linkpathlen, rc; - - treename = tcon->tree_name; - /* MS-DFSC: All paths in REQ_GET_DFS_REFERRAL and RESP_GET_DFS_REFERRAL - * messages MUST be encoded with exactly one leading backslash, not two - * leading backslashes. - */ - sep = CIFS_DIR_SEP(cifs_sb); - if (treename[0] == sep && treename[1] == sep) - treename++; - linkpathlen = strlen(linkpath); - treenamelen = strnlen(treename, MAX_TREE_SIZE + 1); - dfspath = kzalloc(treenamelen + linkpathlen + 1, GFP_KERNEL); - if (!dfspath) - return -ENOMEM; - if (treenamelen) - memcpy(dfspath, treename, treenamelen); - memcpy(dfspath + treenamelen, linkpath, linkpathlen); - rc = dfs_cache_find(xid, tcon->ses, cifs_sb->local_nls, - cifs_remap(cifs_sb), dfspath, NULL, NULL); - if (rc == 0) { - cifs_dbg(FYI, "DFS ref '%s' is found, emulate -EREMOTE\n", - dfspath); - rc = -EREMOTE; - } else { - cifs_dbg(FYI, "%s: dfs_cache_find returned %d\n", __func__, rc); - } - kfree(dfspath); - return rc; -} #endif diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 68e08c85fbb8..6c84d2983166 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -540,22 +540,41 @@ int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN, create_options, ACL_NO_MODE, data, SMB2_OP_QUERY_INFO, cfile, err_iov, err_buftype); - if (rc == -EOPNOTSUPP) { - if (err_iov[0].iov_base && err_buftype[0] != CIFS_NO_BUFFER && - ((struct smb2_hdr *)err_iov[0].iov_base)->Command == SMB2_CREATE && - ((struct smb2_hdr *)err_iov[0].iov_base)->Status == STATUS_STOPPED_ON_SYMLINK) { - rc = smb2_parse_symlink_response(cifs_sb, err_iov, &data->symlink_target); + if (rc) { + struct smb2_hdr *hdr = err_iov[0].iov_base; + + if (unlikely(!hdr || err_buftype[0] == CIFS_NO_BUFFER)) + goto out; + if (rc == -EOPNOTSUPP && hdr->Command == SMB2_CREATE && + hdr->Status == STATUS_STOPPED_ON_SYMLINK) { + rc = smb2_parse_symlink_response(cifs_sb, err_iov, + &data->symlink_target); if (rc) goto out; - } - *reparse = true; - create_options |= OPEN_REPARSE_POINT; - /* Failed on a symbolic link - query a reparse point info */ - cifs_get_readable_path(tcon, full_path, &cfile); - rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, - FILE_OPEN, create_options, ACL_NO_MODE, data, - SMB2_OP_QUERY_INFO, cfile, NULL, NULL); + *reparse = true; + create_options |= OPEN_REPARSE_POINT; + + /* Failed on a symbolic link - query a reparse point info */ + cifs_get_readable_path(tcon, full_path, &cfile); + rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, + FILE_READ_ATTRIBUTES, FILE_OPEN, + create_options, ACL_NO_MODE, data, + SMB2_OP_QUERY_INFO, cfile, NULL, NULL); + goto out; + } else if (rc != -EREMOTE && IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) && + hdr->Status == STATUS_OBJECT_NAME_INVALID) { + /* + * Handle weird Windows SMB server behaviour. It responds with + * STATUS_OBJECT_NAME_INVALID code to SMB2 QUERY_INFO request + * for "\\\" DFS reference, + * where contains non-ASCII unicode symbols. + */ + rc = -EREMOTE; + } + if (rc == -EREMOTE && IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) && cifs_sb && + (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)) + rc = -EOPNOTSUPP; } out: diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 8da98918cf86..1ff5b6b0e07a 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -797,7 +797,9 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, int rc; __le16 *utf16_path; __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; + int err_buftype = CIFS_NO_BUFFER; struct cifs_open_parms oparms; + struct kvec err_iov = {}; struct cifs_fid fid; struct cached_fid *cfid; @@ -821,14 +823,32 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, oparms.fid = &fid; oparms.reconnect = false; - rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL, NULL, - NULL); + rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL, + &err_iov, &err_buftype); if (rc) { - kfree(utf16_path); - return rc; + struct smb2_hdr *hdr = err_iov.iov_base; + + if (unlikely(!hdr || err_buftype == CIFS_NO_BUFFER)) + goto out; + /* + * Handle weird Windows SMB server behaviour. It responds with + * STATUS_OBJECT_NAME_INVALID code to SMB2 QUERY_INFO request + * for "\\\" DFS reference, + * where contains non-ASCII unicode symbols. + */ + if (rc != -EREMOTE && IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) && + hdr->Status == STATUS_OBJECT_NAME_INVALID) + rc = -EREMOTE; + if (rc == -EREMOTE && IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) && cifs_sb && + (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)) + rc = -EOPNOTSUPP; + goto out; } rc = SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); + +out: + free_rsp_buf(err_buftype, err_iov.iov_base); kfree(utf16_path); return rc; } From ff3b1a624380c14b81f4e51c48e404a45f047aab Mon Sep 17 00:00:00 2001 From: Alon Zahavi Date: Mon, 15 Aug 2022 14:07:12 +0300 Subject: [PATCH 501/543] fs/ntfs3: Fix attr_punch_hole() null pointer derenference commit 6d5c9e79b726cc473d40e9cb60976dbe8e669624 upstream. The bug occours due to a misuse of `attr` variable instead of `attr_b`. `attr` is being initialized as NULL, then being derenfernced as `attr->res.data_size`. This bug causes a crash of the ntfs3 driver itself, If compiled directly to the kernel, it crashes the whole system. Signed-off-by: Alon Zahavi Co-developed-by: Tal Lossos Signed-off-by: Tal Lossos Signed-off-by: Konstantin Komarov Signed-off-by: Greg Kroah-Hartman --- fs/ntfs3/attrib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index 578c2bcfb1d9..63169529b52c 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -2038,7 +2038,7 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size) return -ENOENT; if (!attr_b->non_res) { - u32 data_size = le32_to_cpu(attr->res.data_size); + u32 data_size = le32_to_cpu(attr_b->res.data_size); u32 from, to; if (vbo > data_size) From f75a91c82dc805af8f718ff106ec9c090234b37b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 5 Dec 2022 11:31:25 +0100 Subject: [PATCH 502/543] arm64: efi: Execute runtime services from a dedicated stack commit ff7a167961d1b97e0e205f245f806e564d3505e7 upstream. With the introduction of PRMT in the ACPI subsystem, the EFI rts workqueue is no longer the only caller of efi_call_virt_pointer() in the kernel. This means the EFI runtime services lock is no longer sufficient to manage concurrent calls into firmware, but also that firmware calls may occur that are not marshalled via the workqueue mechanism, but originate directly from the caller context. For added robustness, and to ensure that the runtime services have 8 KiB of stack space available as per the EFI spec, introduce a spinlock protected EFI runtime stack of 8 KiB, where the spinlock also ensures serialization between the EFI rts workqueue (which itself serializes EFI runtime calls) and other callers of efi_call_virt_pointer(). While at it, use the stack pivot to avoid reloading the shadow call stack pointer from the ordinary stack, as doing so could produce a gadget to defeat it. Signed-off-by: Ard Biesheuvel Cc: Lee Jones Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/efi.h | 3 +++ arch/arm64/kernel/efi-rt-wrapper.S | 13 ++++++++++++- arch/arm64/kernel/efi.c | 27 +++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 439e2bc5d5d8..b9f3165075c9 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -25,6 +25,7 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); ({ \ efi_virtmap_load(); \ __efi_fpsimd_begin(); \ + spin_lock(&efi_rt_lock); \ }) #undef arch_efi_call_virt @@ -33,10 +34,12 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); #define arch_efi_call_virt_teardown() \ ({ \ + spin_unlock(&efi_rt_lock); \ __efi_fpsimd_end(); \ efi_virtmap_unload(); \ }) +extern spinlock_t efi_rt_lock; efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...); #define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S index 75691a2641c1..b2786b968fee 100644 --- a/arch/arm64/kernel/efi-rt-wrapper.S +++ b/arch/arm64/kernel/efi-rt-wrapper.S @@ -16,6 +16,12 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) */ stp x1, x18, [sp, #16] + ldr_l x16, efi_rt_stack_top + mov sp, x16 +#ifdef CONFIG_SHADOW_CALL_STACK + str x18, [sp, #-16]! +#endif + /* * We are lucky enough that no EFI runtime services take more than * 5 arguments, so all are passed in registers rather than via the @@ -29,6 +35,7 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) mov x4, x6 blr x8 + mov sp, x29 ldp x1, x2, [sp, #16] cmp x2, x18 ldp x29, x30, [sp], #32 @@ -42,6 +49,10 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) * called with preemption disabled and a separate shadow stack is used * for interrupts. */ - mov x18, x2 +#ifdef CONFIG_SHADOW_CALL_STACK + ldr_l x18, efi_rt_stack_top + ldr x18, [x18, #-16] +#endif + b efi_handle_corrupted_x18 // tail call SYM_FUNC_END(__efi_rt_asm_wrapper) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index a908a37f0367..386bd81ca12b 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -144,3 +144,30 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f) pr_err_ratelimited(FW_BUG "register x18 corrupted by EFI %s\n", f); return s; } + +DEFINE_SPINLOCK(efi_rt_lock); + +asmlinkage u64 *efi_rt_stack_top __ro_after_init; + +/* EFI requires 8 KiB of stack space for runtime services */ +static_assert(THREAD_SIZE >= SZ_8K); + +static int __init arm64_efi_rt_init(void) +{ + void *p; + + if (!efi_enabled(EFI_RUNTIME_SERVICES)) + return 0; + + p = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, GFP_KERNEL, + NUMA_NO_NODE, &&l); +l: if (!p) { + pr_warn("Failed to allocate EFI runtime stack\n"); + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); + return -ENOMEM; + } + + efi_rt_stack_top = p + THREAD_SIZE; + return 0; +} +core_initcall(arm64_efi_rt_init); From 72b0e5faa5149f09c6a7a74e4012f29e33509bab Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 9 Jan 2023 12:41:46 +0100 Subject: [PATCH 503/543] efi: rt-wrapper: Add missing include commit 18bba1843fc7f264f58c9345d00827d082f9c558 upstream. Add the missing #include of asm/assembler.h, which is where the ldr_l macro is defined. Fixes: ff7a167961d1b97e ("arm64: efi: Execute runtime services from a dedicated stack") Signed-off-by: Ard Biesheuvel Cc: Lee Jones Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/efi-rt-wrapper.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S index b2786b968fee..2d3c4b02393e 100644 --- a/arch/arm64/kernel/efi-rt-wrapper.S +++ b/arch/arm64/kernel/efi-rt-wrapper.S @@ -4,6 +4,7 @@ */ #include +#include SYM_FUNC_START(__efi_rt_asm_wrapper) stp x29, x30, [sp, #-32]! From acc767cc70ce10ea5b001b6c92a535c476a58d49 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Nov 2022 15:43:21 -0800 Subject: [PATCH 504/543] panic: Separate sysctl logic from CONFIG_SMP commit 9360d035a579d95d1e76c471061b9065b18a0eb1 upstream. In preparation for adding more sysctls directly in kernel/panic.c, split CONFIG_SMP from the logic that adds sysctls. Cc: Petr Mladek Cc: Andrew Morton Cc: tangmeng Cc: "Guilherme G. Piccoli" Cc: Tiezhu Yang Cc: Sebastian Andrzej Siewior Reviewed-by: Luis Chamberlain Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221117234328.594699-1-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- kernel/panic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/panic.c b/kernel/panic.c index da323209f583..d843d036651e 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -75,8 +75,9 @@ ATOMIC_NOTIFIER_HEAD(panic_notifier_list); EXPORT_SYMBOL(panic_notifier_list); -#if defined(CONFIG_SMP) && defined(CONFIG_SYSCTL) +#ifdef CONFIG_SYSCTL static struct ctl_table kern_panic_table[] = { +#ifdef CONFIG_SMP { .procname = "oops_all_cpu_backtrace", .data = &sysctl_oops_all_cpu_backtrace, @@ -86,6 +87,7 @@ static struct ctl_table kern_panic_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, +#endif { } }; From 767997ef5dc0de5f395b69f5f26eb8b01c65ae1a Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 17 Nov 2022 15:43:22 -0800 Subject: [PATCH 505/543] exit: Put an upper limit on how often we can oops commit d4ccd54d28d3c8598e2354acc13e28c060961dbb upstream. Many Linux systems are configured to not panic on oops; but allowing an attacker to oops the system **really** often can make even bugs that look completely unexploitable exploitable (like NULL dereferences and such) if each crash elevates a refcount by one or a lock is taken in read mode, and this causes a counter to eventually overflow. The most interesting counters for this are 32 bits wide (like open-coded refcounts that don't use refcount_t). (The ldsem reader count on 32-bit platforms is just 16 bits, but probably nobody cares about 32-bit platforms that much nowadays.) So let's panic the system if the kernel is constantly oopsing. The speed of oopsing 2^32 times probably depends on several factors, like how long the stack trace is and which unwinder you're using; an empirically important one is whether your console is showing a graphical environment or a text console that oopses will be printed to. In a quick single-threaded benchmark, it looks like oopsing in a vfork() child with a very short stack trace only takes ~510 microseconds per run when a graphical console is active; but switching to a text console that oopses are printed to slows it down around 87x, to ~45 milliseconds per run. (Adding more threads makes this faster, but the actual oops printing happens under &die_lock on x86, so you can maybe speed this up by a factor of around 2 and then any further improvement gets eaten up by lock contention.) It looks like it would take around 8-12 days to overflow a 32-bit counter with repeated oopsing on a multi-core X86 system running a graphical environment; both me (in an X86 VM) and Seth (with a distro kernel on normal hardware in a standard configuration) got numbers in that ballpark. 12 days aren't *that* short on a desktop system, and you'd likely need much longer on a typical server system (assuming that people don't run graphical desktop environments on their servers), and this is a *very* noisy and violent approach to exploiting the kernel; and it also seems to take orders of magnitude longer on some machines, probably because stuff like EFI pstore will slow it down a ton if that's active. Signed-off-by: Jann Horn Link: https://lore.kernel.org/r/20221107201317.324457-1-jannh@google.com Reviewed-by: Luis Chamberlain Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221117234328.594699-2-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/sysctl/kernel.rst | 8 ++++ kernel/exit.c | 42 +++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index c2c64c1b706f..6075fa604fc9 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -667,6 +667,14 @@ This is the default behavior. an oops event is detected. +oops_limit +========== + +Number of kernel oopses after which the kernel should panic when +``panic_on_oops`` is not set. Setting this to 0 or 1 has the same effect +as setting ``panic_on_oops=1``. + + osrelease, ostype & version =========================== diff --git a/kernel/exit.c b/kernel/exit.c index 35e0a31a0315..2ab3ead62118 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -72,6 +72,33 @@ #include #include +/* + * The default value should be high enough to not crash a system that randomly + * crashes its kernel from time to time, but low enough to at least not permit + * overflowing 32-bit refcounts or the ldsem writer count. + */ +static unsigned int oops_limit = 10000; + +#ifdef CONFIG_SYSCTL +static struct ctl_table kern_exit_table[] = { + { + .procname = "oops_limit", + .data = &oops_limit, + .maxlen = sizeof(oops_limit), + .mode = 0644, + .proc_handler = proc_douintvec, + }, + { } +}; + +static __init int kernel_exit_sysctls_init(void) +{ + register_sysctl_init("kernel", kern_exit_table); + return 0; +} +late_initcall(kernel_exit_sysctls_init); +#endif + static void __unhash_process(struct task_struct *p, bool group_dead) { nr_threads--; @@ -874,6 +901,8 @@ void __noreturn do_exit(long code) void __noreturn make_task_dead(int signr) { + static atomic_t oops_count = ATOMIC_INIT(0); + /* * Take the task off the cpu after something catastrophic has * happened. @@ -897,6 +926,19 @@ void __noreturn make_task_dead(int signr) preempt_count_set(PREEMPT_ENABLED); } + /* + * Every time the system oopses, if the oops happens while a reference + * to an object was held, the reference leaks. + * If the oops doesn't also leak memory, repeated oopsing can cause + * reference counters to wrap around (if they're not using refcount_t). + * This means that repeated oopsing can make unexploitable-looking bugs + * exploitable through repeated oopsing. + * To make sure this can't happen, place an upper bound on how often the + * kernel may oops without panic(). + */ + if (atomic_inc_return(&oops_count) >= READ_ONCE(oops_limit)) + panic("Oopsed too often (kernel.oops_limit is %d)", oops_limit); + /* * We're taking recursive faults here in make_task_dead. Safest is to just * leave this task alone and wait for reboot. From 46cacd7913c86f84b784dc2e0151bfaefde315d7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Nov 2022 15:43:23 -0800 Subject: [PATCH 506/543] exit: Expose "oops_count" to sysfs commit 9db89b41117024f80b38b15954017fb293133364 upstream. Since Oops count is now tracked and is a fairly interesting signal, add the entry /sys/kernel/oops_count to expose it to userspace. Cc: "Eric W. Biederman" Cc: Jann Horn Cc: Arnd Bergmann Reviewed-by: Luis Chamberlain Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221117234328.594699-3-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- .../ABI/testing/sysfs-kernel-oops_count | 6 +++++ MAINTAINERS | 1 + kernel/exit.c | 22 +++++++++++++++++-- 3 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-kernel-oops_count diff --git a/Documentation/ABI/testing/sysfs-kernel-oops_count b/Documentation/ABI/testing/sysfs-kernel-oops_count new file mode 100644 index 000000000000..156cca9dbc96 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-oops_count @@ -0,0 +1,6 @@ +What: /sys/kernel/oops_count +Date: November 2022 +KernelVersion: 6.2.0 +Contact: Linux Kernel Hardening List +Description: + Shows how many times the system has Oopsed since last boot. diff --git a/MAINTAINERS b/MAINTAINERS index 886d3f69ee64..69d0342ce36e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11112,6 +11112,7 @@ M: Kees Cook L: linux-hardening@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening +F: Documentation/ABI/testing/sysfs-kernel-oops_count F: include/linux/overflow.h F: include/linux/randomize_kstack.h F: mm/usercopy.c diff --git a/kernel/exit.c b/kernel/exit.c index 2ab3ead62118..dc1a32149f94 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -67,6 +67,7 @@ #include #include #include +#include #include #include @@ -99,6 +100,25 @@ static __init int kernel_exit_sysctls_init(void) late_initcall(kernel_exit_sysctls_init); #endif +static atomic_t oops_count = ATOMIC_INIT(0); + +#ifdef CONFIG_SYSFS +static ssize_t oops_count_show(struct kobject *kobj, struct kobj_attribute *attr, + char *page) +{ + return sysfs_emit(page, "%d\n", atomic_read(&oops_count)); +} + +static struct kobj_attribute oops_count_attr = __ATTR_RO(oops_count); + +static __init int kernel_exit_sysfs_init(void) +{ + sysfs_add_file_to_group(kernel_kobj, &oops_count_attr.attr, NULL); + return 0; +} +late_initcall(kernel_exit_sysfs_init); +#endif + static void __unhash_process(struct task_struct *p, bool group_dead) { nr_threads--; @@ -901,8 +921,6 @@ void __noreturn do_exit(long code) void __noreturn make_task_dead(int signr) { - static atomic_t oops_count = ATOMIC_INIT(0); - /* * Take the task off the cpu after something catastrophic has * happened. From e0738725bbf64b4c24ea8da4bbc70904e5d3f5f8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 2 Dec 2022 12:59:11 -0800 Subject: [PATCH 507/543] exit: Allow oops_limit to be disabled commit de92f65719cd672f4b48397540b9f9eff67eca40 upstream. In preparation for keeping oops_limit logic in sync with warn_limit, have oops_limit == 0 disable checking the Oops counter. Cc: Jann Horn Cc: Jonathan Corbet Cc: Andrew Morton Cc: Baolin Wang Cc: "Jason A. Donenfeld" Cc: Eric Biggers Cc: Huang Ying Cc: "Eric W. Biederman" Cc: Arnd Bergmann Cc: linux-doc@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/sysctl/kernel.rst | 5 +++-- kernel/exit.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 6075fa604fc9..0d9b66f9b0f4 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -671,8 +671,9 @@ oops_limit ========== Number of kernel oopses after which the kernel should panic when -``panic_on_oops`` is not set. Setting this to 0 or 1 has the same effect -as setting ``panic_on_oops=1``. +``panic_on_oops`` is not set. Setting this to 0 disables checking +the count. Setting this to 1 has the same effect as setting +``panic_on_oops=1``. The default value is 10000. osrelease, ostype & version diff --git a/kernel/exit.c b/kernel/exit.c index dc1a32149f94..deffb8e4b1b2 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -954,7 +954,7 @@ void __noreturn make_task_dead(int signr) * To make sure this can't happen, place an upper bound on how often the * kernel may oops without panic(). */ - if (atomic_inc_return(&oops_count) >= READ_ONCE(oops_limit)) + if (atomic_inc_return(&oops_count) >= READ_ONCE(oops_limit) && oops_limit) panic("Oopsed too often (kernel.oops_limit is %d)", oops_limit); /* From 13aa82f0072757bf90aaf690b07971edd7d8c183 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Nov 2022 15:43:24 -0800 Subject: [PATCH 508/543] panic: Consolidate open-coded panic_on_warn checks commit 79cc1ba7badf9e7a12af99695a557e9ce27ee967 upstream. Several run-time checkers (KASAN, UBSAN, KFENCE, KCSAN, sched) roll their own warnings, and each check "panic_on_warn". Consolidate this into a single function so that future instrumentation can be added in a single location. Cc: Marco Elver Cc: Dmitry Vyukov Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Juri Lelli Cc: Vincent Guittot Cc: Dietmar Eggemann Cc: Steven Rostedt Cc: Ben Segall Cc: Mel Gorman Cc: Daniel Bristot de Oliveira Cc: Valentin Schneider Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Vincenzo Frascino Cc: Andrew Morton Cc: David Gow Cc: tangmeng Cc: Jann Horn Cc: Shuah Khan Cc: Petr Mladek Cc: "Paul E. McKenney" Cc: Sebastian Andrzej Siewior Cc: "Guilherme G. Piccoli" Cc: Tiezhu Yang Cc: kasan-dev@googlegroups.com Cc: linux-mm@kvack.org Reviewed-by: Luis Chamberlain Signed-off-by: Kees Cook Reviewed-by: Marco Elver Reviewed-by: Andrey Konovalov Link: https://lore.kernel.org/r/20221117234328.594699-4-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- include/linux/panic.h | 1 + kernel/kcsan/report.c | 3 +-- kernel/panic.c | 9 +++++++-- kernel/sched/core.c | 3 +-- lib/ubsan.c | 3 +-- mm/kasan/report.c | 4 ++-- mm/kfence/report.c | 3 +-- 7 files changed, 14 insertions(+), 12 deletions(-) diff --git a/include/linux/panic.h b/include/linux/panic.h index c7759b3f2045..979b776e3bcb 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -11,6 +11,7 @@ extern long (*panic_blink)(int state); __printf(1, 2) void panic(const char *fmt, ...) __noreturn __cold; void nmi_panic(struct pt_regs *regs, const char *msg); +void check_panic_on_warn(const char *origin); extern void oops_enter(void); extern void oops_exit(void); extern bool oops_may_print(void); diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index 67794404042a..e95ce7d7a76e 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -492,8 +492,7 @@ static void print_report(enum kcsan_value_change value_change, dump_stack_print_info(KERN_DEFAULT); pr_err("==================================================================\n"); - if (panic_on_warn) - panic("panic_on_warn set ...\n"); + check_panic_on_warn("KCSAN"); } static void release_report(unsigned long *flags, struct other_info *other_info) diff --git a/kernel/panic.c b/kernel/panic.c index d843d036651e..cfa354322d5f 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -201,6 +201,12 @@ static void panic_print_sys_info(bool console_flush) ftrace_dump(DUMP_ALL); } +void check_panic_on_warn(const char *origin) +{ + if (panic_on_warn) + panic("%s: panic_on_warn set ...\n", origin); +} + /** * panic - halt the system * @fmt: The text string to print @@ -619,8 +625,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint, if (regs) show_regs(regs); - if (panic_on_warn) - panic("panic_on_warn set ...\n"); + check_panic_on_warn("kernel"); if (!regs) dump_stack(); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 172ec79b66f6..f730b6fe94a7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5778,8 +5778,7 @@ static noinline void __schedule_bug(struct task_struct *prev) pr_err("Preemption disabled at:"); print_ip_sym(KERN_ERR, preempt_disable_ip); } - if (panic_on_warn) - panic("scheduling while atomic\n"); + check_panic_on_warn("scheduling while atomic"); dump_stack(); add_taint(TAINT_WARN, LOCKDEP_STILL_OK); diff --git a/lib/ubsan.c b/lib/ubsan.c index 36bd75e33426..60c7099857a0 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -154,8 +154,7 @@ static void ubsan_epilogue(void) current->in_ubsan--; - if (panic_on_warn) - panic("panic_on_warn set ...\n"); + check_panic_on_warn("UBSAN"); } void __ubsan_handle_divrem_overflow(void *_data, void *lhs, void *rhs) diff --git a/mm/kasan/report.c b/mm/kasan/report.c index df3602062bfd..cc98dfdd3ed2 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -164,8 +164,8 @@ static void end_report(unsigned long *flags, void *addr) (unsigned long)addr); pr_err("==================================================================\n"); spin_unlock_irqrestore(&report_lock, *flags); - if (panic_on_warn && !test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags)) - panic("panic_on_warn set ...\n"); + if (!test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags)) + check_panic_on_warn("KASAN"); if (kasan_arg_fault == KASAN_ARG_FAULT_PANIC) panic("kasan.fault=panic set ...\n"); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); diff --git a/mm/kfence/report.c b/mm/kfence/report.c index 46ecea18c4ca..60205f1257ef 100644 --- a/mm/kfence/report.c +++ b/mm/kfence/report.c @@ -273,8 +273,7 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r lockdep_on(); - if (panic_on_warn) - panic("panic_on_warn set ...\n"); + check_panic_on_warn("KFENCE"); /* We encountered a memory safety error, taint the kernel! */ add_taint(TAINT_BAD_PAGE, LOCKDEP_STILL_OK); From f53b6dda4d9b6e4ba1af5efd6c6650784996b4e7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Nov 2022 15:43:25 -0800 Subject: [PATCH 509/543] panic: Introduce warn_limit commit 9fc9e278a5c0b708eeffaf47d6eb0c82aa74ed78 upstream. Like oops_limit, add warn_limit for limiting the number of warnings when panic_on_warn is not set. Cc: Jonathan Corbet Cc: Andrew Morton Cc: Baolin Wang Cc: "Jason A. Donenfeld" Cc: Eric Biggers Cc: Huang Ying Cc: Petr Mladek Cc: tangmeng Cc: "Guilherme G. Piccoli" Cc: Tiezhu Yang Cc: Sebastian Andrzej Siewior Cc: linux-doc@vger.kernel.org Reviewed-by: Luis Chamberlain Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221117234328.594699-5-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/sysctl/kernel.rst | 10 ++++++++++ kernel/panic.c | 14 ++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 0d9b66f9b0f4..b3588fff1ec0 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -1532,6 +1532,16 @@ entry will default to 2 instead of 0. 2 Unprivileged calls to ``bpf()`` are disabled = ============================================================= + +warn_limit +========== + +Number of kernel warnings after which the kernel should panic when +``panic_on_warn`` is not set. Setting this to 0 disables checking +the warning count. Setting this to 1 has the same effect as setting +``panic_on_warn=1``. The default value is 0. + + watchdog ======== diff --git a/kernel/panic.c b/kernel/panic.c index cfa354322d5f..f4403fc14f67 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -58,6 +58,7 @@ bool crash_kexec_post_notifiers; int panic_on_warn __read_mostly; unsigned long panic_on_taint; bool panic_on_taint_nousertaint = false; +static unsigned int warn_limit __read_mostly; int panic_timeout = CONFIG_PANIC_TIMEOUT; EXPORT_SYMBOL_GPL(panic_timeout); @@ -88,6 +89,13 @@ static struct ctl_table kern_panic_table[] = { .extra2 = SYSCTL_ONE, }, #endif + { + .procname = "warn_limit", + .data = &warn_limit, + .maxlen = sizeof(warn_limit), + .mode = 0644, + .proc_handler = proc_douintvec, + }, { } }; @@ -203,8 +211,14 @@ static void panic_print_sys_info(bool console_flush) void check_panic_on_warn(const char *origin) { + static atomic_t warn_count = ATOMIC_INIT(0); + if (panic_on_warn) panic("%s: panic_on_warn set ...\n", origin); + + if (atomic_inc_return(&warn_count) >= READ_ONCE(warn_limit) && warn_limit) + panic("%s: system warned too often (kernel.warn_limit is %d)", + origin, warn_limit); } /** From 72c93f9897e9599aee992f95895779dbbe634290 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 17 Nov 2022 15:43:26 -0800 Subject: [PATCH 510/543] panic: Expose "warn_count" to sysfs commit 8b05aa26336113c4cea25f1c333ee8cd4fc212a6 upstream. Since Warn count is now tracked and is a fairly interesting signal, add the entry /sys/kernel/warn_count to expose it to userspace. Cc: Petr Mladek Cc: Andrew Morton Cc: tangmeng Cc: "Guilherme G. Piccoli" Cc: Sebastian Andrzej Siewior Cc: Tiezhu Yang Reviewed-by: Luis Chamberlain Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221117234328.594699-6-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman --- .../ABI/testing/sysfs-kernel-warn_count | 6 +++++ MAINTAINERS | 1 + kernel/panic.c | 22 +++++++++++++++++-- 3 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-kernel-warn_count diff --git a/Documentation/ABI/testing/sysfs-kernel-warn_count b/Documentation/ABI/testing/sysfs-kernel-warn_count new file mode 100644 index 000000000000..08f083d2fd51 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-warn_count @@ -0,0 +1,6 @@ +What: /sys/kernel/oops_count +Date: November 2022 +KernelVersion: 6.2.0 +Contact: Linux Kernel Hardening List +Description: + Shows how many times the system has Warned since last boot. diff --git a/MAINTAINERS b/MAINTAINERS index 69d0342ce36e..d4822ae39e39 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11113,6 +11113,7 @@ L: linux-hardening@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening F: Documentation/ABI/testing/sysfs-kernel-oops_count +F: Documentation/ABI/testing/sysfs-kernel-warn_count F: include/linux/overflow.h F: include/linux/randomize_kstack.h F: mm/usercopy.c diff --git a/kernel/panic.c b/kernel/panic.c index f4403fc14f67..54deb743b2d5 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -107,6 +108,25 @@ static __init int kernel_panic_sysctls_init(void) late_initcall(kernel_panic_sysctls_init); #endif +static atomic_t warn_count = ATOMIC_INIT(0); + +#ifdef CONFIG_SYSFS +static ssize_t warn_count_show(struct kobject *kobj, struct kobj_attribute *attr, + char *page) +{ + return sysfs_emit(page, "%d\n", atomic_read(&warn_count)); +} + +static struct kobj_attribute warn_count_attr = __ATTR_RO(warn_count); + +static __init int kernel_panic_sysfs_init(void) +{ + sysfs_add_file_to_group(kernel_kobj, &warn_count_attr.attr, NULL); + return 0; +} +late_initcall(kernel_panic_sysfs_init); +#endif + static long no_blink(int state) { return 0; @@ -211,8 +231,6 @@ static void panic_print_sys_info(bool console_flush) void check_panic_on_warn(const char *origin) { - static atomic_t warn_count = ATOMIC_INIT(0); - if (panic_on_warn) panic("%s: panic_on_warn set ...\n", origin); From 3f38c21707143ca9e728696503d8be3feb1e5486 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 14 Dec 2022 14:35:47 -0800 Subject: [PATCH 511/543] docs: Fix path paste-o for /sys/kernel/warn_count commit 00dd027f721e0458418f7750d8a5a664ed3e5994 upstream. Running "make htmldocs" shows that "/sys/kernel/oops_count" was duplicated. This should have been "warn_count": Warning: /sys/kernel/oops_count is defined 2 times: ./Documentation/ABI/testing/sysfs-kernel-warn_count:0 ./Documentation/ABI/testing/sysfs-kernel-oops_count:0 Fix the typo. Reported-by: kernel test robot Link: https://lore.kernel.org/linux-doc/202212110529.A3Qav8aR-lkp@intel.com Fixes: 8b05aa263361 ("panic: Expose "warn_count" to sysfs") Cc: linux-hardening@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- Documentation/ABI/testing/sysfs-kernel-warn_count | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-kernel-warn_count b/Documentation/ABI/testing/sysfs-kernel-warn_count index 08f083d2fd51..90a029813717 100644 --- a/Documentation/ABI/testing/sysfs-kernel-warn_count +++ b/Documentation/ABI/testing/sysfs-kernel-warn_count @@ -1,4 +1,4 @@ -What: /sys/kernel/oops_count +What: /sys/kernel/warn_count Date: November 2022 KernelVersion: 6.2.0 Contact: Linux Kernel Hardening List From a18417e27ed8dde4e774fb0611a10479d0c8eed1 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 16 Dec 2022 12:26:57 -0800 Subject: [PATCH 512/543] exit: Use READ_ONCE() for all oops/warn limit reads commit 7535b832c6399b5ebfc5b53af5c51dd915ee2538 upstream. Use a temporary variable to take full advantage of READ_ONCE() behavior. Without this, the report (and even the test) might be out of sync with the initial test. Reported-by: Peter Zijlstra Link: https://lore.kernel.org/lkml/Y5x7GXeluFmZ8E0E@hirez.programming.kicks-ass.net Fixes: 9fc9e278a5c0 ("panic: Introduce warn_limit") Fixes: d4ccd54d28d3 ("exit: Put an upper limit on how often we can oops") Cc: "Eric W. Biederman" Cc: Jann Horn Cc: Arnd Bergmann Cc: Petr Mladek Cc: Andrew Morton Cc: Luis Chamberlain Cc: Marco Elver Cc: tangmeng Cc: Sebastian Andrzej Siewior Cc: Tiezhu Yang Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- kernel/exit.c | 6 ++++-- kernel/panic.c | 7 +++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index deffb8e4b1b2..15dc2ec80c46 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -931,6 +931,7 @@ void __noreturn make_task_dead(int signr) * Then do everything else. */ struct task_struct *tsk = current; + unsigned int limit; if (unlikely(in_interrupt())) panic("Aiee, killing interrupt handler!"); @@ -954,8 +955,9 @@ void __noreturn make_task_dead(int signr) * To make sure this can't happen, place an upper bound on how often the * kernel may oops without panic(). */ - if (atomic_inc_return(&oops_count) >= READ_ONCE(oops_limit) && oops_limit) - panic("Oopsed too often (kernel.oops_limit is %d)", oops_limit); + limit = READ_ONCE(oops_limit); + if (atomic_inc_return(&oops_count) >= limit && limit) + panic("Oopsed too often (kernel.oops_limit is %d)", limit); /* * We're taking recursive faults here in make_task_dead. Safest is to just diff --git a/kernel/panic.c b/kernel/panic.c index 54deb743b2d5..7834c9854e02 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -231,12 +231,15 @@ static void panic_print_sys_info(bool console_flush) void check_panic_on_warn(const char *origin) { + unsigned int limit; + if (panic_on_warn) panic("%s: panic_on_warn set ...\n", origin); - if (atomic_inc_return(&warn_count) >= READ_ONCE(warn_limit) && warn_limit) + limit = READ_ONCE(warn_limit); + if (atomic_inc_return(&warn_count) >= limit && limit) panic("%s: system warned too often (kernel.warn_limit is %d)", - origin, warn_limit); + origin, limit); } /** From 2705a9548594888d6abcececdb898cbedd9c853b Mon Sep 17 00:00:00 2001 From: YingChi Long Date: Fri, 18 Nov 2022 08:55:35 +0800 Subject: [PATCH 513/543] x86/fpu: Use _Alignof to avoid undefined behavior in TYPE_ALIGN commit 55228db2697c09abddcb9487c3d9fa5854a932cd upstream. WG14 N2350 specifies that it is an undefined behavior to have type definitions within offsetof", see https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2350.htm This specification is also part of C23. Therefore, replace the TYPE_ALIGN macro with the _Alignof builtin to avoid undefined behavior. (_Alignof itself is C11 and the kernel is built with -gnu11). ISO C11 _Alignof is subtly different from the GNU C extension __alignof__. Latter is the preferred alignment and _Alignof the minimal alignment. For long long on x86 these are 8 and 4 respectively. The macro TYPE_ALIGN's behavior matches _Alignof rather than __alignof__. [ bp: Massage commit message. ] Signed-off-by: YingChi Long Signed-off-by: Borislav Petkov Reviewed-by: Nick Desaulniers Link: https://lore.kernel.org/r/20220925153151.2467884-1-me@inclyc.cn Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/fpu/init.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 8946f89761cc..851eb13edc01 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -133,9 +133,6 @@ static void __init fpu__init_system_generic(void) fpu__init_system_mxcsr(); } -/* Get alignment of the TYPE. */ -#define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test) - /* * Enforce that 'MEMBER' is the last field of 'TYPE'. * @@ -143,8 +140,8 @@ static void __init fpu__init_system_generic(void) * because that's how C aligns structs. */ #define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \ - BUILD_BUG_ON(sizeof(TYPE) != ALIGN(offsetofend(TYPE, MEMBER), \ - TYPE_ALIGN(TYPE))) + BUILD_BUG_ON(sizeof(TYPE) != \ + ALIGN(offsetofend(TYPE, MEMBER), _Alignof(TYPE))) /* * We append the 'struct fpu' to the task_struct: From 708509058b79aa584d4303fcfcfa397790bf657e Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Wed, 12 Oct 2022 11:08:49 +0800 Subject: [PATCH 514/543] drm/amdgpu/discovery: enable soc21 common for GC 11.0.4 commit 69dc98bbd44160930b6b3ca9ca558f89435d2702 upstream. Enable soc21 common for GC 11.0.4. Signed-off-by: Yifan Zhang Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher Cc: "Limonciello, Mario" Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 3993e6134914..623b558e9afc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1507,6 +1507,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): amdgpu_device_ip_block_add(adev, &soc21_common_ip_block); break; default: From 907d5b697d850a548e6a226fef6e84f1b665bbb3 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Wed, 12 Oct 2022 11:09:50 +0800 Subject: [PATCH 515/543] drm/amdgpu/discovery: enable gmc v11 for GC 11.0.4 commit d5fd8c89ed206b2df3933bc4ea129401b2b60869 upstream. Enable gmc (graphic memory controller) v11 for GC 11.0.4. Signed-off-by: Yifan Zhang Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher Cc: "Limonciello, Mario" Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 623b558e9afc..97d6f70c2ef3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1552,6 +1552,7 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block); break; default: From db4fa3b2d7d4eb0a5e05e477d2087c84eade50a6 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Wed, 12 Oct 2022 11:11:09 +0800 Subject: [PATCH 516/543] drm/amdgpu/discovery: enable gfx v11 for GC 11.0.4 commit b952d6b3d3ff3c1570fab77f2137d5e5280a0e57 upstream. Enable gfx v11 for GC 11.0.4. Signed-off-by: Yifan Zhang Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher Cc: "Limonciello, Mario" Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 97d6f70c2ef3..7df2e088159e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1787,6 +1787,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block); break; default: From 2ecc26293852b3a6d093b49857f309ba7497a6be Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Wed, 12 Oct 2022 11:11:57 +0800 Subject: [PATCH 517/543] drm/amdgpu/discovery: enable mes support for GC v11.0.4 commit 6a6af77570add4e58721386be429dbd02cd4b9dd upstream. this patch is to enable mes for GC 11.0.4. Signed-off-by: Yifan Zhang Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher Cc: "Limonciello, Mario" Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 7df2e088159e..ec49b61e2c13 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1951,6 +1951,7 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block); adev->enable_mes = true; adev->enable_mes_kiq = true; From 333814dd71d88c1b0fe6afb1147c8493f9dc116c Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Wed, 12 Oct 2022 11:15:24 +0800 Subject: [PATCH 518/543] drm/amdgpu: set GC 11.0.4 family commit 94ab70685844227b5c9cb9027a5c4acd3b0e4564 upstream. this patch is to set GC 11.0.4 family. Signed-off-by: Yifan Zhang Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher Cc: "Limonciello, Mario" Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index ec49b61e2c13..69de5e4a0279 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -2181,6 +2181,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->family = AMDGPU_FAMILY_GC_11_0_0; break; case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): adev->family = AMDGPU_FAMILY_GC_11_0_1; break; default: From 09157e804d9e578c3bc8eff4d1ac4712f6e0fc1d Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Wed, 12 Oct 2022 11:16:29 +0800 Subject: [PATCH 519/543] drm/amdgpu/discovery: set the APU flag for GC 11.0.4 commit dd2d9c7fd7716838d477e257f43facd68c53d3a9 upstream. Set the APU flag appropriately for GC 11.0.4. Signed-off-by: Yifan Zhang Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher Cc: "Limonciello, Mario" Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 69de5e4a0279..dab3a3811adc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -2199,6 +2199,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 7): case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): adev->flags |= AMD_IS_APU; break; default: From ea8f7acc35e2eb548c4ab56df22eccae69a660e6 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Wed, 12 Oct 2022 12:49:06 +0800 Subject: [PATCH 520/543] drm/amdgpu: add gfx support for GC 11.0.4 commit 1763cb65e870e783e26d2dc9def4edbeadcb1050 upstream. this patch to add GC 11.0.4 gfx support to gfx11 implementation. Signed-off-by: Yifan Zhang Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher Cc: "Limonciello, Mario" Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 0fecc5bf45bc..22fbe74d2b07 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -77,6 +77,10 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin"); static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = { @@ -262,6 +266,7 @@ static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): soc15_program_register_sequence(adev, golden_settings_gc_11_0_1, (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1)); @@ -856,6 +861,7 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; break; case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -1285,6 +1291,7 @@ static int gfx_v11_0_sw_init(void *handle) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; @@ -2486,7 +2493,8 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) for (i = 0; i < adev->usec_timeout; i++) { cp_status = RREG32_SOC15(GC, 0, regCP_STAT); - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1)) + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1) || + adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 4)) bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); else From 7d1e2273f21fd756bfc673e3e6244dc7912d83c2 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Wed, 12 Oct 2022 12:56:01 +0800 Subject: [PATCH 521/543] drm/amdgpu: add gmc v11 support for GC 11.0.4 commit d0ca8248999e4c5b02ac64f40536ff46dc14dda7 upstream. Add gmc v11 support for GC 11.0.4. Signed-off-by: Yifan Zhang Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher Cc: "Limonciello, Mario" Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 66dfb574cc7d..96e0bb5bee78 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -749,6 +749,7 @@ static int gmc_v11_0_sw_init(void *handle) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): adev->num_vmhubs = 2; /* * To fulfill 4-level page support, From 3ed03b0b42c4077806d652c8ced1acc7ff0ac001 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Mon, 21 Nov 2022 16:19:44 +0800 Subject: [PATCH 522/543] drm/amdgpu/discovery: add PSP IP v13.0.11 support commit 7c1389f1b1228b96e621815e63eaa2e89b9f7511 upstream. Add PSP IP v13.0.11 ip discovery support. Signed-off-by: Tim Huang Signed-off-by: Yifan Zhang Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher Cc: "Limonciello, Mario" Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index dab3a3811adc..587b4f2af3eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1638,6 +1638,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 7): case IP_VERSION(13, 0, 8): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 11): amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block); break; case IP_VERSION(13, 0, 4): From bb856053b7d7526784e596cf3be16e9d047ccb99 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Wed, 12 Oct 2022 13:08:30 +0800 Subject: [PATCH 523/543] drm/amdgpu/pm: enable swsmu for SMU IP v13.0.11 commit 16412a94364d1dcebded9217ecb693c9659eaabc upstream. Add the entry to set the ppt functions for SMU IP v13.0.11. Signed-off-by: Yifan Zhang Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher Cc: "Limonciello, Mario" Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index b880f4d7d67e..2875f6bc3a6a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -585,6 +585,7 @@ static int smu_set_funcs(struct amdgpu_device *adev) yellow_carp_set_ppt_funcs(smu); break; case IP_VERSION(13, 0, 4): + case IP_VERSION(13, 0, 11): smu_v13_0_4_set_ppt_funcs(smu); break; case IP_VERSION(13, 0, 5): From 6ed56b86918d27f72b2cf283dcec6bb822813bc5 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Wed, 12 Oct 2022 13:16:25 +0800 Subject: [PATCH 524/543] drm/amdgpu: add smu 13 support for smu 13.0.11 commit 51e7a2168769c2f46edd93a18d4cba4a6d4adb13 upstream. this patch to add smu 13 support for smu 13.0.11. Signed-off-by: Yifan Zhang Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher Cc: "Limonciello, Mario" Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 1 + drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 587b4f2af3eb..a9f0780a8e5c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1689,6 +1689,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 7): case IP_VERSION(13, 0, 8): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 11): amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block); break; default: diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 9f9f64c5cdd8..cd37bac00438 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -250,6 +250,7 @@ int smu_v13_0_check_fw_status(struct smu_context *smu) switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 4): + case IP_VERSION(13, 0, 11): mp1_fw_flags = RREG32_PCIE(MP1_Public | (smnMP1_V13_0_4_FIRMWARE_FLAGS & 0xffffffff)); break; @@ -303,6 +304,7 @@ int smu_v13_0_check_fw_version(struct smu_context *smu) smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_YELLOW_CARP; break; case IP_VERSION(13, 0, 4): + case IP_VERSION(13, 0, 11): smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_SMU_V13_0_4; break; case IP_VERSION(13, 0, 5): From c776433afefde3f0dd74d43fb2cc5791bbd1caaf Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Wed, 12 Oct 2022 13:17:40 +0800 Subject: [PATCH 525/543] drm/amdgpu/pm: add GFXOFF control IP version check for SMU IP v13.0.11 commit 9f83e61201bb21957e4993736532edad7a11c7fa upstream. Enable the SMU IP v13.0.11 GFXOFF control Signed-off-by: Yifan Zhang Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher Cc: "Limonciello, Mario" Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index cd37bac00438..479cbf05c331 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -845,6 +845,7 @@ int smu_v13_0_gfx_off_control(struct smu_context *smu, bool enable) case IP_VERSION(13, 0, 7): case IP_VERSION(13, 0, 8): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 11): if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) return 0; if (enable) From fe4d9fb3326ecac3d2cdd1acbc0011013b1d7a94 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Mon, 24 Oct 2022 10:32:05 +0800 Subject: [PATCH 526/543] drm/amdgpu/soc21: add mode2 asic reset for SMU IP v13.0.11 commit 18ad18853cf2d8b94cef0112ba94f7a7535a9e89 upstream. Set the default reset method to mode2 for SMU IP v13.0.11 Signed-off-by: Tim Huang Signed-off-by: Yifan Zhang Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher Cc: "Limonciello, Mario" Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/soc21.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 909cf9f220c1..64b06104a251 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -325,6 +325,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev) case IP_VERSION(13, 0, 10): return AMD_RESET_METHOD_MODE1; case IP_VERSION(13, 0, 4): + case IP_VERSION(13, 0, 11): return AMD_RESET_METHOD_MODE2; default: if (amdgpu_dpm_is_baco_supported(adev)) From 703011b41c4f0d333e731edbb632eab62c0e1810 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Mon, 24 Oct 2022 11:05:59 +0800 Subject: [PATCH 527/543] drm/amdgpu/pm: use the specific mailbox registers only for SMU IP v13.0.4 commit 069a5af97ce3a1448a3566ce8b63b60e51e19958 upstream. The SMU IP v13.0.4 ppt interface is shared by IP v13.0.11, they use the different mailbox register offset. So use the specific mailbox registers offset for v13.0.4. Signed-off-by: Tim Huang Signed-off-by: Yifan Zhang Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher Cc: "Limonciello, Mario" Signed-off-by: Greg Kroah-Hartman --- .../drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c index 97e1d55dcaad..8fa9a36c38b6 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c @@ -1026,6 +1026,15 @@ static const struct pptable_funcs smu_v13_0_4_ppt_funcs = { .set_gfx_power_up_by_imu = smu_v13_0_set_gfx_power_up_by_imu, }; +static void smu_v13_0_4_set_smu_mailbox_registers(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + + smu->param_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_82); + smu->msg_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_66); + smu->resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90); +} + void smu_v13_0_4_set_ppt_funcs(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; @@ -1035,7 +1044,9 @@ void smu_v13_0_4_set_ppt_funcs(struct smu_context *smu) smu->feature_map = smu_v13_0_4_feature_mask_map; smu->table_map = smu_v13_0_4_table_map; smu->is_apu = true; - smu->param_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_82); - smu->msg_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_66); - smu->resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90); + + if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 4)) + smu_v13_0_4_set_smu_mailbox_registers(smu); + else + smu_v13_0_set_smu_mailbox_registers(smu); } From 11ffb993373095468936788ecdec3cd237f8983d Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Tue, 25 Oct 2022 16:59:44 +0800 Subject: [PATCH 528/543] drm/amdgpu/discovery: enable nbio support for NBIO v7.7.1 commit 7308ceb44663f40bf9e7373c3b1aa4f7f433d625 upstream. this patch is to enable nbio support for NBIO v7.7.1. Signed-off-by: Yifan Zhang Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher Cc: "Limonciello, Mario" Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index a9f0780a8e5c..d8441e273cb5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -2258,6 +2258,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->nbio.hdp_flush_reg = &nbio_v4_3_hdp_flush_reg; break; case IP_VERSION(7, 7, 0): + case IP_VERSION(7, 7, 1): adev->nbio.funcs = &nbio_v7_7_funcs; adev->nbio.hdp_flush_reg = &nbio_v7_7_hdp_flush_reg; break; From 8cbe04b2e0a1a0197a47e14fbbbcf0ef18299bce Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Mon, 21 Nov 2022 10:24:14 +0800 Subject: [PATCH 529/543] drm/amdgpu: enable PSP IP v13.0.11 support commit 2c83e3fd928b9cb1e35340e58d4b1bd2eea23ed6 upstream. Enable PSP FW loading for PSP IP v13.0.11 Signed-off-by: Tim Huang Signed-off-by: Yifan Zhang Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher Cc: "Limonciello, Mario" Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 1 + drivers/gpu/drm/amd/amdgpu/psp_v13_0.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 7978307e1d6d..712dd72f3ccf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -139,6 +139,7 @@ static int psp_early_init(void *handle) case IP_VERSION(13, 0, 5): case IP_VERSION(13, 0, 8): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 11): psp_v13_0_set_psp_funcs(psp); psp->autoload_supported = true; break; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 88f9b327183a..8c5fa4b7b68a 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -46,6 +46,8 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_10_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_11_toc.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_11_ta.bin"); /* For large FW files the time to complete can be very long */ #define USBC_PD_POLLING_LIMIT_S 240 @@ -102,6 +104,7 @@ static int psp_v13_0_init_microcode(struct psp_context *psp) case IP_VERSION(13, 0, 3): case IP_VERSION(13, 0, 5): case IP_VERSION(13, 0, 8): + case IP_VERSION(13, 0, 11): err = psp_init_toc_microcode(psp, chip_name); if (err) return err; From 09af1ee53ced112a69b2a8f5d3b0c0e3818fcb4e Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Mon, 5 Dec 2022 11:24:37 +0800 Subject: [PATCH 530/543] drm/amdgpu: enable GFX IP v11.0.4 CG support commit f2b91e5a7cc0368709964994ca253781b51a486a upstream. Add CG support for GFX/MC/HDP/ATHUB/IH/BIF. Signed-off-by: Tim Huang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: "Limonciello, Mario" Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/soc21.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 64b06104a251..9bc9852b9cda 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -655,7 +655,23 @@ static int soc21_common_early_init(void *handle) adev->external_rev_id = adev->rev_id + 0x20; break; case IP_VERSION(11, 0, 4): - adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG | + adev->cg_flags = + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_FGCG | + AMD_CG_SUPPORT_REPEATER_FGCG | + AMD_CG_SUPPORT_GFX_PERF_CLK | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS | + AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG; adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG | From 834a9142586542f7de693135caf9cc43c0b1e1b2 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Mon, 5 Dec 2022 13:55:36 +0800 Subject: [PATCH 531/543] drm/amdgpu: enable GFX Power Gating for GC IP v11.0.4 commit a89e2965da6e644729a8ee9c318b7fa9a2990353 upstream. Enable GFX Power Gating control for GC IP v11.0.4. Signed-off-by: Tim Huang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: "Limonciello, Mario" Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 22fbe74d2b07..c150314560a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -5030,6 +5030,7 @@ static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); break; default: @@ -5063,6 +5064,7 @@ static int gfx_v11_0_set_powergating_state(void *handle, amdgpu_gfx_off_ctrl(adev, enable); break; case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): gfx_v11_cntl_pg(adev, enable); amdgpu_gfx_off_ctrl(adev, enable); break; From a74805429d38775ac5cc24a8b04df74afe449ffb Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Mon, 5 Dec 2022 14:18:23 +0800 Subject: [PATCH 532/543] drm/amdgpu: enable GFX Clock Gating control for GC IP v11.0.4 commit f9caa237372b106b5e70ba1a4bfd4222eb79ec71 upstream. Enable GFX IP v11.0.4 CG gate/ungate control. Signed-off-by: Tim Huang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: "Limonciello, Mario" Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index c150314560a1..5e3868a7e95a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -5088,6 +5088,7 @@ static int gfx_v11_0_set_clockgating_state(void *handle, case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): + case IP_VERSION(11, 0, 4): gfx_v11_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); break; From a63bd0539bf395bd57ad7bb2fb6d7581bc19bff0 Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Sun, 9 Oct 2022 14:35:20 +0800 Subject: [PATCH 533/543] drm/amdgpu: add tmz support for GC 11.0.1 commit 97074216917b4188f0af3e52cc5b3f2b277bbbca upstream. this patch to add tmz support for GC 11.0.1. Signed-off-by: Yifan Zhang Reviewed-by: Tim Huang Signed-off-by: Alex Deucher Cc: "Limonciello, Mario" Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 28612e56d0d4..e970e3760cec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -548,6 +548,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): /* YELLOW_CARP*/ case IP_VERSION(10, 3, 3): + case IP_VERSION(11, 0, 1): /* Don't enable it by default yet. */ if (amdgpu_tmz < 1) { From 40a66b6ca4781cb65449ff0794924fc39f599d74 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Mon, 5 Dec 2022 14:33:25 +0800 Subject: [PATCH 534/543] drm/amdgpu: add tmz support for GC IP v11.0.4 commit 2aecbe492a3c0bf4c21f78c099a6f6c205fab0c7 upstream. Add tmz support for GC 11.0.4. Signed-off-by: Tim Huang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: "Limonciello, Mario" Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index e970e3760cec..02a4c93673ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -549,6 +549,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) /* YELLOW_CARP*/ case IP_VERSION(10, 3, 3): case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): /* Don't enable it by default yet. */ if (amdgpu_tmz < 1) { From 6da17ac15e354ce483044c924c801a1b25ec8e4a Mon Sep 17 00:00:00 2001 From: Lang Yu Date: Wed, 11 Jan 2023 09:52:11 +0800 Subject: [PATCH 535/543] drm/amdgpu: correct MEC number for gfx11 APUs commit 0ddadc3a2208aedb1b27dbb76d0b4e722b5b527a upstream. There is only one MEC on these APUs. Signed-off-by: Lang Yu Reviewed-by: Aaron Liu Reviewed-by: Yifan Zhang Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 5e3868a7e95a..3bf4f2edc108 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -1288,10 +1288,8 @@ static int gfx_v11_0_sw_init(void *handle) switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(11, 0, 0): - case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): - case IP_VERSION(11, 0, 4): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; @@ -1299,6 +1297,15 @@ static int gfx_v11_0_sw_init(void *handle) adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 4; break; + case IP_VERSION(11, 0, 1): + case IP_VERSION(11, 0, 4): + adev->gfx.me.num_me = 1; + adev->gfx.me.num_pipe_per_me = 1; + adev->gfx.me.num_queue_per_pipe = 1; + adev->gfx.mec.num_mec = 1; + adev->gfx.mec.num_pipe_per_mec = 4; + adev->gfx.mec.num_queue_per_pipe = 4; + break; default: adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; From 1eb57b87f106c90cee6b2a56a10f2e29c7a25f3e Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Fri, 13 Jan 2023 11:49:02 +0530 Subject: [PATCH 536/543] octeontx2-pf: Avoid use of GFP_KERNEL in atomic context commit 87b93b678e95c7d93fe6a55b0e0fbda26d8c7760 upstream. Using GFP_KERNEL in preemption disable context, causing below warning when CONFIG_DEBUG_ATOMIC_SLEEP is enabled. [ 32.542271] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:274 [ 32.550883] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1, name: swapper/0 [ 32.558707] preempt_count: 1, expected: 0 [ 32.562710] RCU nest depth: 0, expected: 0 [ 32.566800] CPU: 3 PID: 1 Comm: swapper/0 Tainted: G W 6.2.0-rc2-00269-gae9dcb91c606 #7 [ 32.576188] Hardware name: Marvell CN106XX board (DT) [ 32.581232] Call trace: [ 32.583670] dump_backtrace.part.0+0xe0/0xf0 [ 32.587937] show_stack+0x18/0x30 [ 32.591245] dump_stack_lvl+0x68/0x84 [ 32.594900] dump_stack+0x18/0x34 [ 32.598206] __might_resched+0x12c/0x160 [ 32.602122] __might_sleep+0x48/0xa0 [ 32.605689] __kmem_cache_alloc_node+0x2b8/0x2e0 [ 32.610301] __kmalloc+0x58/0x190 [ 32.613610] otx2_sq_aura_pool_init+0x1a8/0x314 [ 32.618134] otx2_open+0x1d4/0x9d0 To avoid use of GFP_ATOMIC for memory allocation, disable preemption after all memory allocation is done. Fixes: 4af1b64f80fb ("octeontx2-pf: Fix lmtst ID used in aura free") Signed-off-by: Geetha sowjanya Signed-off-by: Sunil Kovvuri Goutham Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 88f8772a61cd..497b777b6a34 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -1370,7 +1370,6 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) if (err) goto fail; - get_cpu(); /* Allocate pointers and free them to aura/pool */ for (qidx = 0; qidx < hw->tot_tx_queues; qidx++) { pool_id = otx2_get_pool_idx(pfvf, AURA_NIX_SQ, qidx); @@ -1388,13 +1387,14 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) err = otx2_alloc_rbuf(pfvf, pool, &bufptr); if (err) goto err_mem; + get_cpu(); pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr); + put_cpu(); sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr; } } err_mem: - put_cpu(); return err ? -ENOMEM : 0; fail: From ddb98087bd2aed12cbe867332c68473fc4d48855 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 18 Jan 2023 13:24:12 +0100 Subject: [PATCH 537/543] net/ulp: use consistent error code when blocking ULP commit 8ccc99362b60c6f27bb46f36fdaaccf4ef0303de upstream. The referenced commit changed the error code returned by the kernel when preventing a non-established socket from attaching the ktls ULP. Before to such a commit, the user-space got ENOTCONN instead of EINVAL. The existing self-tests depend on such error code, and the change caused a failure: RUN global.non_established ... tls.c:1673:non_established:Expected errno (22) == ENOTCONN (107) non_established: Test failed at step #3 FAIL global.non_established In the unlikely event existing applications do the same, address the issue by restoring the prior error code in the above scenario. Note that the only other ULP performing similar checks at init time - smc_ulp_ops - also fails with ENOTCONN when trying to attach the ULP to a non-established socket. Reported-by: Sabrina Dubroca Fixes: 2c02d41d71f9 ("net/ulp: prevent ULP without clone op from entering the LISTEN status") Signed-off-by: Paolo Abeni Reviewed-by: Sabrina Dubroca Link: https://lore.kernel.org/r/7bb199e7a93317fb6f8bf8b9b2dc71c18f337cde.1674042685.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_ulp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 05b6077b9f2c..2aa442128630 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -139,7 +139,7 @@ static int __tcp_set_ulp(struct sock *sk, const struct tcp_ulp_ops *ulp_ops) if (sk->sk_socket) clear_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); - err = -EINVAL; + err = -ENOTCONN; if (!ulp_ops->clone && sk->sk_state == TCP_LISTEN) goto out_err; From 659518e013d6bd562bb0f1d2d9f99d0ac54720e2 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Wed, 18 Jan 2023 15:13:00 +0800 Subject: [PATCH 538/543] octeontx2-pf: Fix the use of GFP_KERNEL in atomic context on rt commit 55ba18dc62deff5910c0fa64486dea1ff20832ff upstream. The commit 4af1b64f80fb ("octeontx2-pf: Fix lmtst ID used in aura free") uses the get/put_cpu() to protect the usage of percpu pointer in ->aura_freeptr() callback, but it also unnecessarily disable the preemption for the blockable memory allocation. The commit 87b93b678e95 ("octeontx2-pf: Avoid use of GFP_KERNEL in atomic context") tried to fix these sleep inside atomic warnings. But it only fix the one for the non-rt kernel. For the rt kernel, we still get the similar warnings like below. BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 1, name: swapper/0 preempt_count: 1, expected: 0 RCU nest depth: 0, expected: 0 3 locks held by swapper/0/1: #0: ffff800009fc5fe8 (rtnl_mutex){+.+.}-{3:3}, at: rtnl_lock+0x24/0x30 #1: ffff000100c276c0 (&mbox->lock){+.+.}-{3:3}, at: otx2_init_hw_resources+0x8c/0x3a4 #2: ffffffbfef6537e0 (&cpu_rcache->lock){+.+.}-{2:2}, at: alloc_iova_fast+0x1ac/0x2ac Preemption disabled at: [] otx2_rq_aura_pool_init+0x14c/0x284 CPU: 20 PID: 1 Comm: swapper/0 Tainted: G W 6.2.0-rc3-rt1-yocto-preempt-rt #1 Hardware name: Marvell OcteonTX CN96XX board (DT) Call trace: dump_backtrace.part.0+0xe8/0xf4 show_stack+0x20/0x30 dump_stack_lvl+0x9c/0xd8 dump_stack+0x18/0x34 __might_resched+0x188/0x224 rt_spin_lock+0x64/0x110 alloc_iova_fast+0x1ac/0x2ac iommu_dma_alloc_iova+0xd4/0x110 __iommu_dma_map+0x80/0x144 iommu_dma_map_page+0xe8/0x260 dma_map_page_attrs+0xb4/0xc0 __otx2_alloc_rbuf+0x90/0x150 otx2_rq_aura_pool_init+0x1c8/0x284 otx2_init_hw_resources+0xe4/0x3a4 otx2_open+0xf0/0x610 __dev_open+0x104/0x224 __dev_change_flags+0x1e4/0x274 dev_change_flags+0x2c/0x7c ic_open_devs+0x124/0x2f8 ip_auto_config+0x180/0x42c do_one_initcall+0x90/0x4dc do_basic_setup+0x10c/0x14c kernel_init_freeable+0x10c/0x13c kernel_init+0x2c/0x140 ret_from_fork+0x10/0x20 Of course, we can shuffle the get/put_cpu() to only wrap the invocation of ->aura_freeptr() as what commit 87b93b678e95 does. But there are only two ->aura_freeptr() callbacks, otx2_aura_freeptr() and cn10k_aura_freeptr(). There is no usage of perpcu variable in the otx2_aura_freeptr() at all, so the get/put_cpu() seems redundant to it. We can move the get/put_cpu() into the corresponding callback which really has the percpu variable usage and avoid the sprinkling of get/put_cpu() in several places. Fixes: 4af1b64f80fb ("octeontx2-pf: Fix lmtst ID used in aura free") Signed-off-by: Kevin Hao Link: https://lore.kernel.org/r/20230118071300.3271125-1-haokexin@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/marvell/octeontx2/nic/otx2_common.c | 11 ++--------- .../net/ethernet/marvell/octeontx2/nic/otx2_common.h | 2 ++ 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 497b777b6a34..8a41ad8ca04f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -1012,7 +1012,6 @@ static void otx2_pool_refill_task(struct work_struct *work) rbpool = cq->rbpool; free_ptrs = cq->pool_ptrs; - get_cpu(); while (cq->pool_ptrs) { if (otx2_alloc_rbuf(pfvf, rbpool, &bufptr)) { /* Schedule a WQ if we fails to free atleast half of the @@ -1032,7 +1031,6 @@ static void otx2_pool_refill_task(struct work_struct *work) pfvf->hw_ops->aura_freeptr(pfvf, qidx, bufptr + OTX2_HEAD_ROOM); cq->pool_ptrs--; } - put_cpu(); cq->refill_task_sched = false; } @@ -1387,9 +1385,7 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) err = otx2_alloc_rbuf(pfvf, pool, &bufptr); if (err) goto err_mem; - get_cpu(); pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr); - put_cpu(); sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr; } } @@ -1435,21 +1431,18 @@ int otx2_rq_aura_pool_init(struct otx2_nic *pfvf) if (err) goto fail; - get_cpu(); /* Allocate pointers and free them to aura/pool */ for (pool_id = 0; pool_id < hw->rqpool_cnt; pool_id++) { pool = &pfvf->qset.pool[pool_id]; for (ptr = 0; ptr < num_ptrs; ptr++) { err = otx2_alloc_rbuf(pfvf, pool, &bufptr); if (err) - goto err_mem; + return -ENOMEM; pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr + OTX2_HEAD_ROOM); } } -err_mem: - put_cpu(); - return err ? -ENOMEM : 0; + return 0; fail: otx2_mbox_reset(&pfvf->mbox.mbox, 0); otx2_aura_pool_free(pfvf); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 67aa02bb2b85..712715a49d20 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -733,8 +733,10 @@ static inline void cn10k_aura_freeptr(void *dev, int aura, u64 buf) u64 ptrs[2]; ptrs[1] = buf; + get_cpu(); /* Free only one buffer at time during init and teardown */ __cn10k_aura_freeptr(pfvf, aura, ptrs, 2); + put_cpu(); } /* Alloc pointer from pool/aura */ From 5a69eccf5638171082bf9037a58f6bce62108594 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 5 Jan 2023 19:42:20 +0800 Subject: [PATCH 539/543] net/mlx5: fix missing mutex_unlock in mlx5_fw_fatal_reporter_err_work() commit 90e7cb78b81543998217b0eb446c067ce2191a79 upstream. Add missing mutex_unlock() before returning from mlx5_fw_fatal_reporter_err_work(). Fixes: 9078e843efec ("net/mlx5: Avoid recovery in probe flows") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Yang Yingliang Signed-off-by: Shay Drory Signed-off-by: Saeed Mahameed Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 96417c5feed7..879555ba847d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -677,6 +677,7 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) mutex_lock(&dev->intf_state_mutex); if (test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) { mlx5_core_err(dev, "health works are not permitted at this stage\n"); + mutex_unlock(&dev->intf_state_mutex); return; } mutex_unlock(&dev->intf_state_mutex); From fa6357de7c5e2974719e6a2c9de0e03e91f5fa84 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 26 Nov 2022 11:55:49 +0900 Subject: [PATCH 540/543] block: mq-deadline: Rename deadline_is_seq_writes() commit 3692fec8bb476e8583e559ff5783a6adef306cf2 upstream. Rename deadline_is_seq_writes() to deadline_is_seq_write() (remove the "s" plural) to more correctly reflect the fact that this function tests a single request, not multiple requests. Fixes: 015d02f48537 ("block: mq-deadline: Do not break sequential write streams to zoned HDDs") Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20221126025550.967914-2-damien.lemoal@opensource.wdc.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/mq-deadline.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 6672f1bce379..f10c2a0d18d4 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -294,7 +294,7 @@ static inline int deadline_check_fifo(struct dd_per_prio *per_prio, /* * Check if rq has a sequential request preceding it. */ -static bool deadline_is_seq_writes(struct deadline_data *dd, struct request *rq) +static bool deadline_is_seq_write(struct deadline_data *dd, struct request *rq) { struct request *prev = deadline_earlier_request(rq); @@ -353,7 +353,7 @@ deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio, list_for_each_entry(rq, &per_prio->fifo_list[DD_WRITE], queuelist) { if (blk_req_can_dispatch_to_zone(rq) && (blk_queue_nonrot(rq->q) || - !deadline_is_seq_writes(dd, rq))) + !deadline_is_seq_write(dd, rq))) goto out; } rq = NULL; From effecd8d116d3d3a28b4f628e61bba8d318fdfcf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 13 Jan 2023 12:43:26 +0000 Subject: [PATCH 541/543] Revert "wifi: mac80211: fix memory leak in ieee80211_if_add()" commit 80f8a66dede0a4b4e9e846765a97809c6fe49ce5 upstream. This reverts commit 13e5afd3d773c6fc6ca2b89027befaaaa1ea7293. ieee80211_if_free() is already called from free_netdev(ndev) because ndev->priv_destructor == ieee80211_if_free syzbot reported: general protection fault, probably for non-canonical address 0xdffffc0000000004: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000020-0x0000000000000027] CPU: 0 PID: 10041 Comm: syz-executor.0 Not tainted 6.2.0-rc2-syzkaller-00388-g55b98837e37d #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 RIP: 0010:pcpu_get_page_chunk mm/percpu.c:262 [inline] RIP: 0010:pcpu_chunk_addr_search mm/percpu.c:1619 [inline] RIP: 0010:free_percpu mm/percpu.c:2271 [inline] RIP: 0010:free_percpu+0x186/0x10f0 mm/percpu.c:2254 Code: 80 3c 02 00 0f 85 f5 0e 00 00 48 8b 3b 48 01 ef e8 cf b3 0b 00 48 ba 00 00 00 00 00 fc ff df 48 8d 78 20 48 89 f9 48 c1 e9 03 <80> 3c 11 00 0f 85 3b 0e 00 00 48 8b 58 20 48 b8 00 00 00 00 00 fc RSP: 0018:ffffc90004ba7068 EFLAGS: 00010002 RAX: 0000000000000000 RBX: ffff88823ffe2b80 RCX: 0000000000000004 RDX: dffffc0000000000 RSI: ffffffff81c1f4e7 RDI: 0000000000000020 RBP: ffffe8fffe8fc220 R08: 0000000000000005 R09: 0000000000000000 R10: 0000000000000000 R11: 1ffffffff2179ab2 R12: ffff8880b983d000 R13: 0000000000000003 R14: 0000607f450fc220 R15: ffff88823ffe2988 FS: 00007fcb349de700(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b32220000 CR3: 000000004914f000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: netdev_run_todo+0x6bf/0x1100 net/core/dev.c:10352 ieee80211_register_hw+0x2663/0x4040 net/mac80211/main.c:1411 mac80211_hwsim_new_radio+0x2537/0x4d80 drivers/net/wireless/mac80211_hwsim.c:4583 hwsim_new_radio_nl+0xa09/0x10f0 drivers/net/wireless/mac80211_hwsim.c:5176 genl_family_rcv_msg_doit.isra.0+0x1e6/0x2d0 net/netlink/genetlink.c:968 genl_family_rcv_msg net/netlink/genetlink.c:1048 [inline] genl_rcv_msg+0x4ff/0x7e0 net/netlink/genetlink.c:1065 netlink_rcv_skb+0x165/0x440 net/netlink/af_netlink.c:2564 genl_rcv+0x28/0x40 net/netlink/genetlink.c:1076 netlink_unicast_kernel net/netlink/af_netlink.c:1330 [inline] netlink_unicast+0x547/0x7f0 net/netlink/af_netlink.c:1356 netlink_sendmsg+0x91b/0xe10 net/netlink/af_netlink.c:1932 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg+0xd3/0x120 net/socket.c:734 ____sys_sendmsg+0x712/0x8c0 net/socket.c:2476 ___sys_sendmsg+0x110/0x1b0 net/socket.c:2530 __sys_sendmsg+0xf7/0x1c0 net/socket.c:2559 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x39/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Reported-by: syzbot Fixes: 13e5afd3d773 ("wifi: mac80211: fix memory leak in ieee80211_if_add()") Signed-off-by: Eric Dumazet Cc: Zhengchao Shao Cc: Johannes Berg Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20230113124326.3533978-1-edumazet@google.com Signed-off-by: Greg Kroah-Hartman --- net/mac80211/iface.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 8375900f210e..8dd3c10a99e0 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -2260,7 +2260,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ret = cfg80211_register_netdevice(ndev); if (ret) { - ieee80211_if_free(ndev); free_netdev(ndev); return ret; } From ebb677d219453f29cec0ca60b73f8397b0d8eb67 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Thu, 29 Dec 2022 16:16:48 +0100 Subject: [PATCH 542/543] soc: qcom: apr: Make qcom,protection-domain optional again commit 599d41fb8ea8bd2a99ca9525dd69405020e43dda upstream. APR should not fail if the service device tree node does not have the qcom,protection-domain property, since this functionality does not exist on older platforms such as MSM8916 and MSM8996. Ignore -EINVAL (returned when the property does not exist) to fix a regression on 6.2-rc1 that prevents audio from working: qcom,apr remoteproc0:smd-edge.apr_audio_svc.-1.-1: Failed to read second value of qcom,protection-domain qcom,apr remoteproc0:smd-edge.apr_audio_svc.-1.-1: Failed to add apr 3 svc Fixes: 6d7860f5750d ("soc: qcom: apr: Add check for idr_alloc and of_property_read_string_index") Signed-off-by: Stephan Gerhold Reviewed-by: Bjorn Andersson Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20221229151648.19839-3-stephan@gerhold.net Signed-off-by: Greg Kroah-Hartman --- drivers/soc/qcom/apr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/soc/qcom/apr.c b/drivers/soc/qcom/apr.c index cd44f17dad3d..d51abb462ae5 100644 --- a/drivers/soc/qcom/apr.c +++ b/drivers/soc/qcom/apr.c @@ -461,9 +461,10 @@ static int apr_add_device(struct device *dev, struct device_node *np, goto out; } + /* Protection domain is optional, it does not exist on older platforms */ ret = of_property_read_string_index(np, "qcom,protection-domain", 1, &adev->service_path); - if (ret < 0) { + if (ret < 0 && ret != -EINVAL) { dev_err(dev, "Failed to read second value of qcom,protection-domain\n"); goto out; } From 93f875a8526a291005e7f38478079526c843cbec Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 24 Jan 2023 07:24:44 +0100 Subject: [PATCH 543/543] Linux 6.1.8 Link: https://lore.kernel.org/r/20230122150246.321043584@linuxfoundation.org Tested-by: Ronald Warsow Tested-by: Salvatore Bonaccorso Tested-by: Takeshi Ogasawara Tested-by: Joel Fernandes (Google) Tested-by: Rudi Heitbaum Tested-by: Conor Dooley Tested-by: Bagas Sanjaya Tested-by: Fenil Jain Tested-by: Ron Economos Tested-by: Sudip Mukherjee Link: https://lore.kernel.org/r/20230123094931.568794202@linuxfoundation.org Tested-by: Rudi Heitbaum Tested-by: Ronald Warsow Tested-by: Allen Pais Tested-by: Florian Fainelli Tested-by: Takeshi Ogasawara Tested-by: Kelsey Steele Tested-by: Shuah Khan Tested-by: Guenter Roeck Tested-by: Bagas Sanjaya Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7eb6793ecfbf..49261450039a 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 7 +SUBLEVEL = 8 EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth