From d0b30d8e4d257874007c0140743801e42db612c2 Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Fri, 7 Jul 2023 12:27:21 +0530 Subject: [PATCH 001/224] HID: amd_sfh: Rename the float32 variable commit c1685a862a4bea863537f06abaa37a123aef493c upstream. As float32 is also used in other places as a data type, it is necessary to rename the float32 variable in order to avoid confusion. Cc: stable@vger.kernel.org Tested-by: Kai-Heng Feng Signed-off-by: Basavaraj Natikar Signed-off-by: Akshata MukundShetty Link: https://lore.kernel.org/r/20230707065722.9036-2-Basavaraj.Natikar@amd.com Signed-off-by: Benjamin Tissoires Signed-off-by: Greg Kroah-Hartman --- drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_desc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_desc.c b/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_desc.c index 6f0d332ccf51..c81d20cd3081 100644 --- a/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_desc.c +++ b/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_desc.c @@ -132,13 +132,13 @@ static void get_common_inputs(struct common_input_property *common, int report_i common->event_type = HID_USAGE_SENSOR_EVENT_DATA_UPDATED_ENUM; } -static int float_to_int(u32 float32) +static int float_to_int(u32 flt32_val) { int fraction, shift, mantissa, sign, exp, zeropre; - mantissa = float32 & GENMASK(22, 0); - sign = (float32 & BIT(31)) ? -1 : 1; - exp = (float32 & ~BIT(31)) >> 23; + mantissa = flt32_val & GENMASK(22, 0); + sign = (flt32_val & BIT(31)) ? -1 : 1; + exp = (flt32_val & ~BIT(31)) >> 23; if (!exp && !mantissa) return 0; @@ -151,10 +151,10 @@ static int float_to_int(u32 float32) } shift = 23 - exp; - float32 = BIT(exp) + (mantissa >> shift); + flt32_val = BIT(exp) + (mantissa >> shift); fraction = mantissa & GENMASK(shift - 1, 0); - return (((fraction * 100) >> shift) >= 50) ? sign * (float32 + 1) : sign * float32; + return (((fraction * 100) >> shift) >= 50) ? sign * (flt32_val + 1) : sign * flt32_val; } static u8 get_input_rep(u8 current_index, int sensor_idx, int report_id, From 5a45ed1ae34bb0e68944471f4bafb68e0a572791 Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Fri, 7 Jul 2023 12:27:22 +0530 Subject: [PATCH 002/224] HID: amd_sfh: Fix for shift-out-of-bounds commit 87854366176403438d01f368b09de3ec2234e0f5 upstream. Shift operation of 'exp' and 'shift' variables exceeds the maximum number of shift values in the u32 range leading to UBSAN shift-out-of-bounds. ... [ 6.120512] UBSAN: shift-out-of-bounds in drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_desc.c:149:50 [ 6.120598] shift exponent 104 is too large for 64-bit type 'long unsigned int' [ 6.120659] CPU: 4 PID: 96 Comm: kworker/4:1 Not tainted 6.4.0amd_1-next-20230519-dirty #10 [ 6.120665] Hardware name: AMD Birman-PHX/Birman-PHX, BIOS SFH_with_HPD_SEN.FD 04/05/2023 [ 6.120667] Workqueue: events amd_sfh_work_buffer [amd_sfh] [ 6.120687] Call Trace: [ 6.120690] [ 6.120694] dump_stack_lvl+0x48/0x70 [ 6.120704] dump_stack+0x10/0x20 [ 6.120707] ubsan_epilogue+0x9/0x40 [ 6.120716] __ubsan_handle_shift_out_of_bounds+0x10f/0x170 [ 6.120720] ? psi_group_change+0x25f/0x4b0 [ 6.120729] float_to_int.cold+0x18/0xba [amd_sfh] [ 6.120739] get_input_rep+0x57/0x340 [amd_sfh] [ 6.120748] ? __schedule+0xba7/0x1b60 [ 6.120756] ? __pfx_get_input_rep+0x10/0x10 [amd_sfh] [ 6.120764] amd_sfh_work_buffer+0x91/0x180 [amd_sfh] [ 6.120772] process_one_work+0x229/0x430 [ 6.120780] worker_thread+0x4a/0x3c0 [ 6.120784] ? __pfx_worker_thread+0x10/0x10 [ 6.120788] kthread+0xf7/0x130 [ 6.120792] ? __pfx_kthread+0x10/0x10 [ 6.120795] ret_from_fork+0x29/0x50 [ 6.120804] ... Fix this by adding the condition to validate shift ranges. Fixes: 93ce5e0231d7 ("HID: amd_sfh: Implement SFH1.1 functionality") Cc: stable@vger.kernel.org Tested-by: Kai-Heng Feng Signed-off-by: Basavaraj Natikar Signed-off-by: Akshata MukundShetty Link: https://lore.kernel.org/r/20230707065722.9036-3-Basavaraj.Natikar@amd.com Signed-off-by: Benjamin Tissoires Signed-off-by: Greg Kroah-Hartman --- drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_desc.c | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_desc.c b/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_desc.c index c81d20cd3081..06bdcf072d10 100644 --- a/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_desc.c +++ b/drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_desc.c @@ -143,16 +143,32 @@ static int float_to_int(u32 flt32_val) if (!exp && !mantissa) return 0; + /* + * Calculate the exponent and fraction part of floating + * point representation. + */ exp -= 127; if (exp < 0) { exp = -exp; + if (exp >= BITS_PER_TYPE(u32)) + return 0; zeropre = (((BIT(23) + mantissa) * 100) >> 23) >> exp; return zeropre >= 50 ? sign : 0; } shift = 23 - exp; - flt32_val = BIT(exp) + (mantissa >> shift); - fraction = mantissa & GENMASK(shift - 1, 0); + if (abs(shift) >= BITS_PER_TYPE(u32)) + return 0; + + if (shift < 0) { + shift = -shift; + flt32_val = BIT(exp) + (mantissa << shift); + shift = 0; + } else { + flt32_val = BIT(exp) + (mantissa >> shift); + } + + fraction = (shift == 0) ? 0 : mantissa & GENMASK(shift - 1, 0); return (((fraction * 100) >> shift) >= 50) ? sign * (flt32_val + 1) : sign * flt32_val; } From 4b5ab640aafca7ea0eb9f0ab939543b648acb2ab Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Tue, 27 Jun 2023 03:50:00 +0000 Subject: [PATCH 003/224] net: lan743x: Don't sleep in atomic context commit 7a8227b2e76be506b2ac64d2beac950ca04892a5 upstream. dev_set_rx_mode() grabs a spin_lock, and the lan743x implementation proceeds subsequently to go to sleep using readx_poll_timeout(). Introduce a helper wrapping the readx_poll_timeout_atomic() function and use it to replace the calls to readx_polL_timeout(). Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Cc: stable@vger.kernel.org Cc: Bryan Whitehead Cc: UNGLinuxDriver@microchip.com Signed-off-by: Moritz Fischer Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20230627035000.1295254-1-moritzf@google.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/microchip/lan743x_main.c | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 50eeecba1f18..e804613faa1f 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -144,6 +144,18 @@ static int lan743x_csr_light_reset(struct lan743x_adapter *adapter) !(data & HW_CFG_LRST_), 100000, 10000000); } +static int lan743x_csr_wait_for_bit_atomic(struct lan743x_adapter *adapter, + int offset, u32 bit_mask, + int target_value, int udelay_min, + int udelay_max, int count) +{ + u32 data; + + return readx_poll_timeout_atomic(LAN743X_CSR_READ_OP, offset, data, + target_value == !!(data & bit_mask), + udelay_max, udelay_min * count); +} + static int lan743x_csr_wait_for_bit(struct lan743x_adapter *adapter, int offset, u32 bit_mask, int target_value, int usleep_min, @@ -746,8 +758,8 @@ static int lan743x_dp_write(struct lan743x_adapter *adapter, u32 dp_sel; int i; - if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_, - 1, 40, 100, 100)) + if (lan743x_csr_wait_for_bit_atomic(adapter, DP_SEL, DP_SEL_DPRDY_, + 1, 40, 100, 100)) return -EIO; dp_sel = lan743x_csr_read(adapter, DP_SEL); dp_sel &= ~DP_SEL_MASK_; @@ -758,8 +770,9 @@ static int lan743x_dp_write(struct lan743x_adapter *adapter, lan743x_csr_write(adapter, DP_ADDR, addr + i); lan743x_csr_write(adapter, DP_DATA_0, buf[i]); lan743x_csr_write(adapter, DP_CMD, DP_CMD_WRITE_); - if (lan743x_csr_wait_for_bit(adapter, DP_SEL, DP_SEL_DPRDY_, - 1, 40, 100, 100)) + if (lan743x_csr_wait_for_bit_atomic(adapter, DP_SEL, + DP_SEL_DPRDY_, + 1, 40, 100, 100)) return -EIO; } From 2d57a1590f4d8c516f5aaf8fd5bb4f52d67275d8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 23 Jun 2023 12:08:14 -0700 Subject: [PATCH 004/224] workqueue: clean up WORK_* constant types, clarify masking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit afa4bb778e48d79e4a642ed41e3b4e0de7489a6c upstream. Dave Airlie reports that gcc-13.1.1 has started complaining about some of the workqueue code in 32-bit arm builds: kernel/workqueue.c: In function ‘get_work_pwq’: kernel/workqueue.c:713:24: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] 713 | return (void *)(data & WORK_STRUCT_WQ_DATA_MASK); | ^ [ ... a couple of other cases ... ] and while it's not immediately clear exactly why gcc started complaining about it now, I suspect it's some C23-induced enum type handlign fixup in gcc-13 is the cause. Whatever the reason for starting to complain, the code and data types are indeed disgusting enough that the complaint is warranted. The wq code ends up creating various "helper constants" (like that WORK_STRUCT_WQ_DATA_MASK) using an enum type, which is all kinds of confused. The mask needs to be 'unsigned long', not some unspecified enum type. To make matters worse, the actual "mask and cast to a pointer" is repeated a couple of times, and the cast isn't even always done to the right pointer, but - as the error case above - to a 'void *' with then the compiler finishing the job. That's now how we roll in the kernel. So create the masks using the proper types rather than some ambiguous enumeration, and use a nice helper that actually does the type conversion in one well-defined place. Incidentally, this magically makes clang generate better code. That, admittedly, is really just a sign of clang having been seriously confused before, and cleaning up the typing unconfuses the compiler too. Reported-by: Dave Airlie Link: https://lore.kernel.org/lkml/CAPM=9twNnV4zMCvrPkw3H-ajZOH-01JVh_kDrxdPYQErz8ZTdA@mail.gmail.com/ Cc: Arnd Bergmann Cc: Tejun Heo Cc: Nick Desaulniers Cc: Nathan Chancellor Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/workqueue.h | 15 ++++++++------- kernel/workqueue.c | 13 ++++++++----- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index a0143dd24430..3ca41b9da647 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -68,7 +68,6 @@ enum { WORK_OFFQ_FLAG_BASE = WORK_STRUCT_COLOR_SHIFT, __WORK_OFFQ_CANCELING = WORK_OFFQ_FLAG_BASE, - WORK_OFFQ_CANCELING = (1 << __WORK_OFFQ_CANCELING), /* * When a work item is off queue, its high bits point to the last @@ -79,12 +78,6 @@ enum { WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS, WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT, WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31, - WORK_OFFQ_POOL_NONE = (1LU << WORK_OFFQ_POOL_BITS) - 1, - - /* convenience constants */ - WORK_STRUCT_FLAG_MASK = (1UL << WORK_STRUCT_FLAG_BITS) - 1, - WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK, - WORK_STRUCT_NO_POOL = (unsigned long)WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT, /* bit mask for work_busy() return values */ WORK_BUSY_PENDING = 1 << 0, @@ -94,6 +87,14 @@ enum { WORKER_DESC_LEN = 24, }; +/* Convenience constants - of type 'unsigned long', not 'enum'! */ +#define WORK_OFFQ_CANCELING (1ul << __WORK_OFFQ_CANCELING) +#define WORK_OFFQ_POOL_NONE ((1ul << WORK_OFFQ_POOL_BITS) - 1) +#define WORK_STRUCT_NO_POOL (WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT) + +#define WORK_STRUCT_FLAG_MASK ((1ul << WORK_STRUCT_FLAG_BITS) - 1) +#define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK) + struct work_struct { atomic_long_t data; struct list_head entry; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 4dd494f786bc..1e1557e42d2c 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -698,12 +698,17 @@ static void clear_work_data(struct work_struct *work) set_work_data(work, WORK_STRUCT_NO_POOL, 0); } +static inline struct pool_workqueue *work_struct_pwq(unsigned long data) +{ + return (struct pool_workqueue *)(data & WORK_STRUCT_WQ_DATA_MASK); +} + static struct pool_workqueue *get_work_pwq(struct work_struct *work) { unsigned long data = atomic_long_read(&work->data); if (data & WORK_STRUCT_PWQ) - return (void *)(data & WORK_STRUCT_WQ_DATA_MASK); + return work_struct_pwq(data); else return NULL; } @@ -731,8 +736,7 @@ static struct worker_pool *get_work_pool(struct work_struct *work) assert_rcu_or_pool_mutex(); if (data & WORK_STRUCT_PWQ) - return ((struct pool_workqueue *) - (data & WORK_STRUCT_WQ_DATA_MASK))->pool; + return work_struct_pwq(data)->pool; pool_id = data >> WORK_OFFQ_POOL_SHIFT; if (pool_id == WORK_OFFQ_POOL_NONE) @@ -753,8 +757,7 @@ static int get_work_pool_id(struct work_struct *work) unsigned long data = atomic_long_read(&work->data); if (data & WORK_STRUCT_PWQ) - return ((struct pool_workqueue *) - (data & WORK_STRUCT_WQ_DATA_MASK))->pool->id; + return work_struct_pwq(data)->pool->id; return data >> WORK_OFFQ_POOL_SHIFT; } From 869ef4f2965bbb91157dad220133f76c16faba9b Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 24 Jun 2023 12:35:39 +0900 Subject: [PATCH 005/224] ksmbd: add missing compound request handing in some commands commit 7b7d709ef7cf285309157fb94c33f625dd22c5e1 upstream. This patch add the compound request handling to the some commands. Existing clients do not send these commands as compound requests, but ksmbd should consider that they may come. Cc: stable@vger.kernel.org Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/smb2pdu.c | 78 ++++++++++++++++++++++++++++------------- 1 file changed, 53 insertions(+), 25 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 2b7c0ba6a77d..af0428e56be3 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1926,14 +1926,16 @@ out_err: int smb2_tree_connect(struct ksmbd_work *work) { struct ksmbd_conn *conn = work->conn; - struct smb2_tree_connect_req *req = smb2_get_msg(work->request_buf); - struct smb2_tree_connect_rsp *rsp = smb2_get_msg(work->response_buf); + struct smb2_tree_connect_req *req; + struct smb2_tree_connect_rsp *rsp; struct ksmbd_session *sess = work->sess; char *treename = NULL, *name = NULL; struct ksmbd_tree_conn_status status; struct ksmbd_share_config *share; int rc = -EINVAL; + WORK_BUFFERS(work, req, rsp); + treename = smb_strndup_from_utf16(req->Buffer, le16_to_cpu(req->PathLength), true, conn->local_nls); @@ -2102,19 +2104,19 @@ static int smb2_create_open_flags(bool file_present, __le32 access, */ int smb2_tree_disconnect(struct ksmbd_work *work) { - struct smb2_tree_disconnect_rsp *rsp = smb2_get_msg(work->response_buf); + struct smb2_tree_disconnect_rsp *rsp; + struct smb2_tree_disconnect_req *req; struct ksmbd_session *sess = work->sess; struct ksmbd_tree_connect *tcon = work->tcon; + WORK_BUFFERS(work, req, rsp); + rsp->StructureSize = cpu_to_le16(4); inc_rfc1001_len(work->response_buf, 4); ksmbd_debug(SMB, "request\n"); if (!tcon || test_and_set_bit(TREE_CONN_EXPIRE, &tcon->status)) { - struct smb2_tree_disconnect_req *req = - smb2_get_msg(work->request_buf); - ksmbd_debug(SMB, "Invalid tid %d\n", req->hdr.Id.SyncId.TreeId); rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED; @@ -2137,10 +2139,14 @@ int smb2_tree_disconnect(struct ksmbd_work *work) int smb2_session_logoff(struct ksmbd_work *work) { struct ksmbd_conn *conn = work->conn; - struct smb2_logoff_rsp *rsp = smb2_get_msg(work->response_buf); + struct smb2_logoff_req *req; + struct smb2_logoff_rsp *rsp; struct ksmbd_session *sess; - struct smb2_logoff_req *req = smb2_get_msg(work->request_buf); - u64 sess_id = le64_to_cpu(req->hdr.SessionId); + u64 sess_id; + + WORK_BUFFERS(work, req, rsp); + + sess_id = le64_to_cpu(req->hdr.SessionId); rsp->StructureSize = cpu_to_le16(4); inc_rfc1001_len(work->response_buf, 4); @@ -2180,12 +2186,14 @@ int smb2_session_logoff(struct ksmbd_work *work) */ static noinline int create_smb2_pipe(struct ksmbd_work *work) { - struct smb2_create_rsp *rsp = smb2_get_msg(work->response_buf); - struct smb2_create_req *req = smb2_get_msg(work->request_buf); + struct smb2_create_rsp *rsp; + struct smb2_create_req *req; int id; int err; char *name; + WORK_BUFFERS(work, req, rsp); + name = smb_strndup_from_utf16(req->Buffer, le16_to_cpu(req->NameLength), 1, work->conn->local_nls); if (IS_ERR(name)) { @@ -5321,8 +5329,10 @@ int smb2_query_info(struct ksmbd_work *work) static noinline int smb2_close_pipe(struct ksmbd_work *work) { u64 id; - struct smb2_close_req *req = smb2_get_msg(work->request_buf); - struct smb2_close_rsp *rsp = smb2_get_msg(work->response_buf); + struct smb2_close_req *req; + struct smb2_close_rsp *rsp; + + WORK_BUFFERS(work, req, rsp); id = req->VolatileFileId; ksmbd_session_rpc_close(work->sess, id); @@ -5464,6 +5474,9 @@ int smb2_echo(struct ksmbd_work *work) { struct smb2_echo_rsp *rsp = smb2_get_msg(work->response_buf); + if (work->next_smb2_rcv_hdr_off) + rsp = ksmbd_resp_buf_next(work); + rsp->StructureSize = cpu_to_le16(4); rsp->Reserved = 0; inc_rfc1001_len(work->response_buf, 4); @@ -6178,8 +6191,10 @@ static noinline int smb2_read_pipe(struct ksmbd_work *work) int nbytes = 0, err; u64 id; struct ksmbd_rpc_command *rpc_resp; - struct smb2_read_req *req = smb2_get_msg(work->request_buf); - struct smb2_read_rsp *rsp = smb2_get_msg(work->response_buf); + struct smb2_read_req *req; + struct smb2_read_rsp *rsp; + + WORK_BUFFERS(work, req, rsp); id = req->VolatileFileId; @@ -6427,14 +6442,16 @@ out: */ static noinline int smb2_write_pipe(struct ksmbd_work *work) { - struct smb2_write_req *req = smb2_get_msg(work->request_buf); - struct smb2_write_rsp *rsp = smb2_get_msg(work->response_buf); + struct smb2_write_req *req; + struct smb2_write_rsp *rsp; struct ksmbd_rpc_command *rpc_resp; u64 id = 0; int err = 0, ret = 0; char *data_buf; size_t length; + WORK_BUFFERS(work, req, rsp); + length = le32_to_cpu(req->Length); id = req->VolatileFileId; @@ -6703,6 +6720,9 @@ int smb2_cancel(struct ksmbd_work *work) struct ksmbd_work *iter; struct list_head *command_list; + if (work->next_smb2_rcv_hdr_off) + hdr = ksmbd_resp_buf_next(work); + ksmbd_debug(SMB, "smb2 cancel called on mid %llu, async flags 0x%x\n", hdr->MessageId, hdr->Flags); @@ -6862,8 +6882,8 @@ static inline bool lock_defer_pending(struct file_lock *fl) */ int smb2_lock(struct ksmbd_work *work) { - struct smb2_lock_req *req = smb2_get_msg(work->request_buf); - struct smb2_lock_rsp *rsp = smb2_get_msg(work->response_buf); + struct smb2_lock_req *req; + struct smb2_lock_rsp *rsp; struct smb2_lock_element *lock_ele; struct ksmbd_file *fp = NULL; struct file_lock *flock = NULL; @@ -6880,6 +6900,8 @@ int smb2_lock(struct ksmbd_work *work) LIST_HEAD(rollback_list); int prior_lock = 0; + WORK_BUFFERS(work, req, rsp); + ksmbd_debug(SMB, "Received lock request\n"); fp = ksmbd_lookup_fd_slow(work, req->VolatileFileId, req->PersistentFileId); if (!fp) { @@ -7992,8 +8014,8 @@ out: */ static void smb20_oplock_break_ack(struct ksmbd_work *work) { - struct smb2_oplock_break *req = smb2_get_msg(work->request_buf); - struct smb2_oplock_break *rsp = smb2_get_msg(work->response_buf); + struct smb2_oplock_break *req; + struct smb2_oplock_break *rsp; struct ksmbd_file *fp; struct oplock_info *opinfo = NULL; __le32 err = 0; @@ -8002,6 +8024,8 @@ static void smb20_oplock_break_ack(struct ksmbd_work *work) char req_oplevel = 0, rsp_oplevel = 0; unsigned int oplock_change_type; + WORK_BUFFERS(work, req, rsp); + volatile_id = req->VolatileFid; persistent_id = req->PersistentFid; req_oplevel = req->OplockLevel; @@ -8136,8 +8160,8 @@ static int check_lease_state(struct lease *lease, __le32 req_state) static void smb21_lease_break_ack(struct ksmbd_work *work) { struct ksmbd_conn *conn = work->conn; - struct smb2_lease_ack *req = smb2_get_msg(work->request_buf); - struct smb2_lease_ack *rsp = smb2_get_msg(work->response_buf); + struct smb2_lease_ack *req; + struct smb2_lease_ack *rsp; struct oplock_info *opinfo; __le32 err = 0; int ret = 0; @@ -8145,6 +8169,8 @@ static void smb21_lease_break_ack(struct ksmbd_work *work) __le32 lease_state; struct lease *lease; + WORK_BUFFERS(work, req, rsp); + ksmbd_debug(OPLOCK, "smb21 lease break, lease state(0x%x)\n", le32_to_cpu(req->LeaseState)); opinfo = lookup_lease_in_table(conn, req->LeaseKey); @@ -8270,8 +8296,10 @@ err_out: */ int smb2_oplock_break(struct ksmbd_work *work) { - struct smb2_oplock_break *req = smb2_get_msg(work->request_buf); - struct smb2_oplock_break *rsp = smb2_get_msg(work->response_buf); + struct smb2_oplock_break *req; + struct smb2_oplock_break *rsp; + + WORK_BUFFERS(work, req, rsp); switch (le16_to_cpu(req->StructureSize)) { case OP_BREAK_STRUCT_SIZE_20: From ef572ffa8eb44111eed2925fbb2adca78bdcbf61 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 24 Jun 2023 12:33:09 +0900 Subject: [PATCH 006/224] ksmbd: fix out of bounds read in smb2_sess_setup commit 98422bdd4cb3ca4d08844046f6507d7ec2c2b8d8 upstream. ksmbd does not consider the case of that smb2 session setup is in compound request. If this is the second payload of the compound, OOB read issue occurs while processing the first payload in the smb2_sess_setup(). Cc: stable@vger.kernel.org Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-21355 Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/smb2pdu.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index af0428e56be3..26cf73d664f9 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1339,9 +1339,8 @@ static int decode_negotiation_token(struct ksmbd_conn *conn, static int ntlm_negotiate(struct ksmbd_work *work, struct negotiate_message *negblob, - size_t negblob_len) + size_t negblob_len, struct smb2_sess_setup_rsp *rsp) { - struct smb2_sess_setup_rsp *rsp = smb2_get_msg(work->response_buf); struct challenge_message *chgblob; unsigned char *spnego_blob = NULL; u16 spnego_blob_len; @@ -1446,10 +1445,10 @@ static struct ksmbd_user *session_user(struct ksmbd_conn *conn, return user; } -static int ntlm_authenticate(struct ksmbd_work *work) +static int ntlm_authenticate(struct ksmbd_work *work, + struct smb2_sess_setup_req *req, + struct smb2_sess_setup_rsp *rsp) { - struct smb2_sess_setup_req *req = smb2_get_msg(work->request_buf); - struct smb2_sess_setup_rsp *rsp = smb2_get_msg(work->response_buf); struct ksmbd_conn *conn = work->conn; struct ksmbd_session *sess = work->sess; struct channel *chann = NULL; @@ -1582,10 +1581,10 @@ binding_session: } #ifdef CONFIG_SMB_SERVER_KERBEROS5 -static int krb5_authenticate(struct ksmbd_work *work) +static int krb5_authenticate(struct ksmbd_work *work, + struct smb2_sess_setup_req *req, + struct smb2_sess_setup_rsp *rsp) { - struct smb2_sess_setup_req *req = smb2_get_msg(work->request_buf); - struct smb2_sess_setup_rsp *rsp = smb2_get_msg(work->response_buf); struct ksmbd_conn *conn = work->conn; struct ksmbd_session *sess = work->sess; char *in_blob, *out_blob; @@ -1662,7 +1661,9 @@ static int krb5_authenticate(struct ksmbd_work *work) return 0; } #else -static int krb5_authenticate(struct ksmbd_work *work) +static int krb5_authenticate(struct ksmbd_work *work, + struct smb2_sess_setup_req *req, + struct smb2_sess_setup_rsp *rsp) { return -EOPNOTSUPP; } @@ -1671,8 +1672,8 @@ static int krb5_authenticate(struct ksmbd_work *work) int smb2_sess_setup(struct ksmbd_work *work) { struct ksmbd_conn *conn = work->conn; - struct smb2_sess_setup_req *req = smb2_get_msg(work->request_buf); - struct smb2_sess_setup_rsp *rsp = smb2_get_msg(work->response_buf); + struct smb2_sess_setup_req *req; + struct smb2_sess_setup_rsp *rsp; struct ksmbd_session *sess; struct negotiate_message *negblob; unsigned int negblob_len, negblob_off; @@ -1680,6 +1681,8 @@ int smb2_sess_setup(struct ksmbd_work *work) ksmbd_debug(SMB, "Received request for session setup\n"); + WORK_BUFFERS(work, req, rsp); + rsp->StructureSize = cpu_to_le16(9); rsp->SessionFlags = 0; rsp->SecurityBufferOffset = cpu_to_le16(72); @@ -1801,7 +1804,7 @@ int smb2_sess_setup(struct ksmbd_work *work) if (conn->preferred_auth_mech & (KSMBD_AUTH_KRB5 | KSMBD_AUTH_MSKRB5)) { - rc = krb5_authenticate(work); + rc = krb5_authenticate(work, req, rsp); if (rc) { rc = -EINVAL; goto out_err; @@ -1815,7 +1818,7 @@ int smb2_sess_setup(struct ksmbd_work *work) sess->Preauth_HashValue = NULL; } else if (conn->preferred_auth_mech == KSMBD_AUTH_NTLMSSP) { if (negblob->MessageType == NtLmNegotiate) { - rc = ntlm_negotiate(work, negblob, negblob_len); + rc = ntlm_negotiate(work, negblob, negblob_len, rsp); if (rc) goto out_err; rsp->hdr.Status = @@ -1828,7 +1831,7 @@ int smb2_sess_setup(struct ksmbd_work *work) le16_to_cpu(rsp->SecurityBufferLength) - 1); } else if (negblob->MessageType == NtLmAuthenticate) { - rc = ntlm_authenticate(work); + rc = ntlm_authenticate(work, req, rsp); if (rc) goto out_err; From d48029c655e43562cfaf64df2984bc7340bde780 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 20 Jun 2023 08:22:02 -0300 Subject: [PATCH 007/224] drm/panel: simple: Add connector_type for innolux_at043tn24 [ Upstream commit 2c56a751845ddfd3078ebe79981aaaa182629163 ] The innolux at043tn24 display is a parallel LCD. Pass the 'connector_type' information to avoid the following warning: panel-simple panel: Specify missing connector_type Signed-off-by: Fabio Estevam Fixes: 41bcceb4de9c ("drm/panel: simple: Add support for Innolux AT043TN24") Reviewed-by: Sam Ravnborg Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20230620112202.654981-1-festevam@gmail.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-simple.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 7ca00b032336..e49d352339ff 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -2117,6 +2117,7 @@ static const struct panel_desc innolux_at043tn24 = { .height = 54, }, .bus_format = MEDIA_BUS_FMT_RGB888_1X24, + .connector_type = DRM_MODE_CONNECTOR_DPI, .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE, }; From 02d43b8a4f0a0e75abaec934aa69f18cde1dd1e8 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 13 Jun 2023 06:58:13 -0700 Subject: [PATCH 008/224] drm/bridge: ti-sn65dsi86: Fix auxiliary bus lifetime [ Upstream commit 7aa83fbd712a6f08ffa67890061f26d140c2a84f ] Memory for the "struct device" for any given device isn't supposed to be released until the device's release() is called. This is important because someone might be holding a kobject reference to the "struct device" and might try to access one of its members even after any other cleanup/uninitialization has happened. Code analysis of ti-sn65dsi86 shows that this isn't quite right. When the code was written, it was believed that we could rely on the fact that the child devices would all be freed before the parent devices and thus we didn't need to worry about a release() function. While I still believe that the parent's "struct device" is guaranteed to outlive the child's "struct device" (because the child holds a kobject reference to the parent), the parent's "devm" allocated memory is a different story. That appears to be freed much earlier. Let's make this better for ti-sn65dsi86 by allocating each auxiliary with kzalloc and then free that memory in the release(). Fixes: bf73537f411b ("drm/bridge: ti-sn65dsi86: Break GPIO and MIPI-to-eDP bridge into sub-drivers") Suggested-by: Stephen Boyd Reviewed-by: Stephen Boyd Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20230613065812.v2.1.I24b838a5b4151fb32bccd6f36397998ea2df9fbb@changeid Signed-off-by: Sasha Levin --- drivers/gpu/drm/bridge/ti-sn65dsi86.c | 35 +++++++++++++++++---------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index d16775c973c4..b89f7f7ca188 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -170,10 +170,10 @@ * @pwm_refclk_freq: Cache for the reference clock input to the PWM. */ struct ti_sn65dsi86 { - struct auxiliary_device bridge_aux; - struct auxiliary_device gpio_aux; - struct auxiliary_device aux_aux; - struct auxiliary_device pwm_aux; + struct auxiliary_device *bridge_aux; + struct auxiliary_device *gpio_aux; + struct auxiliary_device *aux_aux; + struct auxiliary_device *pwm_aux; struct device *dev; struct regmap *regmap; @@ -468,27 +468,34 @@ static void ti_sn65dsi86_delete_aux(void *data) auxiliary_device_delete(data); } -/* - * AUX bus docs say that a non-NULL release is mandatory, but it makes no - * sense for the model used here where all of the aux devices are allocated - * in the single shared structure. We'll use this noop as a workaround. - */ -static void ti_sn65dsi86_noop(struct device *dev) {} +static void ti_sn65dsi86_aux_device_release(struct device *dev) +{ + struct auxiliary_device *aux = container_of(dev, struct auxiliary_device, dev); + + kfree(aux); +} static int ti_sn65dsi86_add_aux_device(struct ti_sn65dsi86 *pdata, - struct auxiliary_device *aux, + struct auxiliary_device **aux_out, const char *name) { struct device *dev = pdata->dev; + struct auxiliary_device *aux; int ret; + aux = kzalloc(sizeof(*aux), GFP_KERNEL); + if (!aux) + return -ENOMEM; + aux->name = name; aux->dev.parent = dev; - aux->dev.release = ti_sn65dsi86_noop; + aux->dev.release = ti_sn65dsi86_aux_device_release; device_set_of_node_from_dev(&aux->dev, dev); ret = auxiliary_device_init(aux); - if (ret) + if (ret) { + kfree(aux); return ret; + } ret = devm_add_action_or_reset(dev, ti_sn65dsi86_uninit_aux, aux); if (ret) return ret; @@ -497,6 +504,8 @@ static int ti_sn65dsi86_add_aux_device(struct ti_sn65dsi86 *pdata, if (ret) return ret; ret = devm_add_action_or_reset(dev, ti_sn65dsi86_delete_aux, aux); + if (!ret) + *aux_out = aux; return ret; } From 24b24863a0128152db3fb3aa33dc11b18a9b6792 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Mon, 26 Jun 2023 15:01:03 +0200 Subject: [PATCH 009/224] swiotlb: always set the number of areas before allocating the pool [ Upstream commit aabd12609f91155f26584508b01f548215cc3c0c ] The number of areas defaults to the number of possible CPUs. However, the total number of slots may have to be increased after adjusting the number of areas. Consequently, the number of areas must be determined before allocating the memory pool. This is even explained with a comment in swiotlb_init_remap(), but swiotlb_init_late() adjusts the number of areas after slots are already allocated. The areas may end up being smaller than IO_TLB_SEGSIZE, which breaks per-area locking. While fixing swiotlb_init_late(), move all relevant comments before the definition of swiotlb_adjust_nareas() and convert them to kernel-doc. Fixes: 20347fca71a3 ("swiotlb: split up the global swiotlb lock") Signed-off-by: Petr Tesarik Reviewed-by: Roberto Sassu Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- kernel/dma/swiotlb.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 7f4ad5e70b40..396106541254 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -117,9 +117,16 @@ static bool round_up_default_nslabs(void) return true; } +/** + * swiotlb_adjust_nareas() - adjust the number of areas and slots + * @nareas: Desired number of areas. Zero is treated as 1. + * + * Adjust the default number of areas in a memory pool. + * The default size of the memory pool may also change to meet minimum area + * size requirements. + */ static void swiotlb_adjust_nareas(unsigned int nareas) { - /* use a single area when non is specified */ if (!nareas) nareas = 1; else if (!is_power_of_2(nareas)) @@ -318,10 +325,6 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, if (swiotlb_force_disable) return; - /* - * default_nslabs maybe changed when adjust area number. - * So allocate bounce buffer after adjusting area number. - */ if (!default_nareas) swiotlb_adjust_nareas(num_possible_cpus()); @@ -398,6 +401,9 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask, if (swiotlb_force_disable) return 0; + if (!default_nareas) + swiotlb_adjust_nareas(num_possible_cpus()); + retry: order = get_order(nslabs << IO_TLB_SHIFT); nslabs = SLABS_PER_PAGE << order; @@ -432,9 +438,6 @@ retry: (PAGE_SIZE << order) >> 20); } - if (!default_nareas) - swiotlb_adjust_nareas(num_possible_cpus()); - area_order = get_order(array_size(sizeof(*mem->areas), default_nareas)); mem->areas = (struct io_tlb_area *) From fc3db7fbdf58a2a091764889f15f6e37978aa2de Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Mon, 31 Oct 2022 19:13:27 +1100 Subject: [PATCH 010/224] swiotlb: reduce the swiotlb buffer size on allocation failure [ Upstream commit 8d58aa484920c4f9be4834a7aeb446cdced21a37 ] At the moment the AMD encrypted platform reserves 6% of RAM for SWIOTLB or 1GB, whichever is less. However it is possible that there is no block big enough in the low memory which make SWIOTLB allocation fail and the kernel continues without DMA. In such case a VM hangs on DMA. This moves alloc+remap to a helper and calls it from a loop where the size is halved on each iteration. This updates default_nslabs on successful allocation which looks like an oversight as not doing so should have broken callers of swiotlb_size_or_default(). Signed-off-by: Alexey Kardashevskiy Reviewed-by: Pankaj Gupta Signed-off-by: Christoph Hellwig Stable-dep-of: 8ac04063354a ("swiotlb: reduce the number of areas to match actual memory pool size") Signed-off-by: Sasha Levin --- kernel/dma/swiotlb.c | 63 +++++++++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 24 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 396106541254..cc0c55ed2042 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -307,6 +307,37 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start, return; } +static void *swiotlb_memblock_alloc(unsigned long nslabs, unsigned int flags, + int (*remap)(void *tlb, unsigned long nslabs)) +{ + size_t bytes = PAGE_ALIGN(nslabs << IO_TLB_SHIFT); + void *tlb; + + /* + * By default allocate the bounce buffer memory from low memory, but + * allow to pick a location everywhere for hypervisors with guest + * memory encryption. + */ + if (flags & SWIOTLB_ANY) + tlb = memblock_alloc(bytes, PAGE_SIZE); + else + tlb = memblock_alloc_low(bytes, PAGE_SIZE); + + if (!tlb) { + pr_warn("%s: Failed to allocate %zu bytes tlb structure\n", + __func__, bytes); + return NULL; + } + + if (remap && remap(tlb, nslabs) < 0) { + memblock_free(tlb, PAGE_ALIGN(bytes)); + pr_warn("%s: Failed to remap %zu bytes\n", __func__, bytes); + return NULL; + } + + return tlb; +} + /* * Statically reserve bounce buffer space and initialize bounce buffer data * structures for the software IO TLB used to implement the DMA API. @@ -317,7 +348,6 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, struct io_tlb_mem *mem = &io_tlb_default_mem; unsigned long nslabs; size_t alloc_size; - size_t bytes; void *tlb; if (!addressing_limit && !swiotlb_force_bounce) @@ -329,31 +359,16 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, swiotlb_adjust_nareas(num_possible_cpus()); nslabs = default_nslabs; - /* - * By default allocate the bounce buffer memory from low memory, but - * allow to pick a location everywhere for hypervisors with guest - * memory encryption. - */ -retry: - bytes = PAGE_ALIGN(nslabs << IO_TLB_SHIFT); - if (flags & SWIOTLB_ANY) - tlb = memblock_alloc(bytes, PAGE_SIZE); - else - tlb = memblock_alloc_low(bytes, PAGE_SIZE); - if (!tlb) { - pr_warn("%s: failed to allocate tlb structure\n", __func__); - return; + while ((tlb = swiotlb_memblock_alloc(nslabs, flags, remap)) == NULL) { + if (nslabs <= IO_TLB_MIN_SLABS) + return; + nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE); } - if (remap && remap(tlb, nslabs) < 0) { - memblock_free(tlb, PAGE_ALIGN(bytes)); - - nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE); - if (nslabs >= IO_TLB_MIN_SLABS) - goto retry; - - pr_warn("%s: Failed to remap %zu bytes\n", __func__, bytes); - return; + if (default_nslabs != nslabs) { + pr_info("SWIOTLB bounce buffer size adjusted %lu -> %lu slabs", + default_nslabs, nslabs); + default_nslabs = nslabs; } alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), nslabs)); From fd5b64c1cf41c15303b19bead2d9a0c3d30911bd Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Mon, 26 Jun 2023 15:01:04 +0200 Subject: [PATCH 011/224] swiotlb: reduce the number of areas to match actual memory pool size [ Upstream commit 8ac04063354a01a484d2e55d20ed1958aa0d3392 ] Although the desired size of the SWIOTLB memory pool is increased in swiotlb_adjust_nareas() to match the number of areas, the actual allocation may be smaller, which may require reducing the number of areas. For example, Xen uses swiotlb_init_late(), which in turn uses the page allocator. On x86, page size is 4 KiB and MAX_ORDER is 10 (1024 pages), resulting in a maximum memory pool size of 4 MiB. This corresponds to 2048 slots of 2 KiB each. The minimum area size is 128 (IO_TLB_SEGSIZE), allowing at most 2048 / 128 = 16 areas. If num_possible_cpus() is greater than the maximum number of areas, areas are smaller than IO_TLB_SEGSIZE and contiguous groups of free slots will span multiple areas. When allocating and freeing slots, only one area will be properly locked, causing race conditions on the unlocked slots and ultimately data corruption, kernel hangs and crashes. Fixes: 20347fca71a3 ("swiotlb: split up the global swiotlb lock") Signed-off-by: Petr Tesarik Reviewed-by: Roberto Sassu Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- kernel/dma/swiotlb.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index cc0c55ed2042..491d3c86c228 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -140,6 +140,23 @@ static void swiotlb_adjust_nareas(unsigned int nareas) (default_nslabs << IO_TLB_SHIFT) >> 20); } +/** + * limit_nareas() - get the maximum number of areas for a given memory pool size + * @nareas: Desired number of areas. + * @nslots: Total number of slots in the memory pool. + * + * Limit the number of areas to the maximum possible number of areas in + * a memory pool of the given size. + * + * Return: Maximum possible number of areas. + */ +static unsigned int limit_nareas(unsigned int nareas, unsigned long nslots) +{ + if (nslots < nareas * IO_TLB_SEGSIZE) + return nslots / IO_TLB_SEGSIZE; + return nareas; +} + static int __init setup_io_tlb_npages(char *str) { @@ -347,6 +364,7 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, { struct io_tlb_mem *mem = &io_tlb_default_mem; unsigned long nslabs; + unsigned int nareas; size_t alloc_size; void *tlb; @@ -359,10 +377,12 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags, swiotlb_adjust_nareas(num_possible_cpus()); nslabs = default_nslabs; + nareas = limit_nareas(default_nareas, nslabs); while ((tlb = swiotlb_memblock_alloc(nslabs, flags, remap)) == NULL) { if (nslabs <= IO_TLB_MIN_SLABS) return; nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE); + nareas = limit_nareas(nareas, nslabs); } if (default_nslabs != nslabs) { @@ -408,6 +428,7 @@ int swiotlb_init_late(size_t size, gfp_t gfp_mask, { struct io_tlb_mem *mem = &io_tlb_default_mem; unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE); + unsigned int nareas; unsigned char *vstart = NULL; unsigned int order, area_order; bool retried = false; @@ -453,8 +474,8 @@ retry: (PAGE_SIZE << order) >> 20); } - area_order = get_order(array_size(sizeof(*mem->areas), - default_nareas)); + nareas = limit_nareas(default_nareas, nslabs); + area_order = get_order(array_size(sizeof(*mem->areas), nareas)); mem->areas = (struct io_tlb_area *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, area_order); if (!mem->areas) @@ -468,7 +489,7 @@ retry: set_memory_decrypted((unsigned long)vstart, (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT); swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, 0, true, - default_nareas); + nareas); swiotlb_print_info(); return 0; From 1294311ce9ad812156223613180d4efbabbaf85b Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 15 Jun 2023 22:16:02 +0200 Subject: [PATCH 012/224] drm/panel: simple: Add Powertip PH800480T013 drm_display_mode flags [ Upstream commit 1c519980aced3da1fae37c1339cf43b24eccdee7 ] Add missing drm_display_mode DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC flags. Those are used by various bridges in the pipeline to correctly configure its sync signals polarity. Fixes: d69de69f2be1 ("drm/panel: simple: Add Powertip PH800480T013 panel") Signed-off-by: Marek Vasut Reviewed-by: Sam Ravnborg Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20230615201602.565948-1-marex@denx.de Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-simple.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index e49d352339ff..f851aaf2c591 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -3110,6 +3110,7 @@ static const struct drm_display_mode powertip_ph800480t013_idf02_mode = { .vsync_start = 480 + 49, .vsync_end = 480 + 49 + 2, .vtotal = 480 + 49 + 2 + 22, + .flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC, }; static const struct panel_desc powertip_ph800480t013_idf02 = { From dfaed769b935f359f37587f6f2e7ff9ef1ac5fb2 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Fri, 9 Jun 2023 17:40:23 -0700 Subject: [PATCH 013/224] ice: Fix max_rate check while configuring TX rate limits [ Upstream commit 5f16da6ee6ac32e6c8098bc4cfcc4f170694f9da ] Remove incorrect check in ice_validate_mqprio_opt() that limits filter configuration when sum of max_rates of all TCs exceeds the link speed. The max rate of each TC is unrelated to value used by other TCs and is valid as long as it is less than link speed. Fixes: fbc7b27af0f9 ("ice: enable ndo_setup_tc support for mqprio_qdisc") Signed-off-by: Sridhar Samudrala Signed-off-by: Sudheer Mogilappagari Tested-by: Bharathi Sreenivas Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_main.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 7a5ec3ce3407..8f77088900e9 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -7852,10 +7852,10 @@ static int ice_validate_mqprio_qopt(struct ice_vsi *vsi, struct tc_mqprio_qopt_offload *mqprio_qopt) { - u64 sum_max_rate = 0, sum_min_rate = 0; int non_power_of_2_qcount = 0; struct ice_pf *pf = vsi->back; int max_rss_q_cnt = 0; + u64 sum_min_rate = 0; struct device *dev; int i, speed; u8 num_tc; @@ -7871,6 +7871,7 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi, dev = ice_pf_to_dev(pf); vsi->ch_rss_size = 0; num_tc = mqprio_qopt->qopt.num_tc; + speed = ice_get_link_speed_kbps(vsi); for (i = 0; num_tc; i++) { int qcount = mqprio_qopt->qopt.count[i]; @@ -7911,7 +7912,6 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi, */ max_rate = mqprio_qopt->max_rate[i]; max_rate = div_u64(max_rate, ICE_BW_KBPS_DIVISOR); - sum_max_rate += max_rate; /* min_rate is minimum guaranteed rate and it can't be zero */ min_rate = mqprio_qopt->min_rate[i]; @@ -7924,6 +7924,12 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi, return -EINVAL; } + if (max_rate && max_rate > speed) { + dev_err(dev, "TC%d: max_rate(%llu Kbps) > link speed of %u Kbps\n", + i, max_rate, speed); + return -EINVAL; + } + iter_div_u64_rem(min_rate, ICE_MIN_BW_LIMIT, &rem); if (rem) { dev_err(dev, "TC%d: Min Rate not multiple of %u Kbps", @@ -7961,12 +7967,6 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi, (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) return -EINVAL; - speed = ice_get_link_speed_kbps(vsi); - if (sum_max_rate && sum_max_rate > (u64)speed) { - dev_err(dev, "Invalid max Tx rate(%llu) Kbps > speed(%u) Kbps specified\n", - sum_max_rate, speed); - return -EINVAL; - } if (sum_min_rate && sum_min_rate > (u64)speed) { dev_err(dev, "Invalid min Tx rate(%llu) Kbps > speed (%u) Kbps specified\n", sum_min_rate, speed); From 68b654e9eb5b18776224f976ebd5a486d55a4014 Mon Sep 17 00:00:00 2001 From: Muhammad Husaini Zulkifli Date: Wed, 17 May 2023 08:18:12 +0800 Subject: [PATCH 014/224] igc: Remove delay during TX ring configuration [ Upstream commit cca28ceac7c7857bc2d313777017585aef00bcc4 ] Remove unnecessary delay during the TX ring configuration. This will cause delay, especially during link down and link up activity. Furthermore, old SKUs like as I225 will call the reset_adapter to reset the controller during TSN mode Gate Control List (GCL) setting. This will add more time to the configuration of the real-time use case. It doesn't mentioned about this delay in the Software User Manual. It might have been ported from legacy code I210 in the past. Fixes: 13b5b7fd6a4a ("igc: Add support for Tx/Rx rings") Signed-off-by: Muhammad Husaini Zulkifli Acked-by: Sasha Neftin Tested-by: Naama Meir Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igc/igc_main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index b67a6a81474f..93e90c353f1a 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -709,7 +709,6 @@ static void igc_configure_tx_ring(struct igc_adapter *adapter, /* disable the queue */ wr32(IGC_TXDCTL(reg_idx), 0); wrfl(); - mdelay(10); wr32(IGC_TDLEN(reg_idx), ring->count * sizeof(union igc_adv_tx_desc)); From 7ca1914cbd3bb044637cbe5bc053f4908bf067d4 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 28 Jun 2023 08:59:34 +0800 Subject: [PATCH 015/224] net/mlx5e: fix double free in mlx5e_destroy_flow_table [ Upstream commit 884abe45a9014d0de2e6edb0630dfd64f23f1d1b ] In function accel_fs_tcp_create_groups(), when the ft->g memory is successfully allocated but the 'in' memory fails to be allocated, the memory pointed to by ft->g is released once. And in function accel_fs_tcp_create_table, mlx5e_destroy_flow_table is called to release the memory pointed to by ft->g again. This will cause double free problem. Fixes: c062d52ac24c ("net/mlx5e: Receive flow steering framework for accelerated TCP flows") Signed-off-by: Zhengchao Shao Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c index d7c020f72401..06c47404996b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c @@ -190,6 +190,7 @@ static int accel_fs_tcp_create_groups(struct mlx5e_flow_table *ft, in = kvzalloc(inlen, GFP_KERNEL); if (!in || !ft->g) { kfree(ft->g); + ft->g = NULL; kvfree(in); return -ENOMEM; } From 75df2fe6d160e16be880aacacd521b135d7177c9 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Fri, 30 Jun 2023 09:49:02 +0800 Subject: [PATCH 016/224] net/mlx5e: fix memory leak in mlx5e_fs_tt_redirect_any_create [ Upstream commit 3250affdc658557a41df9c5fb567723e421f8bf2 ] The memory pointed to by the fs->any pointer is not freed in the error path of mlx5e_fs_tt_redirect_any_create, which can lead to a memory leak. Fix by freeing the memory in the error path, thereby making the error path identical to mlx5e_fs_tt_redirect_any_destroy(). Fixes: 0f575c20bf06 ("net/mlx5e: Introduce Flow Steering ANY API") Signed-off-by: Zhengchao Shao Reviewed-by: Simon Horman Reviewed-by: Rahul Rameshbabu Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c index 03cb79adf912..be83ad9db82a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c @@ -594,7 +594,7 @@ int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs) err = fs_any_create_table(fs); if (err) - return err; + goto err_free_any; err = fs_any_enable(fs); if (err) @@ -606,8 +606,8 @@ int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs) err_destroy_table: fs_any_destroy_table(fs_any); - - kfree(fs_any); +err_free_any: mlx5e_fs_set_any(fs, NULL); + kfree(fs_any); return err; } From 83a8f7337a14cdb215c76a8f4cf3f3be8b59177d Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Fri, 30 Jun 2023 09:49:03 +0800 Subject: [PATCH 017/224] net/mlx5e: fix memory leak in mlx5e_ptp_open [ Upstream commit d543b649ffe58a0cb4b6948b3305069c5980a1fa ] When kvzalloc_node or kvzalloc failed in mlx5e_ptp_open, the memory pointed by "c" or "cparams" is not freed, which can lead to a memory leak. Fix by freeing the array in the error path. Fixes: 145e5637d941 ("net/mlx5e: Add TX PTP port object support") Signed-off-by: Zhengchao Shao Reviewed-by: Rahul Rameshbabu Reviewed-by: Gal Pressman Reviewed-by: Simon Horman Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index efd02ce4425d..72b4781f0eb2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -729,8 +729,10 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, c = kvzalloc_node(sizeof(*c), GFP_KERNEL, dev_to_node(mlx5_core_dma_dev(mdev))); cparams = kvzalloc(sizeof(*cparams), GFP_KERNEL); - if (!c || !cparams) - return -ENOMEM; + if (!c || !cparams) { + err = -ENOMEM; + goto err_free; + } c->priv = priv; c->mdev = priv->mdev; From e962fd5933ebc767ce2a1cf7b7c85035b5a5d04c Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Thu, 8 Jun 2023 09:32:10 +0200 Subject: [PATCH 018/224] net/mlx5e: Check for NOT_READY flag state after locking [ Upstream commit 65e64640e97c0f223e77f9ea69b5a46186b93470 ] Currently the check for NOT_READY flag is performed before obtaining the necessary lock. This opens a possibility for race condition when the flow is concurrently removed from unready_flows list by the workqueue task, which causes a double-removal from the list and a crash[0]. Fix the issue by moving the flag check inside the section protected by uplink_priv->unready_flows_lock mutex. [0]: [44376.389654] general protection fault, probably for non-canonical address 0xdead000000000108: 0000 [#1] SMP [44376.391665] CPU: 7 PID: 59123 Comm: tc Not tainted 6.4.0-rc4+ #1 [44376.392984] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [44376.395342] RIP: 0010:mlx5e_tc_del_fdb_flow+0xb3/0x340 [mlx5_core] [44376.396857] Code: 00 48 8b b8 68 ce 02 00 e8 8a 4d 02 00 4c 8d a8 a8 01 00 00 4c 89 ef e8 8b 79 88 e1 48 8b 83 98 06 00 00 48 8b 93 90 06 00 00 <48> 89 42 08 48 89 10 48 b8 00 01 00 00 00 00 ad de 48 89 83 90 06 [44376.399167] RSP: 0018:ffff88812cc97570 EFLAGS: 00010246 [44376.399680] RAX: dead000000000122 RBX: ffff8881088e3800 RCX: ffff8881881bac00 [44376.400337] RDX: dead000000000100 RSI: ffff88812cc97500 RDI: ffff8881242f71b0 [44376.401001] RBP: ffff88811cbb0940 R08: 0000000000000400 R09: 0000000000000001 [44376.401663] R10: 0000000000000001 R11: 0000000000000000 R12: ffff88812c944000 [44376.402342] R13: ffff8881242f71a8 R14: ffff8881222b4000 R15: 0000000000000000 [44376.402999] FS: 00007f0451104800(0000) GS:ffff88852cb80000(0000) knlGS:0000000000000000 [44376.403787] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [44376.404343] CR2: 0000000000489108 CR3: 0000000123a79003 CR4: 0000000000370ea0 [44376.405004] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [44376.405665] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [44376.406339] Call Trace: [44376.406651] [44376.406939] ? die_addr+0x33/0x90 [44376.407311] ? exc_general_protection+0x192/0x390 [44376.407795] ? asm_exc_general_protection+0x22/0x30 [44376.408292] ? mlx5e_tc_del_fdb_flow+0xb3/0x340 [mlx5_core] [44376.408876] __mlx5e_tc_del_fdb_peer_flow+0xbc/0xe0 [mlx5_core] [44376.409482] mlx5e_tc_del_flow+0x42/0x210 [mlx5_core] [44376.410055] mlx5e_flow_put+0x25/0x50 [mlx5_core] [44376.410529] mlx5e_delete_flower+0x24b/0x350 [mlx5_core] [44376.411043] tc_setup_cb_reoffload+0x22/0x80 [44376.411462] fl_reoffload+0x261/0x2f0 [cls_flower] [44376.411907] ? mlx5e_rep_indr_setup_ft_cb+0x160/0x160 [mlx5_core] [44376.412481] ? mlx5e_rep_indr_setup_ft_cb+0x160/0x160 [mlx5_core] [44376.413044] tcf_block_playback_offloads+0x76/0x170 [44376.413497] tcf_block_unbind+0x7b/0xd0 [44376.413881] tcf_block_setup+0x17d/0x1c0 [44376.414269] tcf_block_offload_cmd.isra.0+0xf1/0x130 [44376.414725] tcf_block_offload_unbind+0x43/0x70 [44376.415153] __tcf_block_put+0x82/0x150 [44376.415532] ingress_destroy+0x22/0x30 [sch_ingress] [44376.415986] qdisc_destroy+0x3b/0xd0 [44376.416343] qdisc_graft+0x4d0/0x620 [44376.416706] tc_get_qdisc+0x1c9/0x3b0 [44376.417074] rtnetlink_rcv_msg+0x29c/0x390 [44376.419978] ? rep_movs_alternative+0x3a/0xa0 [44376.420399] ? rtnl_calcit.isra.0+0x120/0x120 [44376.420813] netlink_rcv_skb+0x54/0x100 [44376.421192] netlink_unicast+0x1f6/0x2c0 [44376.421573] netlink_sendmsg+0x232/0x4a0 [44376.421980] sock_sendmsg+0x38/0x60 [44376.422328] ____sys_sendmsg+0x1d0/0x1e0 [44376.422709] ? copy_msghdr_from_user+0x6d/0xa0 [44376.423127] ___sys_sendmsg+0x80/0xc0 [44376.423495] ? ___sys_recvmsg+0x8b/0xc0 [44376.423869] __sys_sendmsg+0x51/0x90 [44376.424226] do_syscall_64+0x3d/0x90 [44376.424587] entry_SYSCALL_64_after_hwframe+0x46/0xb0 [44376.425046] RIP: 0033:0x7f045134f887 [44376.425403] Code: 0a 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 [44376.426914] RSP: 002b:00007ffd63a82b98 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [44376.427592] RAX: ffffffffffffffda RBX: 000000006481955f RCX: 00007f045134f887 [44376.428195] RDX: 0000000000000000 RSI: 00007ffd63a82c00 RDI: 0000000000000003 [44376.428796] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 [44376.429404] R10: 00007f0451208708 R11: 0000000000000246 R12: 0000000000000001 [44376.430039] R13: 0000000000409980 R14: 000000000047e538 R15: 0000000000485400 [44376.430644] [44376.430907] Modules linked in: mlx5_ib mlx5_core act_mirred act_tunnel_key cls_flower vxlan dummy sch_ingress openvswitch nsh rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm ib_uverbs ib_core xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcsec_g ss_krb5 auth_rpcgss oid_registry overlay zram zsmalloc fuse [last unloaded: mlx5_core] [44376.433936] ---[ end trace 0000000000000000 ]--- [44376.434373] RIP: 0010:mlx5e_tc_del_fdb_flow+0xb3/0x340 [mlx5_core] [44376.434951] Code: 00 48 8b b8 68 ce 02 00 e8 8a 4d 02 00 4c 8d a8 a8 01 00 00 4c 89 ef e8 8b 79 88 e1 48 8b 83 98 06 00 00 48 8b 93 90 06 00 00 <48> 89 42 08 48 89 10 48 b8 00 01 00 00 00 00 ad de 48 89 83 90 06 [44376.436452] RSP: 0018:ffff88812cc97570 EFLAGS: 00010246 [44376.436924] RAX: dead000000000122 RBX: ffff8881088e3800 RCX: ffff8881881bac00 [44376.437530] RDX: dead000000000100 RSI: ffff88812cc97500 RDI: ffff8881242f71b0 [44376.438179] RBP: ffff88811cbb0940 R08: 0000000000000400 R09: 0000000000000001 [44376.438786] R10: 0000000000000001 R11: 0000000000000000 R12: ffff88812c944000 [44376.439393] R13: ffff8881242f71a8 R14: ffff8881222b4000 R15: 0000000000000000 [44376.439998] FS: 00007f0451104800(0000) GS:ffff88852cb80000(0000) knlGS:0000000000000000 [44376.440714] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [44376.441225] CR2: 0000000000489108 CR3: 0000000123a79003 CR4: 0000000000370ea0 [44376.441843] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [44376.442471] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Fixes: ad86755b18d5 ("net/mlx5e: Protect unready flows with dedicated lock") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 7883b625634f..7ab489520a87 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1551,7 +1551,8 @@ static void remove_unready_flow(struct mlx5e_tc_flow *flow) uplink_priv = &rpriv->uplink_priv; mutex_lock(&uplink_priv->unready_flows_lock); - unready_flow_del(flow); + if (flow_flag_test(flow, NOT_READY)) + unready_flow_del(flow); mutex_unlock(&uplink_priv->unready_flows_lock); } @@ -1896,8 +1897,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, esw_attr = attr->esw_attr; mlx5e_put_flow_tunnel_id(flow); - if (flow_flag_test(flow, NOT_READY)) - remove_unready_flow(flow); + remove_unready_flow(flow); if (mlx5e_is_offloaded_flow(flow)) { if (flow_flag_test(flow, SLOW)) From 246fc961c875db105155e0ccd08d9751ef393d65 Mon Sep 17 00:00:00 2001 From: Prasad Koya Date: Mon, 5 Jun 2023 11:09:01 -0700 Subject: [PATCH 019/224] igc: set TP bit in 'supported' and 'advertising' fields of ethtool_link_ksettings [ Upstream commit 9ac3fc2f42e5ffa1e927dcbffb71b15fa81459e2 ] set TP bit in the 'supported' and 'advertising' fields. i225/226 parts only support twisted pair copper. Fixes: 8c5ad0dae93c ("igc: Add ethtool support") Signed-off-by: Prasad Koya Acked-by: Sasha Neftin Tested-by: Naama Meir Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igc/igc_ethtool.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index 8cc077b712ad..511fc3f41208 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1707,6 +1707,8 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev, /* twisted pair */ cmd->base.port = PORT_TP; cmd->base.phy_address = hw->phy.addr; + ethtool_link_ksettings_add_link_mode(cmd, supported, TP); + ethtool_link_ksettings_add_link_mode(cmd, advertising, TP); /* advertising link modes */ if (hw->phy.autoneg_advertised & ADVERTISE_10_HALF) From d752be635bc4082da49c5c12b42260c8192779cb Mon Sep 17 00:00:00 2001 From: Aravindhan Gunasekaran Date: Thu, 15 Jun 2023 12:00:43 +0530 Subject: [PATCH 020/224] igc: Handle PPS start time programming for past time values [ Upstream commit 84a192e46106355de1a314d709e657231d4b1026 ] I225/6 hardware can be programmed to start PPS output once the time in Target Time registers is reached. The time programmed in these registers should always be into future. Only then PPS output is triggered when SYSTIM register reaches the programmed value. There are two modes in i225/6 hardware to program PPS, pulse and clock mode. There were issues reported where PPS is not generated when start time is in past. Example 1, "echo 0 0 0 2 0 > /sys/class/ptp/ptp0/period" In the current implementation, a value of '0' is programmed into Target time registers and PPS output is in pulse mode. Eventually an interrupt which is triggered upon SYSTIM register reaching Target time is not fired. Thus no PPS output is generated. Example 2, "echo 0 0 0 1 0 > /sys/class/ptp/ptp0/period" Above case, a value of '0' is programmed into Target time registers and PPS output is in clock mode. Here, HW tries to catch-up the current time by incrementing Target Time register. This catch-up time seem to vary according to programmed PPS period time as per the HW design. In my experiments, the delay ranged between few tens of seconds to few minutes. The PPS output is only generated after the Target time register reaches current time. In my experiments, I also observed PPS stopped working with below test and could not recover until module is removed and loaded again. 1) echo 0 0 1 0 > /sys/class/ptp/ptp1/period 2) echo 0 0 0 1 0 > /sys/class/ptp/ptp1/period 3) echo 0 0 0 1 0 > /sys/class/ptp/ptp1/period After this PPS did not work even if i re-program with proper values. I could only get this back working by reloading the driver. This patch takes care of calculating and programming appropriate future time value into Target Time registers. Fixes: 5e91c72e560c ("igc: Fix PPS delta between two synchronized end-points") Signed-off-by: Aravindhan Gunasekaran Reviewed-by: Muhammad Husaini Zulkifli Tested-by: Naama Meir Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igc/igc_ptp.c | 25 +++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 4e10ced736db..d96cdccdc1e1 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -356,16 +356,35 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, tsim &= ~IGC_TSICR_TT0; } if (on) { + struct timespec64 safe_start; int i = rq->perout.index; igc_pin_perout(igc, i, pin, use_freq); - igc->perout[i].start.tv_sec = rq->perout.start.sec; + igc_ptp_read(igc, &safe_start); + + /* PPS output start time is triggered by Target time(TT) + * register. Programming any past time value into TT + * register will cause PPS to never start. Need to make + * sure we program the TT register a time ahead in + * future. There isn't a stringent need to fire PPS out + * right away. Adding +2 seconds should take care of + * corner cases. Let's say if the SYSTIML is close to + * wrap up and the timer keeps ticking as we program the + * register, adding +2seconds is safe bet. + */ + safe_start.tv_sec += 2; + + if (rq->perout.start.sec < safe_start.tv_sec) + igc->perout[i].start.tv_sec = safe_start.tv_sec; + else + igc->perout[i].start.tv_sec = rq->perout.start.sec; igc->perout[i].start.tv_nsec = rq->perout.start.nsec; igc->perout[i].period.tv_sec = ts.tv_sec; igc->perout[i].period.tv_nsec = ts.tv_nsec; - wr32(trgttimh, rq->perout.start.sec); + wr32(trgttimh, (u32)igc->perout[i].start.tv_sec); /* For now, always select timer 0 as source. */ - wr32(trgttiml, rq->perout.start.nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0); + wr32(trgttiml, (u32)(igc->perout[i].start.tv_nsec | + IGC_TT_IO_TIMER_SEL_SYSTIM0)); if (use_freq) wr32(freqout, ns); tsauxc |= tsauxc_mask; From 49f6ac6f1cde88bab4e530317510d1cf8870e404 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 9 Jun 2023 23:11:39 -0700 Subject: [PATCH 021/224] blk-crypto: use dynamic lock class for blk_crypto_profile::lock [ Upstream commit 2fb48d88e77f29bf9d278f25bcfe82cf59a0e09b ] When a device-mapper device is passing through the inline encryption support of an underlying device, calls to blk_crypto_evict_key() take the blk_crypto_profile::lock of the device-mapper device, then take the blk_crypto_profile::lock of the underlying device (nested). This isn't a real deadlock, but it causes a lockdep report because there is only one lock class for all instances of this lock. Lockdep subclasses don't really work here because the hierarchy of block devices is dynamic and could have more than 2 levels. Instead, register a dynamic lock class for each blk_crypto_profile, and associate that with the lock. This avoids false-positive lockdep reports like the following: ============================================ WARNING: possible recursive locking detected 6.4.0-rc5 #2 Not tainted -------------------------------------------- fscryptctl/1421 is trying to acquire lock: ffffff80829ca418 (&profile->lock){++++}-{3:3}, at: __blk_crypto_evict_key+0x44/0x1c0 but task is already holding lock: ffffff8086b68ca8 (&profile->lock){++++}-{3:3}, at: __blk_crypto_evict_key+0xc8/0x1c0 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&profile->lock); lock(&profile->lock); *** DEADLOCK *** May be due to missing lock nesting notation Fixes: 1b2628397058 ("block: Keyslot Manager for Inline Encryption") Reported-by: Bart Van Assche Signed-off-by: Eric Biggers Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20230610061139.212085-1-ebiggers@kernel.org Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-crypto-profile.c | 12 ++++++++++-- include/linux/blk-crypto-profile.h | 1 + 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/block/blk-crypto-profile.c b/block/blk-crypto-profile.c index 3290c03c9918..aa7fc1436893 100644 --- a/block/blk-crypto-profile.c +++ b/block/blk-crypto-profile.c @@ -79,7 +79,14 @@ int blk_crypto_profile_init(struct blk_crypto_profile *profile, unsigned int slot_hashtable_size; memset(profile, 0, sizeof(*profile)); - init_rwsem(&profile->lock); + + /* + * profile->lock of an underlying device can nest inside profile->lock + * of a device-mapper device, so use a dynamic lock class to avoid + * false-positive lockdep reports. + */ + lockdep_register_key(&profile->lockdep_key); + __init_rwsem(&profile->lock, "&profile->lock", &profile->lockdep_key); if (num_slots == 0) return 0; @@ -89,7 +96,7 @@ int blk_crypto_profile_init(struct blk_crypto_profile *profile, profile->slots = kvcalloc(num_slots, sizeof(profile->slots[0]), GFP_KERNEL); if (!profile->slots) - return -ENOMEM; + goto err_destroy; profile->num_slots = num_slots; @@ -441,6 +448,7 @@ void blk_crypto_profile_destroy(struct blk_crypto_profile *profile) { if (!profile) return; + lockdep_unregister_key(&profile->lockdep_key); kvfree(profile->slot_hashtable); kvfree_sensitive(profile->slots, sizeof(profile->slots[0]) * profile->num_slots); diff --git a/include/linux/blk-crypto-profile.h b/include/linux/blk-crypto-profile.h index e6802b69cdd6..90ab33cb5d0e 100644 --- a/include/linux/blk-crypto-profile.h +++ b/include/linux/blk-crypto-profile.h @@ -111,6 +111,7 @@ struct blk_crypto_profile { * keyslots while ensuring that they can't be changed concurrently. */ struct rw_semaphore lock; + struct lock_class_key lockdep_key; /* List of idle slots, with least recently used slot at front */ wait_queue_head_t idle_slots_wait_queue; From 574d5236a82b45f5981bb40945f70424b0939018 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 26 Jun 2023 13:58:47 +0300 Subject: [PATCH 022/224] scsi: qla2xxx: Fix error code in qla2x00_start_sp() [ Upstream commit e579b007eff3ff8d29d59d16214cd85fb9e573f7 ] This should be negative -EAGAIN instead of positive. The callers treat non-zero error codes the same so it doesn't really impact runtime beyond some trivial differences to debug output. Fixes: 80676d054e5a ("scsi: qla2xxx: Fix session cleanup hang") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/49866d28-4cfe-47b0-842b-78f110e61aab@moroto.mountain Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/qla2xxx/qla_iocb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 4f48f098ea5a..605e94f97318 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -3898,7 +3898,7 @@ qla2x00_start_sp(srb_t *sp) pkt = __qla2x00_alloc_iocbs(sp->qpair, sp); if (!pkt) { - rval = EAGAIN; + rval = -EAGAIN; ql_log(ql_log_warn, vha, 0x700c, "qla2x00_alloc_iocbs failed.\n"); goto done; From 714d81a5c49f60c1e422c7ddf89888cca2e9d950 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 30 Jun 2023 22:23:48 -0700 Subject: [PATCH 023/224] scsi: ufs: ufs-mediatek: Add dependency for RESET_CONTROLLER [ Upstream commit 89f7ef7f2b23b2a7b8ce346c23161916eae5b15c ] When RESET_CONTROLLER is not set, kconfig complains about missing dependencies for RESET_TI_SYSCON, so add the missing dependency just as is done above for SCSI_UFS_QCOM. Silences this kconfig warning: WARNING: unmet direct dependencies detected for RESET_TI_SYSCON Depends on [n]: RESET_CONTROLLER [=n] && HAS_IOMEM [=y] Selected by [m]: - SCSI_UFS_MEDIATEK [=m] && SCSI_UFSHCD [=y] && SCSI_UFSHCD_PLATFORM [=y] && ARCH_MEDIATEK [=y] Fixes: de48898d0cb6 ("scsi: ufs-mediatek: Create reset control device_link") Signed-off-by: Randy Dunlap Link: lore.kernel.org/r/202306020859.1wHg9AaT-lkp@intel.com Link: https://lore.kernel.org/r/20230701052348.28046-1-rdunlap@infradead.org Cc: Stanley Chu Cc: Peter Wang Cc: Paul Gazzillo Cc: Necip Fazil Yildiran Cc: linux-scsi@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mediatek@lists.infradead.org Cc: "James E.J. Bottomley" Cc: "Martin K. Petersen" Reported-by: kernel test robot Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/ufs/host/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ufs/host/Kconfig b/drivers/ufs/host/Kconfig index 4cc2dbd79ed0..9b39fd76031b 100644 --- a/drivers/ufs/host/Kconfig +++ b/drivers/ufs/host/Kconfig @@ -71,6 +71,7 @@ config SCSI_UFS_QCOM config SCSI_UFS_MEDIATEK tristate "Mediatek specific hooks to UFS controller platform driver" depends on SCSI_UFSHCD_PLATFORM && ARCH_MEDIATEK + depends on RESET_CONTROLLER select PHY_MTK_UFS select RESET_TI_SYSCON help From b2e74dedb057bc1278dc936a19f42d0e9aab2c01 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Wed, 5 Jul 2023 20:17:29 +0530 Subject: [PATCH 024/224] bpf: Fix max stack depth check for async callbacks [ Upstream commit 5415ccd50a8620c8cbaa32d6f18c946c453566f5 ] The check_max_stack_depth pass happens after the verifier's symbolic execution, and attempts to walk the call graph of the BPF program, ensuring that the stack usage stays within bounds for all possible call chains. There are two cases to consider: bpf_pseudo_func and bpf_pseudo_call. In the former case, the callback pointer is loaded into a register, and is assumed that it is passed to some helper later which calls it (however there is no way to be sure), but the check remains conservative and accounts the stack usage anyway. For this particular case, asynchronous callbacks are skipped as they execute asynchronously when their corresponding event fires. The case of bpf_pseudo_call is simpler and we know that the call is definitely made, hence the stack depth of the subprog is accounted for. However, the current check still skips an asynchronous callback even if a bpf_pseudo_call was made for it. This is erroneous, as it will miss accounting for the stack usage of the asynchronous callback, which can be used to breach the maximum stack depth limit. Fix this by only skipping asynchronous callbacks when the instruction is not a pseudo call to the subprog. Fixes: 7ddc80a476c2 ("bpf: Teach stack depth check about async callbacks.") Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20230705144730.235802-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 49c6b5e0855c..8c3ededef317 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4357,8 +4357,9 @@ continue_func: verbose(env, "verifier bug. subprog has tail_call and async cb\n"); return -EFAULT; } - /* async callbacks don't increase bpf prog stack size */ - continue; + /* async callbacks don't increase bpf prog stack size unless called directly */ + if (!bpf_pseudo_call(insn + i)) + continue; } i = next_insn; From 831fbc206529630ccef68507da7d8b015b21e14d Mon Sep 17 00:00:00 2001 From: Klaus Kudielka Date: Wed, 5 Jul 2023 07:37:12 +0200 Subject: [PATCH 025/224] net: mvneta: fix txq_map in case of txq_number==1 [ Upstream commit 21327f81db6337c8843ce755b01523c7d3df715b ] If we boot with mvneta.txq_number=1, the txq_map is set incorrectly: MVNETA_CPU_TXQ_ACCESS(1) refers to TX queue 1, but only TX queue 0 is initialized. Fix this. Fixes: 50bf8cb6fc9c ("net: mvneta: Configure XPS support") Signed-off-by: Klaus Kudielka Reviewed-by: Michal Kubiak Link: https://lore.kernel.org/r/20230705053712.3914-1-klaus.kudielka@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/mvneta.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 5aefaaff0871..aca5b72cfeec 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1505,7 +1505,7 @@ static void mvneta_defaults_set(struct mvneta_port *pp) */ if (txq_number == 1) txq_map = (cpu == pp->rxq_def) ? - MVNETA_CPU_TXQ_ACCESS(1) : 0; + MVNETA_CPU_TXQ_ACCESS(0) : 0; } else { txq_map = MVNETA_CPU_TXQ_ACCESS_ALL_MASK; @@ -4294,7 +4294,7 @@ static void mvneta_percpu_elect(struct mvneta_port *pp) */ if (txq_number == 1) txq_map = (cpu == elected_cpu) ? - MVNETA_CPU_TXQ_ACCESS(1) : 0; + MVNETA_CPU_TXQ_ACCESS(0) : 0; else txq_map = mvreg_read(pp, MVNETA_CPU_MAP(cpu)) & MVNETA_CPU_TXQ_ACCESS_ALL_MASK; From c91fb29bb07ee4dd40aabd1e41f19c0f92ac3199 Mon Sep 17 00:00:00 2001 From: M A Ramdhan Date: Wed, 5 Jul 2023 12:15:30 -0400 Subject: [PATCH 026/224] net/sched: cls_fw: Fix improper refcount update leads to use-after-free [ Upstream commit 0323bce598eea038714f941ce2b22541c46d488f ] In the event of a failure in tcf_change_indev(), fw_set_parms() will immediately return an error after incrementing or decrementing reference counter in tcf_bind_filter(). If attacker can control reference counter to zero and make reference freed, leading to use after free. In order to prevent this, move the point of possible failure above the point where the TC_FW_CLASSID is handled. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: M A Ramdhan Signed-off-by: M A Ramdhan Acked-by: Jamal Hadi Salim Reviewed-by: Pedro Tammela Message-ID: <20230705161530.52003-1-ramdhan@starlabs.sg> Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/cls_fw.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index a32351da968c..1212b057b129 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -210,11 +210,6 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp, if (err < 0) return err; - if (tb[TCA_FW_CLASSID]) { - f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]); - tcf_bind_filter(tp, &f->res, base); - } - if (tb[TCA_FW_INDEV]) { int ret; ret = tcf_change_indev(net, tb[TCA_FW_INDEV], extack); @@ -231,6 +226,11 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp, } else if (head->mask != 0xFFFFFFFF) return err; + if (tb[TCA_FW_CLASSID]) { + f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]); + tcf_bind_filter(tp, &f->res, base); + } + return 0; } From 2b4086a66abd557bc2ec4d68f4d800d1c40dbc2d Mon Sep 17 00:00:00 2001 From: Junfeng Guo Date: Thu, 6 Jul 2023 12:41:28 +0800 Subject: [PATCH 027/224] gve: Set default duplex configuration to full [ Upstream commit 0503efeadbf6bb8bf24397613a73b67e665eac5f ] Current duplex mode was unset in the driver, resulting in the default parameter being set to 0, which corresponds to half duplex. It might mislead users to have incorrect expectation about the driver's transmission capabilities. Set the default duplex configuration to full, as the driver runs in full duplex mode at this point. Fixes: 7e074d5a76ca ("gve: Enable Link Speed Reporting in the driver.") Signed-off-by: Junfeng Guo Reviewed-by: Leon Romanovsky Message-ID: <20230706044128.2726747-1-junfeng.guo@intel.com> Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/google/gve/gve_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 38df602f2869..033f17cb96be 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -541,6 +541,9 @@ static int gve_get_link_ksettings(struct net_device *netdev, err = gve_adminq_report_link_speed(priv); cmd->base.speed = priv->link_speed; + + cmd->base.duplex = DUPLEX_FULL; + return err; } From bb56b7905b7d3647450bd024f8daf2b917873af8 Mon Sep 17 00:00:00 2001 From: Ratheesh Kannoth Date: Thu, 6 Jul 2023 09:57:05 +0530 Subject: [PATCH 028/224] octeontx2-af: Promisc enable/disable through mbox [ Upstream commit af42088bdaf292060b8d8a00d8644ca7b2b3f2d1 ] In legacy silicon, promiscuous mode is only modified through CGX mbox messages. In CN10KB silicon, it is modified from CGX mbox and NIX. This breaks legacy application behaviour. Fix this by removing call from NIX. Fixes: d6c9784baf59 ("octeontx2-af: Invoke exact match functions if supported") Signed-off-by: Ratheesh Kannoth Reviewed-by: Leon Romanovsky Reviewed-by: Michal Kubiak Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../ethernet/marvell/octeontx2/af/rvu_nix.c | 11 ++------- .../marvell/octeontx2/af/rvu_npc_hash.c | 23 +++++++++++++++++-- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 8cb2a0181fb9..705325431dec 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -3804,21 +3804,14 @@ int rvu_mbox_handler_nix_set_rx_mode(struct rvu *rvu, struct nix_rx_mode *req, } /* install/uninstall promisc entry */ - if (promisc) { + if (promisc) rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf, pfvf->rx_chan_base, pfvf->rx_chan_cnt); - - if (rvu_npc_exact_has_match_table(rvu)) - rvu_npc_exact_promisc_enable(rvu, pcifunc); - } else { + else if (!nix_rx_multicast) rvu_npc_enable_promisc_entry(rvu, pcifunc, nixlf, false); - if (rvu_npc_exact_has_match_table(rvu)) - rvu_npc_exact_promisc_disable(rvu, pcifunc); - } - return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c index 3b48b635977f..3b0a66c0977a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c @@ -1168,8 +1168,10 @@ static u16 __rvu_npc_exact_cmd_rules_cnt_update(struct rvu *rvu, int drop_mcam_i { struct npc_exact_table *table; u16 *cnt, old_cnt; + bool promisc; table = rvu->hw->table; + promisc = table->promisc_mode[drop_mcam_idx]; cnt = &table->cnt_cmd_rules[drop_mcam_idx]; old_cnt = *cnt; @@ -1181,13 +1183,18 @@ static u16 __rvu_npc_exact_cmd_rules_cnt_update(struct rvu *rvu, int drop_mcam_i *enable_or_disable_cam = false; - /* If all rules are deleted, disable cam */ + if (promisc) + goto done; + + /* If all rules are deleted and not already in promisc mode; + * disable cam + */ if (!*cnt && val < 0) { *enable_or_disable_cam = true; goto done; } - /* If rule got added, enable cam */ + /* If rule got added and not already in promisc mode; enable cam */ if (!old_cnt && val > 0) { *enable_or_disable_cam = true; goto done; @@ -1466,6 +1473,12 @@ int rvu_npc_exact_promisc_disable(struct rvu *rvu, u16 pcifunc) *promisc = false; mutex_unlock(&table->lock); + /* Enable drop rule */ + rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX, + true); + + dev_dbg(rvu->dev, "%s: disabled promisc mode (cgx=%d lmac=%d)\n", + __func__, cgx_id, lmac_id); return 0; } @@ -1507,6 +1520,12 @@ int rvu_npc_exact_promisc_enable(struct rvu *rvu, u16 pcifunc) *promisc = true; mutex_unlock(&table->lock); + /* disable drop rule */ + rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX, + false); + + dev_dbg(rvu->dev, "%s: Enabled promisc mode (cgx=%d lmac=%d)\n", + __func__, cgx_id, lmac_id); return 0; } From 6cc293d29c8fc18cec2e6714be1dc6a56296e5e0 Mon Sep 17 00:00:00 2001 From: Sai Krishna Date: Thu, 6 Jul 2023 13:59:36 +0530 Subject: [PATCH 029/224] octeontx2-af: Move validation of ptp pointer before its usage [ Upstream commit 7709fbd4922c197efabda03660d93e48a3e80323 ] Moved PTP pointer validation before its use to avoid smatch warning. Also used kzalloc/kfree instead of devm_kzalloc/devm_kfree. Fixes: 2ef4e45d99b1 ("octeontx2-af: Add PTP PPS Errata workaround on CN10K silicon") Signed-off-by: Naveen Mamindlapalli Signed-off-by: Sunil Goutham Signed-off-by: Sai Krishna Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- .../net/ethernet/marvell/octeontx2/af/ptp.c | 19 +++++++++---------- .../net/ethernet/marvell/octeontx2/af/rvu.c | 2 +- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c index 3411e2e47d46..0ee420a489fc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/ptp.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/ptp.c @@ -208,7 +208,7 @@ struct ptp *ptp_get(void) /* Check driver is bound to PTP block */ if (!ptp) ptp = ERR_PTR(-EPROBE_DEFER); - else + else if (!IS_ERR(ptp)) pci_dev_get(ptp->pdev); return ptp; @@ -388,11 +388,10 @@ static int ptp_extts_on(struct ptp *ptp, int on) static int ptp_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { - struct device *dev = &pdev->dev; struct ptp *ptp; int err; - ptp = devm_kzalloc(dev, sizeof(*ptp), GFP_KERNEL); + ptp = kzalloc(sizeof(*ptp), GFP_KERNEL); if (!ptp) { err = -ENOMEM; goto error; @@ -428,20 +427,19 @@ static int ptp_probe(struct pci_dev *pdev, return 0; error_free: - devm_kfree(dev, ptp); + kfree(ptp); error: /* For `ptp_get()` we need to differentiate between the case * when the core has not tried to probe this device and the case when - * the probe failed. In the later case we pretend that the - * initialization was successful and keep the error in + * the probe failed. In the later case we keep the error in * `dev->driver_data`. */ pci_set_drvdata(pdev, ERR_PTR(err)); if (!first_ptp_block) first_ptp_block = ERR_PTR(err); - return 0; + return err; } static void ptp_remove(struct pci_dev *pdev) @@ -449,16 +447,17 @@ static void ptp_remove(struct pci_dev *pdev) struct ptp *ptp = pci_get_drvdata(pdev); u64 clock_cfg; - if (cn10k_ptp_errata(ptp) && hrtimer_active(&ptp->hrtimer)) - hrtimer_cancel(&ptp->hrtimer); - if (IS_ERR_OR_NULL(ptp)) return; + if (cn10k_ptp_errata(ptp) && hrtimer_active(&ptp->hrtimer)) + hrtimer_cancel(&ptp->hrtimer); + /* Disable PTP clock */ clock_cfg = readq(ptp->reg_base + PTP_CLOCK_CFG); clock_cfg &= ~PTP_CLOCK_CFG_PTP_EN; writeq(clock_cfg, ptp->reg_base + PTP_CLOCK_CFG); + kfree(ptp); } static const struct pci_device_id ptp_id_table[] = { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 873f081c030d..733add3a9dc6 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -3244,7 +3244,7 @@ static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id) rvu->ptp = ptp_get(); if (IS_ERR(rvu->ptp)) { err = PTR_ERR(rvu->ptp); - if (err == -EPROBE_DEFER) + if (err) goto err_release_regions; rvu->ptp = NULL; } From dc470466753ad0dd3a8c48aaefa05a992c119b9c Mon Sep 17 00:00:00 2001 From: Nitya Sunkad Date: Thu, 6 Jul 2023 11:20:06 -0700 Subject: [PATCH 030/224] ionic: remove WARN_ON to prevent panic_on_warn [ Upstream commit abfb2a58a5377ebab717d4362d6180f901b6e5c1 ] Remove unnecessary early code development check and the WARN_ON that it uses. The irq alloc and free paths have long been cleaned up and this check shouldn't have stuck around so long. Fixes: 77ceb68e29cc ("ionic: Add notifyq support") Signed-off-by: Nitya Sunkad Signed-off-by: Shannon Nelson Reviewed-by: Jacob Keller Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 159bfcc76498..a89ab455af67 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -462,11 +462,6 @@ static void ionic_qcqs_free(struct ionic_lif *lif) static void ionic_link_qcq_interrupts(struct ionic_qcq *src_qcq, struct ionic_qcq *n_qcq) { - if (WARN_ON(n_qcq->flags & IONIC_QCQ_F_INTR)) { - ionic_intr_free(n_qcq->cq.lif->ionic, n_qcq->intr.index); - n_qcq->flags &= ~IONIC_QCQ_F_INTR; - } - n_qcq->intr.vector = src_qcq->intr.vector; n_qcq->intr.index = src_qcq->intr.index; n_qcq->napi_qcq = src_qcq->napi_qcq; From 685b57a1221c38ec8b456f968264d2496715820c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Fri, 7 Jul 2023 08:53:25 +0200 Subject: [PATCH 031/224] net: bgmac: postpone turning IRQs off to avoid SoC hangs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e7731194fdf085f46d58b1adccfddbd0dfee4873 ] Turning IRQs off is done by accessing Ethernet controller registers. That can't be done until device's clock is enabled. It results in a SoC hang otherwise. This bug remained unnoticed for years as most bootloaders keep all Ethernet interfaces turned on. It seems to only affect a niche SoC family BCM47189. It has two Ethernet controllers but CFE bootloader uses only the first one. Fixes: 34322615cbaa ("net: bgmac: Mask interrupts during probe") Signed-off-by: Rafał Miłecki Reviewed-by: Michal Kubiak Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bgmac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 1761df8fb7f9..10c7c232cc4e 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1492,8 +1492,6 @@ int bgmac_enet_probe(struct bgmac *bgmac) bgmac->in_init = true; - bgmac_chip_intrs_off(bgmac); - net_dev->irq = bgmac->irq; SET_NETDEV_DEV(net_dev, bgmac->dev); dev_set_drvdata(bgmac->dev, bgmac); @@ -1511,6 +1509,8 @@ int bgmac_enet_probe(struct bgmac *bgmac) */ bgmac_clk_enable(bgmac, 0); + bgmac_chip_intrs_off(bgmac); + /* This seems to be fixing IRQ by assigning OOB #6 to the core */ if (!(bgmac->feature_flags & BGMAC_FEAT_IDM_MASK)) { if (bgmac->feature_flags & BGMAC_FEAT_IRQ_ID_OOB_6) From 1731234e8b60063eae858c77b55c7a88f5084353 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 7 Jul 2023 10:11:10 +0200 Subject: [PATCH 032/224] net: prevent skb corruption on frag list segmentation [ Upstream commit c329b261afe71197d9da83c1f18eb45a7e97e089 ] Ian reported several skb corruptions triggered by rx-gro-list, collecting different oops alike: [ 62.624003] BUG: kernel NULL pointer dereference, address: 00000000000000c0 [ 62.631083] #PF: supervisor read access in kernel mode [ 62.636312] #PF: error_code(0x0000) - not-present page [ 62.641541] PGD 0 P4D 0 [ 62.644174] Oops: 0000 [#1] PREEMPT SMP NOPTI [ 62.648629] CPU: 1 PID: 913 Comm: napi/eno2-79 Not tainted 6.4.0 #364 [ 62.655162] Hardware name: Supermicro Super Server/A2SDi-12C-HLN4F, BIOS 1.7a 10/13/2022 [ 62.663344] RIP: 0010:__udp_gso_segment (./include/linux/skbuff.h:2858 ./include/linux/udp.h:23 net/ipv4/udp_offload.c:228 net/ipv4/udp_offload.c:261 net/ipv4/udp_offload.c:277) [ 62.687193] RSP: 0018:ffffbd3a83b4f868 EFLAGS: 00010246 [ 62.692515] RAX: 00000000000000ce RBX: 0000000000000000 RCX: 0000000000000000 [ 62.699743] RDX: ffffa124def8a000 RSI: 0000000000000079 RDI: ffffa125952a14d4 [ 62.706970] RBP: ffffa124def8a000 R08: 0000000000000022 R09: 00002000001558c9 [ 62.714199] R10: 0000000000000000 R11: 00000000be554639 R12: 00000000000000e2 [ 62.721426] R13: ffffa125952a1400 R14: ffffa125952a1400 R15: 00002000001558c9 [ 62.728654] FS: 0000000000000000(0000) GS:ffffa127efa40000(0000) knlGS:0000000000000000 [ 62.736852] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 62.742702] CR2: 00000000000000c0 CR3: 00000001034b0000 CR4: 00000000003526e0 [ 62.749948] Call Trace: [ 62.752498] [ 62.779267] inet_gso_segment (net/ipv4/af_inet.c:1398) [ 62.787605] skb_mac_gso_segment (net/core/gro.c:141) [ 62.791906] __skb_gso_segment (net/core/dev.c:3403 (discriminator 2)) [ 62.800492] validate_xmit_skb (./include/linux/netdevice.h:4862 net/core/dev.c:3659) [ 62.804695] validate_xmit_skb_list (net/core/dev.c:3710) [ 62.809158] sch_direct_xmit (net/sched/sch_generic.c:330) [ 62.813198] __dev_queue_xmit (net/core/dev.c:3805 net/core/dev.c:4210) net/netfilter/core.c:626) [ 62.821093] br_dev_queue_push_xmit (net/bridge/br_forward.c:55) [ 62.825652] maybe_deliver (net/bridge/br_forward.c:193) [ 62.829420] br_flood (net/bridge/br_forward.c:233) [ 62.832758] br_handle_frame_finish (net/bridge/br_input.c:215) [ 62.837403] br_handle_frame (net/bridge/br_input.c:298 net/bridge/br_input.c:416) [ 62.851417] __netif_receive_skb_core.constprop.0 (net/core/dev.c:5387) [ 62.866114] __netif_receive_skb_list_core (net/core/dev.c:5570) [ 62.871367] netif_receive_skb_list_internal (net/core/dev.c:5638 net/core/dev.c:5727) [ 62.876795] napi_complete_done (./include/linux/list.h:37 ./include/net/gro.h:434 ./include/net/gro.h:429 net/core/dev.c:6067) [ 62.881004] ixgbe_poll (drivers/net/ethernet/intel/ixgbe/ixgbe_main.c:3191) [ 62.893534] __napi_poll (net/core/dev.c:6498) [ 62.897133] napi_threaded_poll (./include/linux/netpoll.h:89 net/core/dev.c:6640) [ 62.905276] kthread (kernel/kthread.c:379) [ 62.913435] ret_from_fork (arch/x86/entry/entry_64.S:314) [ 62.917119] In the critical scenario, rx-gro-list GRO-ed packets are fed, via a bridge, both to the local input path and to an egress device (tun). The segmentation of such packets unsafely writes to the cloned skbs with shared heads. This change addresses the issue by uncloning as needed the to-be-segmented skbs. Reported-by: Ian Kumlien Tested-by: Ian Kumlien Fixes: 3a1296a38d0c ("net: Support GRO/GSO fraglist chaining.") Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/skbuff.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ef9772b12624..b6c16db86c71 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4042,6 +4042,11 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, skb_push(skb, -skb_network_offset(skb) + offset); + /* Ensure the head is writeable before touching the shared info */ + err = skb_unclone(skb, GFP_ATOMIC); + if (err) + goto err_linearize; + skb_shinfo(skb)->frag_list = NULL; while (list_skb) { From 1462e9d9aa52d14665eaca6d89d22c4af44ede04 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 7 Jul 2023 18:43:27 -0700 Subject: [PATCH 033/224] icmp6: Fix null-ptr-deref of ip6_null_entry->rt6i_idev in icmp6_dev(). [ Upstream commit 2aaa8a15de73874847d62eb595c6683bface80fd ] With some IPv6 Ext Hdr (RPL, SRv6, etc.), we can send a packet that has the link-local address as src and dst IP and will be forwarded to an external IP in the IPv6 Ext Hdr. For example, the script below generates a packet whose src IP is the link-local address and dst is updated to 11::. # for f in $(find /proc/sys/net/ -name *seg6_enabled*); do echo 1 > $f; done # python3 >>> from socket import * >>> from scapy.all import * >>> >>> SRC_ADDR = DST_ADDR = "fe80::5054:ff:fe12:3456" >>> >>> pkt = IPv6(src=SRC_ADDR, dst=DST_ADDR) >>> pkt /= IPv6ExtHdrSegmentRouting(type=4, addresses=["11::", "22::"], segleft=1) >>> >>> sk = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW) >>> sk.sendto(bytes(pkt), (DST_ADDR, 0)) For such a packet, we call ip6_route_input() to look up a route for the next destination in these three functions depending on the header type. * ipv6_rthdr_rcv() * ipv6_rpl_srh_rcv() * ipv6_srh_rcv() If no route is found, ip6_null_entry is set to skb, and the following dst_input(skb) calls ip6_pkt_drop(). Finally, in icmp6_dev(), we dereference skb_rt6_info(skb)->rt6i_idev->dev as the input device is the loopback interface. Then, we have to check if skb_rt6_info(skb)->rt6i_idev is NULL or not to avoid NULL pointer deref for ip6_null_entry. BUG: kernel NULL pointer dereference, address: 0000000000000000 PF: supervisor read access in kernel mode PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 0 PID: 157 Comm: python3 Not tainted 6.4.0-11996-gb121d614371c #35 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 RIP: 0010:icmp6_send (net/ipv6/icmp.c:436 net/ipv6/icmp.c:503) Code: fe ff ff 48 c7 40 30 c0 86 5d 83 e8 c6 44 1c 00 e9 c8 fc ff ff 49 8b 46 58 48 83 e0 fe 0f 84 4a fb ff ff 48 8b 80 d0 00 00 00 <48> 8b 00 44 8b 88 e0 00 00 00 e9 34 fb ff ff 4d 85 ed 0f 85 69 01 RSP: 0018:ffffc90000003c70 EFLAGS: 00000286 RAX: 0000000000000000 RBX: 0000000000000001 RCX: 00000000000000e0 RDX: 0000000000000021 RSI: 0000000000000000 RDI: ffff888006d72a18 RBP: ffffc90000003d80 R08: 0000000000000000 R09: 0000000000000001 R10: ffffc90000003d98 R11: 0000000000000040 R12: ffff888006d72a10 R13: 0000000000000000 R14: ffff8880057fb800 R15: ffffffff835d86c0 FS: 00007f9dc72ee740(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000000057b2000 CR4: 00000000007506f0 PKRU: 55555554 Call Trace: ip6_pkt_drop (net/ipv6/route.c:4513) ipv6_rthdr_rcv (net/ipv6/exthdrs.c:640 net/ipv6/exthdrs.c:686) ip6_protocol_deliver_rcu (net/ipv6/ip6_input.c:437 (discriminator 5)) ip6_input_finish (./include/linux/rcupdate.h:781 net/ipv6/ip6_input.c:483) __netif_receive_skb_one_core (net/core/dev.c:5455) process_backlog (./include/linux/rcupdate.h:781 net/core/dev.c:5895) __napi_poll (net/core/dev.c:6460) net_rx_action (net/core/dev.c:6529 net/core/dev.c:6660) __do_softirq (./arch/x86/include/asm/jump_label.h:27 ./include/linux/jump_label.h:207 ./include/trace/events/irq.h:142 kernel/softirq.c:554) do_softirq (kernel/softirq.c:454 kernel/softirq.c:441) __local_bh_enable_ip (kernel/softirq.c:381) __dev_queue_xmit (net/core/dev.c:4231) ip6_finish_output2 (./include/net/neighbour.h:544 net/ipv6/ip6_output.c:135) rawv6_sendmsg (./include/net/dst.h:458 ./include/linux/netfilter.h:303 net/ipv6/raw.c:656 net/ipv6/raw.c:914) sock_sendmsg (net/socket.c:725 net/socket.c:748) __sys_sendto (net/socket.c:2134) __x64_sys_sendto (net/socket.c:2146 net/socket.c:2142 net/socket.c:2142) do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) RIP: 0033:0x7f9dc751baea Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 7e c3 0f 1f 44 00 00 41 54 48 83 ec 30 44 89 RSP: 002b:00007ffe98712c38 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00007ffe98712cf8 RCX: 00007f9dc751baea RDX: 0000000000000060 RSI: 00007f9dc6460b90 RDI: 0000000000000003 RBP: 00007f9dc56e8be0 R08: 00007ffe98712d70 R09: 000000000000001c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: ffffffffc4653600 R14: 0000000000000001 R15: 00007f9dc6af5d1b Modules linked in: CR2: 0000000000000000 ---[ end trace 0000000000000000 ]--- RIP: 0010:icmp6_send (net/ipv6/icmp.c:436 net/ipv6/icmp.c:503) Code: fe ff ff 48 c7 40 30 c0 86 5d 83 e8 c6 44 1c 00 e9 c8 fc ff ff 49 8b 46 58 48 83 e0 fe 0f 84 4a fb ff ff 48 8b 80 d0 00 00 00 <48> 8b 00 44 8b 88 e0 00 00 00 e9 34 fb ff ff 4d 85 ed 0f 85 69 01 RSP: 0018:ffffc90000003c70 EFLAGS: 00000286 RAX: 0000000000000000 RBX: 0000000000000001 RCX: 00000000000000e0 RDX: 0000000000000021 RSI: 0000000000000000 RDI: ffff888006d72a18 RBP: ffffc90000003d80 R08: 0000000000000000 R09: 0000000000000001 R10: ffffc90000003d98 R11: 0000000000000040 R12: ffff888006d72a10 R13: 0000000000000000 R14: ffff8880057fb800 R15: ffffffff835d86c0 FS: 00007f9dc72ee740(0000) GS:ffff88807dc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000000057b2000 CR4: 00000000007506f0 PKRU: 55555554 Kernel panic - not syncing: Fatal exception in interrupt Kernel Offset: disabled Fixes: 4832c30d5458 ("net: ipv6: put host and anycast routes on device with address") Reported-by: Wang Yufen Closes: https://lore.kernel.org/netdev/c41403a9-c2f6-3b7e-0c96-e1901e605cd0@huawei.com/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: David Ahern Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/icmp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 9d92d51c4757..e2af7ab99282 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -422,7 +422,10 @@ static struct net_device *icmp6_dev(const struct sk_buff *skb) if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) { const struct rt6_info *rt6 = skb_rt6_info(skb); - if (rt6) + /* The destination could be an external IP in Ext Hdr (SRv6, RPL, etc.), + * and ip6_null_entry could be set to skb if no route is found. + */ + if (rt6 && rt6->rt6i_idev) dev = rt6->rt6i_idev->dev; } From 360db93beb8f8d373080ebc97cfdc98409d1b544 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 8 Jul 2023 08:29:58 +0000 Subject: [PATCH 034/224] udp6: fix udp6_ehashfn() typo [ Upstream commit 51d03e2f2203e76ed02d33fb5ffbb5fc85ffaf54 ] Amit Klein reported that udp6_ehash_secret was initialized but never used. Fixes: 1bbdceef1e53 ("inet: convert inet_ehash_secret and ipv6_hash_secret to net_get_random_once") Reported-by: Amit Klein Signed-off-by: Eric Dumazet Cc: Willy Tarreau Cc: Willem de Bruijn Cc: David Ahern Cc: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c029222ce46b..04f1d696503c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -90,7 +90,7 @@ static u32 udp6_ehashfn(const struct net *net, fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret); return __inet6_ehashfn(lhash, lport, fhash, fport, - udp_ipv6_hash_secret + net_hash_mix(net)); + udp6_ehash_secret + net_hash_mix(net)); } int udp_v6_get_port(struct sock *sk, unsigned short snum) From 4e5daadf8cd054254acef01c501450a57dc17057 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Sat, 5 Nov 2022 09:43:01 +0000 Subject: [PATCH 035/224] ntb: idt: Fix error handling in idt_pci_driver_init() [ Upstream commit c012968259b451dc4db407f2310fe131eaefd800 ] A problem about ntb_hw_idt create debugfs failed is triggered with the following log given: [ 1236.637636] IDT PCI-E Non-Transparent Bridge Driver 2.0 [ 1236.639292] debugfs: Directory 'ntb_hw_idt' with parent '/' already present! The reason is that idt_pci_driver_init() returns pci_register_driver() directly without checking its return value, if pci_register_driver() failed, it returns without destroy the newly created debugfs, resulting the debugfs of ntb_hw_idt can never be created later. idt_pci_driver_init() debugfs_create_dir() # create debugfs directory pci_register_driver() driver_register() bus_add_driver() priv = kzalloc(...) # OOM happened # return without destroy debugfs directory Fix by removing debugfs when pci_register_driver() returns error. Fixes: bf2a952d31d2 ("NTB: Add IDT 89HPESxNTx PCIe-switches support") Signed-off-by: Yuan Can Signed-off-by: Jon Mason Signed-off-by: Sasha Levin --- drivers/ntb/hw/idt/ntb_hw_idt.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c index 0ed6f809ff2e..51799fccf840 100644 --- a/drivers/ntb/hw/idt/ntb_hw_idt.c +++ b/drivers/ntb/hw/idt/ntb_hw_idt.c @@ -2891,6 +2891,7 @@ static struct pci_driver idt_pci_driver = { static int __init idt_pci_driver_init(void) { + int ret; pr_info("%s %s\n", NTB_DESC, NTB_VER); /* Create the top DebugFS directory if the FS is initialized */ @@ -2898,7 +2899,11 @@ static int __init idt_pci_driver_init(void) dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME, NULL); /* Register the NTB hardware driver to handle the PCI device */ - return pci_register_driver(&idt_pci_driver); + ret = pci_register_driver(&idt_pci_driver); + if (ret) + debugfs_remove_recursive(dbgfs_topdir); + + return ret; } module_init(idt_pci_driver_init); From d4317d41f0007ff9a362705c8af608d30ee84f84 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Sat, 5 Nov 2022 09:43:09 +0000 Subject: [PATCH 036/224] NTB: amd: Fix error handling in amd_ntb_pci_driver_init() [ Upstream commit 98af0a33c1101c29b3ce4f0cf4715fd927c717f9 ] A problem about ntb_hw_amd create debugfs failed is triggered with the following log given: [ 618.431232] AMD(R) PCI-E Non-Transparent Bridge Driver 1.0 [ 618.433284] debugfs: Directory 'ntb_hw_amd' with parent '/' already present! The reason is that amd_ntb_pci_driver_init() returns pci_register_driver() directly without checking its return value, if pci_register_driver() failed, it returns without destroy the newly created debugfs, resulting the debugfs of ntb_hw_amd can never be created later. amd_ntb_pci_driver_init() debugfs_create_dir() # create debugfs directory pci_register_driver() driver_register() bus_add_driver() priv = kzalloc(...) # OOM happened # return without destroy debugfs directory Fix by removing debugfs when pci_register_driver() returns error. Fixes: a1b3695820aa ("NTB: Add support for AMD PCI-Express Non-Transparent Bridge") Signed-off-by: Yuan Can Signed-off-by: Jon Mason Signed-off-by: Sasha Levin --- drivers/ntb/hw/amd/ntb_hw_amd.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c index 04550b1f984c..730f2103b91d 100644 --- a/drivers/ntb/hw/amd/ntb_hw_amd.c +++ b/drivers/ntb/hw/amd/ntb_hw_amd.c @@ -1338,12 +1338,17 @@ static struct pci_driver amd_ntb_pci_driver = { static int __init amd_ntb_pci_driver_init(void) { + int ret; pr_info("%s %s\n", NTB_DESC, NTB_VER); if (debugfs_initialized()) debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); - return pci_register_driver(&amd_ntb_pci_driver); + ret = pci_register_driver(&amd_ntb_pci_driver); + if (ret) + debugfs_remove_recursive(debugfs_dir); + + return ret; } module_init(amd_ntb_pci_driver_init); From bece67815ab474b84b414443b1a72cb9450af63a Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Sat, 5 Nov 2022 09:43:22 +0000 Subject: [PATCH 037/224] ntb: intel: Fix error handling in intel_ntb_pci_driver_init() [ Upstream commit 4c3c796aca02883ad35bb117468938cc4022ca41 ] A problem about ntb_hw_intel create debugfs failed is triggered with the following log given: [ 273.112733] Intel(R) PCI-E Non-Transparent Bridge Driver 2.0 [ 273.115342] debugfs: Directory 'ntb_hw_intel' with parent '/' already present! The reason is that intel_ntb_pci_driver_init() returns pci_register_driver() directly without checking its return value, if pci_register_driver() failed, it returns without destroy the newly created debugfs, resulting the debugfs of ntb_hw_intel can never be created later. intel_ntb_pci_driver_init() debugfs_create_dir() # create debugfs directory pci_register_driver() driver_register() bus_add_driver() priv = kzalloc(...) # OOM happened # return without destroy debugfs directory Fix by removing debugfs when pci_register_driver() returns error. Fixes: e26a5843f7f5 ("NTB: Split ntb_hw_intel and ntb_transport drivers") Signed-off-by: Yuan Can Acked-by: Dave Jiang Signed-off-by: Jon Mason Signed-off-by: Sasha Levin --- drivers/ntb/hw/intel/ntb_hw_gen1.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/ntb/hw/intel/ntb_hw_gen1.c b/drivers/ntb/hw/intel/ntb_hw_gen1.c index 84772013812b..60a4ebc7bf35 100644 --- a/drivers/ntb/hw/intel/ntb_hw_gen1.c +++ b/drivers/ntb/hw/intel/ntb_hw_gen1.c @@ -2064,12 +2064,17 @@ static struct pci_driver intel_ntb_pci_driver = { static int __init intel_ntb_pci_driver_init(void) { + int ret; pr_info("%s %s\n", NTB_DESC, NTB_VER); if (debugfs_initialized()) debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); - return pci_register_driver(&intel_ntb_pci_driver); + ret = pci_register_driver(&intel_ntb_pci_driver); + if (ret) + debugfs_remove_recursive(debugfs_dir); + + return ret; } module_init(intel_ntb_pci_driver_init); From 8d7b8758665b9a28b1c1ab3254c1859e29be6c9a Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 10 Nov 2022 23:19:17 +0800 Subject: [PATCH 038/224] NTB: ntb_transport: fix possible memory leak while device_register() fails [ Upstream commit 8623ccbfc55d962e19a3537652803676ad7acb90 ] If device_register() returns error, the name allocated by dev_set_name() need be freed. As comment of device_register() says, it should use put_device() to give up the reference in the error path. So fix this by calling put_device(), then the name can be freed in kobject_cleanup(), and client_dev is freed in ntb_transport_client_release(). Fixes: fce8a7bb5b4b ("PCI-Express Non-Transparent Bridge Support") Signed-off-by: Yang Yingliang Reviewed-by: Dave Jiang Signed-off-by: Jon Mason Signed-off-by: Sasha Levin --- drivers/ntb/ntb_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index a9b97ebc71ac..2abd2235bbca 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -410,7 +410,7 @@ int ntb_transport_register_client_dev(char *device_name) rc = device_register(dev); if (rc) { - kfree(client_dev); + put_device(dev); goto err; } From 5554414227dbcc757e4f0a10c109ec44f0c07237 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Tue, 22 Nov 2022 11:32:44 +0800 Subject: [PATCH 039/224] NTB: ntb_tool: Add check for devm_kcalloc [ Upstream commit 2790143f09938776a3b4f69685b380bae8fd06c7 ] As the devm_kcalloc may return NULL pointer, it should be better to add check for the return value, as same as the others. Fixes: 7f46c8b3a552 ("NTB: ntb_tool: Add full multi-port NTB API support") Signed-off-by: Jiasheng Jiang Reviewed-by: Serge Semin Reviewed-by: Dave Jiang Signed-off-by: Jon Mason Signed-off-by: Sasha Levin --- drivers/ntb/test/ntb_tool.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c index 5ee0afa621a9..eeeb4b1c97d2 100644 --- a/drivers/ntb/test/ntb_tool.c +++ b/drivers/ntb/test/ntb_tool.c @@ -998,6 +998,8 @@ static int tool_init_mws(struct tool_ctx *tc) tc->peers[pidx].outmws = devm_kcalloc(&tc->ntb->dev, tc->peers[pidx].outmw_cnt, sizeof(*tc->peers[pidx].outmws), GFP_KERNEL); + if (tc->peers[pidx].outmws == NULL) + return -ENOMEM; for (widx = 0; widx < tc->peers[pidx].outmw_cnt; widx++) { tc->peers[pidx].outmws[widx].pidx = pidx; From 436b7cc7eae7851c184b671ed7a4a64c750b86f7 Mon Sep 17 00:00:00 2001 From: Ziyang Xuan Date: Sat, 8 Jul 2023 14:59:10 +0800 Subject: [PATCH 040/224] ipv6/addrconf: fix a potential refcount underflow for idev [ Upstream commit 06a0716949c22e2aefb648526580671197151acc ] Now in addrconf_mod_rs_timer(), reference idev depends on whether rs_timer is not pending. Then modify rs_timer timeout. There is a time gap in [1], during which if the pending rs_timer becomes not pending. It will miss to hold idev, but the rs_timer is activated. Thus rs_timer callback function addrconf_rs_timer() will be executed and put idev later without holding idev. A refcount underflow issue for idev can be caused by this. if (!timer_pending(&idev->rs_timer)) in6_dev_hold(idev); <--------------[1] mod_timer(&idev->rs_timer, jiffies + when); To fix the issue, hold idev if mod_timer() return 0. Fixes: b7b1bfce0bb6 ("ipv6: split duplicate address detection and router solicitation timer") Suggested-by: Eric Dumazet Signed-off-by: Ziyang Xuan Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/addrconf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e6c7edcf6834..51bfc74805ec 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -318,9 +318,8 @@ static void addrconf_del_dad_work(struct inet6_ifaddr *ifp) static void addrconf_mod_rs_timer(struct inet6_dev *idev, unsigned long when) { - if (!timer_pending(&idev->rs_timer)) + if (!mod_timer(&idev->rs_timer, jiffies + when)) in6_dev_hold(idev); - mod_timer(&idev->rs_timer, jiffies + when); } static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp, From 89726b030373224de23192913774c00f49a07313 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Mon, 10 Jul 2023 09:39:07 +0800 Subject: [PATCH 041/224] net: dsa: qca8k: Add check for skb_copy [ Upstream commit 87355b7c3da9bfd81935caba0ab763355147f7b0 ] Add check for the return value of skb_copy in order to avoid NULL pointer dereference. Fixes: 2cd548566384 ("net: dsa: qca8k: add support for phy read/write with mgmt Ethernet") Signed-off-by: Jiasheng Jiang Reviewed-by: Pavan Chebbi Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/dsa/qca/qca8k-8xxx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c index 7a6166a0c9bc..b3f798866899 100644 --- a/drivers/net/dsa/qca/qca8k-8xxx.c +++ b/drivers/net/dsa/qca/qca8k-8xxx.c @@ -469,6 +469,9 @@ qca8k_phy_eth_busy_wait(struct qca8k_mgmt_eth_data *mgmt_eth_data, bool ack; int ret; + if (!skb) + return -ENOMEM; + reinit_completion(&mgmt_eth_data->rw_done); /* Increment seq_num and set it in the copy pkt */ From 7772d5c44011aaf7035ff86b5881e89e7765c49f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 21 Jun 2023 18:11:54 +0300 Subject: [PATCH 042/224] platform/x86: wmi: Break possible infinite loop when parsing GUID [ Upstream commit 028e6e204ace1f080cfeacd72c50397eb8ae8883 ] The while-loop may break on one of the two conditions, either ID string is empty or GUID matches. The second one, may never be reached if the parsed string is not correct GUID. In such a case the loop will never advance to check the next ID. Break possible infinite loop by factoring out guid_parse_and_compare() helper which may be moved to the generic header for everyone later on and preventing from similar mistake in the future. Interestingly that firstly it appeared when WMI was turned into a bus driver, but later when duplicated GUIDs were checked, the while-loop has been replaced by for-loop and hence no mistake made again. Fixes: a48e23385fcf ("platform/x86: wmi: add context pointer field to struct wmi_device_id") Fixes: 844af950da94 ("platform/x86: wmi: Turn WMI into a bus driver") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230621151155.78279-1-andriy.shevchenko@linux.intel.com Tested-by: Armin Wolf Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/wmi.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 223550a10d4d..2fe6e147785e 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -135,6 +135,16 @@ static acpi_status find_guid(const char *guid_string, struct wmi_block **out) return AE_NOT_FOUND; } +static bool guid_parse_and_compare(const char *string, const guid_t *guid) +{ + guid_t guid_input; + + if (guid_parse(string, &guid_input)) + return false; + + return guid_equal(&guid_input, guid); +} + static const void *find_guid_context(struct wmi_block *wblock, struct wmi_driver *wdriver) { @@ -145,11 +155,7 @@ static const void *find_guid_context(struct wmi_block *wblock, return NULL; while (*id->guid_string) { - guid_t guid_input; - - if (guid_parse(id->guid_string, &guid_input)) - continue; - if (guid_equal(&wblock->gblock.guid, &guid_input)) + if (guid_parse_and_compare(id->guid_string, &wblock->gblock.guid)) return id->context; id++; } @@ -833,11 +839,7 @@ static int wmi_dev_match(struct device *dev, struct device_driver *driver) return 0; while (*id->guid_string) { - guid_t driver_guid; - - if (WARN_ON(guid_parse(id->guid_string, &driver_guid))) - continue; - if (guid_equal(&driver_guid, &wblock->gblock.guid)) + if (guid_parse_and_compare(id->guid_string, &wblock->gblock.guid)) return 1; id++; From f92a82dc486a539e2888b403ab18cf3963eb5b4d Mon Sep 17 00:00:00 2001 From: "Tzvetomir Stoyanov (VMware)" Date: Mon, 3 Jul 2023 07:28:53 +0300 Subject: [PATCH 043/224] kernel/trace: Fix cleanup logic of enable_trace_eprobe [ Upstream commit cf0a624dc706c306294c14e6b3e7694702f25191 ] The enable_trace_eprobe() function enables all event probes, attached to given trace probe. If an error occurs in enabling one of the event probes, all others should be roll backed. There is a bug in that roll back logic - instead of all event probes, only the failed one is disabled. Link: https://lore.kernel.org/all/20230703042853.1427493-1-tz.stoyanov@gmail.com/ Reported-by: Dan Carpenter Fixes: 7491e2c44278 ("tracing: Add a probe that attaches to trace events") Signed-off-by: Tzvetomir Stoyanov (VMware) Acked-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Sasha Levin --- kernel/trace/trace_eprobe.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index 753fc536525d..d2370cdb4c1d 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -743,6 +743,7 @@ static int enable_trace_eprobe(struct trace_event_call *call, struct trace_eprobe *ep; bool enabled; int ret = 0; + int cnt = 0; tp = trace_probe_primary_from_call(call); if (WARN_ON_ONCE(!tp)) @@ -766,12 +767,25 @@ static int enable_trace_eprobe(struct trace_event_call *call, if (ret) break; enabled = true; + cnt++; } if (ret) { /* Failed to enable one of them. Roll back all */ - if (enabled) - disable_eprobe(ep, file->tr); + if (enabled) { + /* + * It's a bug if one failed for something other than memory + * not being available but another eprobe succeeded. + */ + WARN_ON_ONCE(ret != -ENOMEM); + + list_for_each_entry(pos, trace_probe_probe_list(tp), list) { + ep = container_of(pos, struct trace_eprobe, tp); + disable_eprobe(ep, file->tr); + if (!--cnt) + break; + } + } if (file) trace_probe_remove_file(tp, file); else From 1b87509ef6ad75d5b31e836a73b4171b197bb564 Mon Sep 17 00:00:00 2001 From: Florian Kauer Date: Wed, 14 Jun 2023 16:07:13 +0200 Subject: [PATCH 044/224] igc: Fix launchtime before start of cycle [ Upstream commit c1bca9ac0bcb355be11354c2e68bc7bf31f5ac5a ] It is possible (verified on a running system) that frames are processed by igc_tx_launchtime with a txtime before the start of the cycle (baset_est). However, the result of txtime - baset_est is written into a u32, leading to a wrap around to a positive number. The following launchtime > 0 check will only branch to executing launchtime = 0 if launchtime is already 0. Fix it by using a s32 before checking launchtime > 0. Fixes: db0b124f02ba ("igc: Enhance Qbv scheduling by using first flag bit") Signed-off-by: Florian Kauer Reviewed-by: Kurt Kanzenbach Tested-by: Naama Meir Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igc/igc_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 93e90c353f1a..743d27789679 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1014,7 +1014,7 @@ static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime, ktime_t base_time = adapter->base_time; ktime_t now = ktime_get_clocktai(); ktime_t baset_est, end_of_cycle; - u32 launchtime; + s32 launchtime; s64 n; n = div64_s64(ktime_sub_ns(now, base_time), cycle_time); From 66afb6a54e44bb272fa2eeda6354e81e21e55dde Mon Sep 17 00:00:00 2001 From: Florian Kauer Date: Wed, 14 Jun 2023 16:07:14 +0200 Subject: [PATCH 045/224] igc: Fix inserting of empty frame for launchtime [ Upstream commit 0bcc62858d6ba62cbade957d69745e6adeed5f3d ] The insertion of an empty frame was introduced with commit db0b124f02ba ("igc: Enhance Qbv scheduling by using first flag bit") in order to ensure that the current cycle has at least one packet if there is some packet to be scheduled for the next cycle. However, the current implementation does not properly check if a packet is already scheduled for the current cycle. Currently, an empty packet is always inserted if and only if txtime >= end_of_cycle && txtime > last_tx_cycle but since last_tx_cycle is always either the end of the current cycle (end_of_cycle) or the end of a previous cycle, the second part (txtime > last_tx_cycle) is always true unless txtime == last_tx_cycle. What actually needs to be checked here is if the last_tx_cycle was already written within the current cycle, so an empty frame should only be inserted if and only if txtime >= end_of_cycle && end_of_cycle > last_tx_cycle. This patch does not only avoid an unnecessary insertion, but it can actually be harmful to insert an empty packet if packets are already scheduled in the current cycle, because it can lead to a situation where the empty packet is actually processed as the first packet in the upcoming cycle shifting the packet with the first_flag even one cycle into the future, finally leading to a TX hang. The TX hang can be reproduced on a i225 with: sudo tc qdisc replace dev enp1s0 parent root handle 100 taprio \ num_tc 1 \ map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ queues 1@0 \ base-time 0 \ sched-entry S 01 300000 \ flags 0x1 \ txtime-delay 500000 \ clockid CLOCK_TAI sudo tc qdisc replace dev enp1s0 parent 100:1 etf \ clockid CLOCK_TAI \ delta 500000 \ offload \ skip_sock_check and traffic generator sudo trafgen -i traffic.cfg -o enp1s0 --cpp -n0 -q -t1400ns with traffic.cfg #define ETH_P_IP 0x0800 { /* Ethernet Header */ 0x30, 0x1f, 0x9a, 0xd0, 0xf0, 0x0e, # MAC Dest - adapt as needed 0x24, 0x5e, 0xbe, 0x57, 0x2e, 0x36, # MAC Src - adapt as needed const16(ETH_P_IP), /* IPv4 Header */ 0b01000101, 0, # IPv4 version, IHL, TOS const16(1028), # IPv4 total length (UDP length + 20 bytes (IP header)) const16(2), # IPv4 ident 0b01000000, 0, # IPv4 flags, fragmentation off 64, # IPv4 TTL 17, # Protocol UDP csumip(14, 33), # IPv4 checksum /* UDP Header */ 10, 0, 48, 1, # IP Src - adapt as needed 10, 0, 48, 10, # IP Dest - adapt as needed const16(5555), # UDP Src Port const16(6666), # UDP Dest Port const16(1008), # UDP length (UDP header 8 bytes + payload length) csumudp(14, 34), # UDP checksum /* Payload */ fill('W', 1000), } and the observed message with that is for example igc 0000:01:00.0 enp1s0: Detected Tx Unit Hang Tx Queue <0> TDH <32> TDT <3c> next_to_use <3c> next_to_clean <32> buffer_info[next_to_clean] time_stamp next_to_watch <00000000632a1828> jiffies desc.status <1048000> Fixes: db0b124f02ba ("igc: Enhance Qbv scheduling by using first flag bit") Signed-off-by: Florian Kauer Reviewed-by: Kurt Kanzenbach Tested-by: Naama Meir Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igc/igc_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 743d27789679..273941f90f06 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1027,7 +1027,7 @@ static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime, *first_flag = true; ring->last_ff_cycle = baset_est; - if (ktime_compare(txtime, ring->last_tx_cycle) > 0) + if (ktime_compare(end_of_cycle, ring->last_tx_cycle) > 0) *insert_empty = true; } } From 13a30e22eac3f31fa7aac530a7ff30479f1f7115 Mon Sep 17 00:00:00 2001 From: Ankit Kumar Date: Fri, 23 Jun 2023 18:08:05 +0530 Subject: [PATCH 046/224] nvme: fix the NVME_ID_NS_NVM_STS_MASK definition [ Upstream commit b938e6603660652dc3db66d3c915fbfed3bce21d ] As per NVMe command set specification 1.0c Storage tag size is 7 bits. Fixes: 4020aad85c67 ("nvme: add support for enhanced metadata") Signed-off-by: Ankit Kumar Reviewed-by: Kanchan Joshi Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- include/linux/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index d9fbc5afeaf7..e6fb36b71b59 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -473,7 +473,7 @@ struct nvme_id_ns_nvm { }; enum { - NVME_ID_NS_NVM_STS_MASK = 0x3f, + NVME_ID_NS_NVM_STS_MASK = 0x7f, NVME_ID_NS_NVM_GUARD_SHIFT = 7, NVME_ID_NS_NVM_GUARD_MASK = 0x3, }; From ba05762e4a5c236b560d8cee498d62b54000aeb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Mon, 10 Jul 2023 09:41:31 +0200 Subject: [PATCH 047/224] riscv, bpf: Fix inconsistent JIT image generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c56fb2aab23505bb7160d06097c8de100b82b851 ] In order to generate the prologue and epilogue, the BPF JIT needs to know which registers that are clobbered. Therefore, the during pre-final passes, the prologue is generated after the body of the program body-prologue-epilogue. Then, in the final pass, a proper prologue-body-epilogue JITted image is generated. This scheme has worked most of the time. However, for some large programs with many jumps, e.g. the test_kmod.sh BPF selftest with hardening enabled (blinding constants), this has shown to be incorrect. For the final pass, when the proper prologue-body-epilogue is generated, the image has not converged. This will lead to that the final image will have incorrect jump offsets. The following is an excerpt from an incorrect image: | ... | 3b8: 00c50663 beq a0,a2,3c4 <.text+0x3c4> | 3bc: 0020e317 auipc t1,0x20e | 3c0: 49630067 jalr zero,1174(t1) # 20e852 <.text+0x20e852> | ... | 20e84c: 8796 c.mv a5,t0 | 20e84e: 6422 c.ldsp s0,8(sp) # Epilogue start | 20e850: 6141 c.addi16sp sp,16 | 20e852: 853e c.mv a0,a5 # Incorrect jump target | 20e854: 8082 c.jr ra The image has shrunk, and the epilogue offset is incorrect in the final pass. Correct the problem by always generating proper prologue-body-epilogue outputs, which means that the first pass will only generate the body to track what registers that are touched. Fixes: 2353ecc6f91f ("bpf, riscv: add BPF JIT for RV64G") Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230710074131.19596-1-bjorn@kernel.org Signed-off-by: Sasha Levin --- arch/riscv/net/bpf_jit.h | 6 +++--- arch/riscv/net/bpf_jit_core.c | 19 +++++++++++++------ 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h index d926e0f7ef57..5ee21a19969c 100644 --- a/arch/riscv/net/bpf_jit.h +++ b/arch/riscv/net/bpf_jit.h @@ -69,7 +69,7 @@ struct rv_jit_context { struct bpf_prog *prog; u16 *insns; /* RV insns */ int ninsns; - int body_len; + int prologue_len; int epilogue_offset; int *offset; /* BPF to RV */ int nexentries; @@ -216,8 +216,8 @@ static inline int rv_offset(int insn, int off, struct rv_jit_context *ctx) int from, to; off++; /* BPF branch is from PC+1, RV is from PC */ - from = (insn > 0) ? ctx->offset[insn - 1] : 0; - to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0; + from = (insn > 0) ? ctx->offset[insn - 1] : ctx->prologue_len; + to = (insn + off > 0) ? ctx->offset[insn + off - 1] : ctx->prologue_len; return ninsns_rvoff(to - from); } diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index 737baf8715da..7a26a3e1c73c 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -44,7 +44,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) unsigned int prog_size = 0, extable_size = 0; bool tmp_blinded = false, extra_pass = false; struct bpf_prog *tmp, *orig_prog = prog; - int pass = 0, prev_ninsns = 0, prologue_len, i; + int pass = 0, prev_ninsns = 0, i; struct rv_jit_data *jit_data; struct rv_jit_context *ctx; @@ -83,6 +83,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) prog = orig_prog; goto out_offset; } + + if (build_body(ctx, extra_pass, NULL)) { + prog = orig_prog; + goto out_offset; + } + for (i = 0; i < prog->len; i++) { prev_ninsns += 32; ctx->offset[i] = prev_ninsns; @@ -91,12 +97,15 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) for (i = 0; i < NR_JIT_ITERATIONS; i++) { pass++; ctx->ninsns = 0; + + bpf_jit_build_prologue(ctx); + ctx->prologue_len = ctx->ninsns; + if (build_body(ctx, extra_pass, ctx->offset)) { prog = orig_prog; goto out_offset; } - ctx->body_len = ctx->ninsns; - bpf_jit_build_prologue(ctx); + ctx->epilogue_offset = ctx->ninsns; bpf_jit_build_epilogue(ctx); @@ -162,10 +171,8 @@ skip_init_ctx: if (!prog->is_func || extra_pass) { bpf_jit_binary_lock_ro(jit_data->header); - prologue_len = ctx->epilogue_offset - ctx->body_len; for (i = 0; i < prog->len; i++) - ctx->offset[i] = ninsns_rvoff(prologue_len + - ctx->offset[i]); + ctx->offset[i] = ninsns_rvoff(ctx->offset[i]); bpf_prog_fill_jited_linfo(prog, ctx->offset); out_offset: kfree(ctx->offset); From f1e746aedd7dfbdea84b690c56154a11b68dc4de Mon Sep 17 00:00:00 2001 From: Stanislav Lisovskiy Date: Wed, 28 Jun 2023 17:10:17 +0300 Subject: [PATCH 048/224] drm/i915: Don't preserve dpll_hw_state for slave crtc in Bigjoiner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 5c413188c68da0e4bffc93de1c80257e20741e69 ] If we are using Bigjoiner dpll_hw_state is supposed to be exactly same as for master crtc, so no need to save it's state for slave crtc. Signed-off-by: Stanislav Lisovskiy Fixes: 0ff0e219d9b8 ("drm/i915: Compute clocks earlier") Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20230628141017.18937-1-stanislav.lisovskiy@intel.com (cherry picked from commit cbaf758809952c95ec00e796695049babb08bb60) Signed-off-by: Tvrtko Ursulin Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/display/intel_display.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 455d9ae6c41c..da9b995b54c8 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -5133,7 +5133,6 @@ copy_bigjoiner_crtc_state_modeset(struct intel_atomic_state *state, saved_state->uapi = slave_crtc_state->uapi; saved_state->scaler_state = slave_crtc_state->scaler_state; saved_state->shared_dpll = slave_crtc_state->shared_dpll; - saved_state->dpll_hw_state = slave_crtc_state->dpll_hw_state; saved_state->crc_enabled = slave_crtc_state->crc_enabled; intel_crtc_free_hw_state(slave_crtc_state); From 5a9aecb6651c6e1ef8198c162340ebe172b7a551 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 7 Jul 2023 13:55:03 +0100 Subject: [PATCH 049/224] drm/i915: Fix one wrong caching mode enum usage [ Upstream commit 113899c2669dff148b2a5bea4780123811aecc13 ] Commit a4d86249c773 ("drm/i915/gt: Provide a utility to create a scratch buffer") mistakenly passed in uapi I915_CACHING_CACHED as argument to i915_gem_object_set_cache_coherency(), which actually takes internal enum i915_cache_level. No functional issue since the value matches I915_CACHE_LLC (1 == 1), which is the intended caching mode, but lets clean it up nevertheless. Signed-off-by: Tvrtko Ursulin Fixes: a4d86249c773 ("drm/i915/gt: Provide a utility to create a scratch buffer") Cc: Daniele Ceraolo Spurio Reviewed-by: Tejas Upadhyay Link: https://patchwork.freedesktop.org/patch/msgid/20230707125503.3965817-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit 49c60b2f0867ac36fd54d513882a48431aeccae7) Signed-off-by: Tvrtko Ursulin Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gt/intel_gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 2eaeba14319e..f4879f437bfa 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -611,7 +611,7 @@ __vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size) if (IS_ERR(obj)) return ERR_CAST(obj); - i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED); + i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { From 27272795a72cd19a8ad6fb0e4da61543a1620a68 Mon Sep 17 00:00:00 2001 From: Suman Ghosh Date: Mon, 10 Jul 2023 16:00:27 +0530 Subject: [PATCH 050/224] octeontx2-pf: Add additional check for MCAM rules [ Upstream commit 8278ee2a2646b9acf747317895e47a640ba933c9 ] Due to hardware limitation, MCAM drop rule with ether_type == 802.1Q and vlan_id == 0 is not supported. Hence rejecting such rules. Fixes: dce677da57c0 ("octeontx2-pf: Add vlan-etype to ntuple filters") Signed-off-by: Suman Ghosh Link: https://lore.kernel.org/r/20230710103027.2244139-1-sumang@marvell.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../ethernet/marvell/octeontx2/nic/otx2_flows.c | 8 ++++++++ .../net/ethernet/marvell/octeontx2/nic/otx2_tc.c | 15 +++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c index d0554f6d2673..934c199667b5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c @@ -867,6 +867,14 @@ static int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp, return -EINVAL; vlan_etype = be16_to_cpu(fsp->h_ext.vlan_etype); + + /* Drop rule with vlan_etype == 802.1Q + * and vlan_id == 0 is not supported + */ + if (vlan_etype == ETH_P_8021Q && !fsp->m_ext.vlan_tci && + fsp->ring_cookie == RX_CLS_FLOW_DISC) + return -EINVAL; + /* Only ETH_P_8021Q and ETH_P_802AD types supported */ if (vlan_etype != ETH_P_8021Q && vlan_etype != ETH_P_8021AD) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c index 6a01ab1a6e6f..1aeb18a901b1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -579,6 +579,21 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node, return -EOPNOTSUPP; } + if (!match.mask->vlan_id) { + struct flow_action_entry *act; + int i; + + flow_action_for_each(i, act, &rule->action) { + if (act->id == FLOW_ACTION_DROP) { + netdev_err(nic->netdev, + "vlan tpid 0x%x with vlan_id %d is not supported for DROP rule.\n", + ntohs(match.key->vlan_tpid), + match.key->vlan_id); + return -EOPNOTSUPP; + } + } + } + if (match.mask->vlan_id || match.mask->vlan_dei || match.mask->vlan_priority) { From 83879f72e05501064ecf3fbc832007ab569faa8d Mon Sep 17 00:00:00 2001 From: Chunhai Guo Date: Mon, 10 Jul 2023 12:25:31 +0800 Subject: [PATCH 051/224] erofs: avoid useless loops in z_erofs_pcluster_readmore() when reading beyond EOF [ Upstream commit 936aa701d82d397c2d1afcd18ce2c739471d978d ] z_erofs_pcluster_readmore() may take a long time to loop when the page offset is large enough, which is unnecessary should be prevented. For example, when the following case is encountered, it will loop 4691368 times, taking about 27 seconds: - offset = 19217289215 - inode_size = 1442672 Signed-off-by: Chunhai Guo Fixes: 386292919c25 ("erofs: introduce readmore decompression strategy") Reviewed-by: Gao Xiang Reviewed-by: Yue Hu Reviewed-by: Chao Yu Link: https://lore.kernel.org/r/20230710042531.28761-1-guochunhai@vivo.com Signed-off-by: Gao Xiang Signed-off-by: Sasha Levin --- fs/erofs/zdata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 92b2e4ddb7ce..bf6a369f9c69 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1660,7 +1660,7 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f, } cur = map->m_la + map->m_llen - 1; - while (cur >= end) { + while ((cur >= end) && (cur < i_size_read(inode))) { pgoff_t index = cur >> PAGE_SHIFT; struct page *page; From dc8158a95fd720990fe6ef47cb1b9487e01b56e5 Mon Sep 17 00:00:00 2001 From: Chunhai Guo Date: Mon, 10 Jul 2023 17:34:10 +0800 Subject: [PATCH 052/224] erofs: avoid infinite loop in z_erofs_do_read_page() when reading beyond EOF [ Upstream commit 8191213a5835b0317c5e4d0d337ae1ae00c75253 ] z_erofs_do_read_page() may loop infinitely due to the inappropriate truncation in the below statement. Since the offset is 64 bits and min_t() truncates the result to 32 bits. The solution is to replace unsigned int with a 64-bit type, such as erofs_off_t. cur = end - min_t(unsigned int, offset + end - map->m_la, end); - For example: - offset = 0x400160000 - end = 0x370 - map->m_la = 0x160370 - offset + end - map->m_la = 0x400000000 - offset + end - map->m_la = 0x00000000 (truncated as unsigned int) - Expected result: - cur = 0 - Actual result: - cur = 0x370 Signed-off-by: Chunhai Guo Fixes: 3883a79abd02 ("staging: erofs: introduce VLE decompression support") Reviewed-by: Gao Xiang Reviewed-by: Chao Yu Link: https://lore.kernel.org/r/20230710093410.44071-1-guochunhai@vivo.com Signed-off-by: Gao Xiang Signed-off-by: Sasha Levin --- fs/erofs/zdata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index bf6a369f9c69..533e612b6a48 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -866,7 +866,7 @@ hitted: */ tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE); - cur = end - min_t(unsigned int, offset + end - map->m_la, end); + cur = end - min_t(erofs_off_t, offset + end - map->m_la, end); if (!(map->m_flags & EROFS_MAP_MAPPED)) { zero_user_segment(page, cur, end); goto next_part; From 9e6474e5d70cdc69108e236cb96bc02afc01fcb9 Mon Sep 17 00:00:00 2001 From: Xin Yin Date: Tue, 11 Jul 2023 14:21:30 +0800 Subject: [PATCH 053/224] erofs: fix fsdax unavailability for chunk-based regular files [ Upstream commit 18bddc5b67038722cb88fcf51fbf41a0277092cb ] DAX can be used to share page cache between VMs, reducing guest memory overhead. And chunk based data format is widely used for VM and container image. So enable dax support for it, make erofs better used for VM scenarios. Fixes: c5aa903a59db ("erofs: support reading chunk-based uncompressed files") Signed-off-by: Xin Yin Reviewed-by: Gao Xiang Reviewed-by: Chao Yu Link: https://lore.kernel.org/r/20230711062130.7860-1-yinxin.x@bytedance.com Signed-off-by: Gao Xiang Signed-off-by: Sasha Levin --- fs/erofs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 5aadc73d5765..e090bcd46db1 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -186,7 +186,8 @@ static void *erofs_read_inode(struct erofs_buf *buf, inode->i_flags &= ~S_DAX; if (test_opt(&sbi->opt, DAX_ALWAYS) && S_ISREG(inode->i_mode) && - vi->datalayout == EROFS_INODE_FLAT_PLAIN) + (vi->datalayout == EROFS_INODE_FLAT_PLAIN || + vi->datalayout == EROFS_INODE_CHUNK_BASED)) inode->i_flags |= S_DAX; if (!nblks) /* measure inode.i_blocks as generic filesystems */ From 4719576d6ed6f14494abc4d5b8a2e463857e6fd1 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 9 Jul 2023 06:31:54 -0700 Subject: [PATCH 054/224] wifi: airo: avoid uninitialized warning in airo_get_rate() [ Upstream commit 9373771aaed17f5c2c38485f785568abe3a9f8c1 ] Quieten a gcc (11.3.0) build error or warning by checking the function call status and returning -EBUSY if the function call failed. This is similar to what several other wireless drivers do for the SIOCGIWRATE ioctl call when there is a locking problem. drivers/net/wireless/cisco/airo.c: error: 'status_rid.currentXmitRate' is used uninitialized [-Werror=uninitialized] Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Randy Dunlap Reported-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/39abf2c7-24a-f167-91da-ed4c5435d1c4@linux-m68k.org Link: https://lore.kernel.org/r/20230709133154.26206-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/wireless/cisco/airo.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c index fb2c35bd73bb..f6362429b735 100644 --- a/drivers/net/wireless/cisco/airo.c +++ b/drivers/net/wireless/cisco/airo.c @@ -6146,8 +6146,11 @@ static int airo_get_rate(struct net_device *dev, { struct airo_info *local = dev->ml_priv; StatusRid status_rid; /* Card status info */ + int ret; - readStatusRid(local, &status_rid, 1); + ret = readStatusRid(local, &status_rid, 1); + if (ret) + return -EBUSY; vwrq->value = le16_to_cpu(status_rid.currentXmitRate) * 500000; /* If more than one rate, set auto */ From b11a9b4f28cb6ff69ef7e69809e5f7fffeac9030 Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Tue, 11 Jul 2023 19:58:48 +0800 Subject: [PATCH 055/224] bpf: cpumap: Fix memory leak in cpu_map_update_elem [ Upstream commit 4369016497319a9635702da010d02af1ebb1849d ] Syzkaller reported a memory leak as follows: BUG: memory leak unreferenced object 0xff110001198ef748 (size 192): comm "syz-executor.3", pid 17672, jiffies 4298118891 (age 9.906s) hex dump (first 32 bytes): 00 00 00 00 4a 19 00 00 80 ad e3 e4 fe ff c0 00 ....J........... 00 b2 d3 0c 01 00 11 ff 28 f5 8e 19 01 00 11 ff ........(....... backtrace: [] __cpu_map_entry_alloc+0xf7/0xb00 [] cpu_map_update_elem+0x2fe/0x3d0 [] bpf_map_update_value.isra.0+0x2bd/0x520 [] map_update_elem+0x4cb/0x720 [] __se_sys_bpf+0x8c3/0xb90 [] do_syscall_64+0x30/0x40 [] entry_SYSCALL_64_after_hwframe+0x61/0xc6 BUG: memory leak unreferenced object 0xff110001198ef528 (size 192): comm "syz-executor.3", pid 17672, jiffies 4298118891 (age 9.906s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] __cpu_map_entry_alloc+0x260/0xb00 [] cpu_map_update_elem+0x2fe/0x3d0 [] bpf_map_update_value.isra.0+0x2bd/0x520 [] map_update_elem+0x4cb/0x720 [] __se_sys_bpf+0x8c3/0xb90 [] do_syscall_64+0x30/0x40 [] entry_SYSCALL_64_after_hwframe+0x61/0xc6 BUG: memory leak unreferenced object 0xff1100010fd93d68 (size 8): comm "syz-executor.3", pid 17672, jiffies 4298118891 (age 9.906s) hex dump (first 8 bytes): 00 00 00 00 00 00 00 00 ........ backtrace: [] kvmalloc_node+0x11e/0x170 [] __cpu_map_entry_alloc+0x2f0/0xb00 [] cpu_map_update_elem+0x2fe/0x3d0 [] bpf_map_update_value.isra.0+0x2bd/0x520 [] map_update_elem+0x4cb/0x720 [] __se_sys_bpf+0x8c3/0xb90 [] do_syscall_64+0x30/0x40 [] entry_SYSCALL_64_after_hwframe+0x61/0xc6 In the cpu_map_update_elem flow, when kthread_stop is called before calling the threadfn of rcpu->kthread, since the KTHREAD_SHOULD_STOP bit of kthread has been set by kthread_stop, the threadfn of rcpu->kthread will never be executed, and rcpu->refcnt will never be 0, which will lead to the allocated rcpu, rcpu->queue and rcpu->queue->queue cannot be released. Calling kthread_stop before executing kthread's threadfn will return -EINTR. We can complete the release of memory resources in this state. Fixes: 6710e1126934 ("bpf: introduce new bpf cpu map type BPF_MAP_TYPE_CPUMAP") Signed-off-by: Pu Lehui Acked-by: Jesper Dangaard Brouer Acked-by: Hou Tao Link: https://lore.kernel.org/r/20230711115848.2701559-1-pulehui@huaweicloud.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/cpumap.c | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index b5ba34ddd4b6..09141351d545 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -127,22 +127,6 @@ static void get_cpu_map_entry(struct bpf_cpu_map_entry *rcpu) atomic_inc(&rcpu->refcnt); } -/* called from workqueue, to workaround syscall using preempt_disable */ -static void cpu_map_kthread_stop(struct work_struct *work) -{ - struct bpf_cpu_map_entry *rcpu; - - rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq); - - /* Wait for flush in __cpu_map_entry_free(), via full RCU barrier, - * as it waits until all in-flight call_rcu() callbacks complete. - */ - rcu_barrier(); - - /* kthread_stop will wake_up_process and wait for it to complete */ - kthread_stop(rcpu->kthread); -} - static void __cpu_map_ring_cleanup(struct ptr_ring *ring) { /* The tear-down procedure should have made sure that queue is @@ -170,6 +154,30 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu) } } +/* called from workqueue, to workaround syscall using preempt_disable */ +static void cpu_map_kthread_stop(struct work_struct *work) +{ + struct bpf_cpu_map_entry *rcpu; + int err; + + rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq); + + /* Wait for flush in __cpu_map_entry_free(), via full RCU barrier, + * as it waits until all in-flight call_rcu() callbacks complete. + */ + rcu_barrier(); + + /* kthread_stop will wake_up_process and wait for it to complete */ + err = kthread_stop(rcpu->kthread); + if (err) { + /* kthread_stop may be called before cpu_map_kthread_run + * is executed, so we need to release the memory related + * to rcpu. + */ + put_cpu_map_entry(rcpu); + } +} + static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu, struct list_head *listp, struct xdp_cpumap_stats *stats) From 174cfa0317df8ee8a3fed7c450159cf3cf4a6c3f Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 11 Jul 2023 10:08:09 +0300 Subject: [PATCH 056/224] net/sched: flower: Ensure both minimum and maximum ports are specified [ Upstream commit d3f87278bcb80bd7f9519669d928b43320363d4f ] The kernel does not currently validate that both the minimum and maximum ports of a port range are specified. This can lead user space to think that a filter matching on a port range was successfully added, when in fact it was not. For example, with a patched (buggy) iproute2 that only sends the minimum port, the following commands do not return an error: # tc filter add dev swp1 ingress pref 1 proto ip flower ip_proto udp src_port 100-200 action pass # tc filter add dev swp1 ingress pref 1 proto ip flower ip_proto udp dst_port 100-200 action pass # tc filter show dev swp1 ingress filter protocol ip pref 1 flower chain 0 filter protocol ip pref 1 flower chain 0 handle 0x1 eth_type ipv4 ip_proto udp not_in_hw action order 1: gact action pass random type none pass val 0 index 1 ref 1 bind 1 filter protocol ip pref 1 flower chain 0 handle 0x2 eth_type ipv4 ip_proto udp not_in_hw action order 1: gact action pass random type none pass val 0 index 2 ref 1 bind 1 Fix by returning an error unless both ports are specified: # tc filter add dev swp1 ingress pref 1 proto ip flower ip_proto udp src_port 100-200 action pass Error: Both min and max source ports must be specified. We have an error talking to the kernel # tc filter add dev swp1 ingress pref 1 proto ip flower ip_proto udp dst_port 100-200 action pass Error: Both min and max destination ports must be specified. We have an error talking to the kernel Fixes: 5c72299fba9d ("net: sched: cls_flower: Classify packets using port ranges") Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sched/cls_flower.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 3de72e7c1075..10e6ec0f9498 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -793,6 +793,16 @@ static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key, TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_range.tp_max.src, TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.src)); + if (mask->tp_range.tp_min.dst != mask->tp_range.tp_max.dst) { + NL_SET_ERR_MSG(extack, + "Both min and max destination ports must be specified"); + return -EINVAL; + } + if (mask->tp_range.tp_min.src != mask->tp_range.tp_max.src) { + NL_SET_ERR_MSG(extack, + "Both min and max source ports must be specified"); + return -EINVAL; + } if (mask->tp_range.tp_min.dst && mask->tp_range.tp_max.dst && ntohs(key->tp_range.tp_max.dst) <= ntohs(key->tp_range.tp_min.dst)) { From 1b125be4e00186f60db758b7baa2bf02b534b979 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Mon, 10 Jul 2023 01:10:36 +0800 Subject: [PATCH 057/224] riscv: mm: fix truncation warning on RV32 [ Upstream commit b690e266dae2f85f4dfea21fa6a05e3500a51054 ] lkp reports below sparse warning when building for RV32: arch/riscv/mm/init.c:1204:48: sparse: warning: cast truncates bits from constant value (100000000 becomes 0) IMO, the reason we didn't see this truncates bug in real world is "0" means MEMBLOCK_ALLOC_ACCESSIBLE in memblock and there's no RV32 HW with more than 4GB memory. Fix it anyway to make sparse happy. Fixes: decf89f86ecd ("riscv: try to allocate crashkern region from 32bit addressible memory") Signed-off-by: Jisheng Zhang Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202306080034.SLiCiOMn-lkp@intel.com/ Link: https://lore.kernel.org/r/20230709171036.1906-1-jszhang@kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 9390cdff39ff..7c4852af9e3f 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -1187,7 +1187,7 @@ static void __init reserve_crashkernel(void) */ crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE, search_start, - min(search_end, (unsigned long) SZ_4G)); + min(search_end, (unsigned long)(SZ_4G - 1))); if (crash_base == 0) { /* Try again without restricting region to 32bit addressible memory */ crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE, From 1c806e406627962f3d87a7d5979a5659c7063235 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 11 Jul 2023 11:52:26 +0300 Subject: [PATCH 058/224] netdevsim: fix uninitialized data in nsim_dev_trap_fa_cookie_write() [ Upstream commit f72207a5c0dbaaf6921cf9a6c0d2fd0bc249ea78 ] The simple_write_to_buffer() function is designed to handle partial writes. It returns negatives on error, otherwise it returns the number of bytes that were able to be copied. This code doesn't check the return properly. We only know that the first byte is written, the rest of the buffer might be uninitialized. There is no need to use the simple_write_to_buffer() function. Partial writes are prohibited by the "if (*ppos != 0)" check at the start of the function. Just use memdup_user() and copy the whole buffer. Fixes: d3cbb907ae57 ("netdevsim: add ACL trap reporting cookie as a metadata") Signed-off-by: Dan Carpenter Reviewed-by: Pavan Chebbi Reviewed-by: Ido Schimmel Link: https://lore.kernel.org/r/7c1f950b-3a7d-4252-82a6-876e53078ef7@moroto.mountain Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/netdevsim/dev.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index 68e56e451b2b..c3fbdd6b68ba 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -184,13 +184,10 @@ static ssize_t nsim_dev_trap_fa_cookie_write(struct file *file, cookie_len = (count - 1) / 2; if ((count - 1) % 2) return -EINVAL; - buf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN); - if (!buf) - return -ENOMEM; - ret = simple_write_to_buffer(buf, count, ppos, data, count); - if (ret < 0) - goto free_buf; + buf = memdup_user(data, count); + if (IS_ERR(buf)) + return PTR_ERR(buf); fa_cookie = kmalloc(sizeof(*fa_cookie) + cookie_len, GFP_KERNEL | __GFP_NOWARN); From 3d1dc71b8f76daad756cc39d7de01b12bef274c4 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Mon, 10 Jul 2023 23:16:34 -0300 Subject: [PATCH 059/224] net/sched: make psched_mtu() RTNL-less safe [ Upstream commit 150e33e62c1fa4af5aaab02776b6c3812711d478 ] Eric Dumazet says[1]: ------- Speaking of psched_mtu(), I see that net/sched/sch_pie.c is using it without holding RTNL, so dev->mtu can be changed underneath. KCSAN could issue a warning. ------- Annotate dev->mtu with READ_ONCE() so KCSAN don't issue a warning. [1] https://lore.kernel.org/all/CANn89iJoJO5VtaJ-2=_d2aOQhb0Xw8iBT_Cxqp2HyuS-zj6azw@mail.gmail.com/ v1 -> v2: Fix commit message Fixes: d4b36210c2e6 ("net: pkt_sched: PIE AQM scheme") Suggested-by: Eric Dumazet Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20230711021634.561598-1-pctammela@mojatatu.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/net/pkt_sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 8ab75128512a..f99a513b40a9 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -135,7 +135,7 @@ extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; */ static inline unsigned int psched_mtu(const struct net_device *dev) { - return dev->mtu + dev->hard_header_len; + return READ_ONCE(dev->mtu) + dev->hard_header_len; } static inline struct net *qdisc_net(struct Qdisc *q) From 0aec8dab2be672fbaf4141c41648ed3afd829660 Mon Sep 17 00:00:00 2001 From: Zhang Shurong Date: Thu, 6 Jul 2023 10:45:00 +0800 Subject: [PATCH 060/224] wifi: rtw89: debug: fix error code in rtw89_debug_priv_send_h2c_set() [ Upstream commit 4f4626cd049576af1276c7568d5b44eb3f7bb1b1 ] If there is a failure during rtw89_fw_h2c_raw() rtw89_debug_priv_send_h2c should return negative error code instead of a positive value count. Fix this bug by returning correct error code. Fixes: e3ec7017f6a2 ("rtw89: add Realtek 802.11ax driver") Signed-off-by: Zhang Shurong Acked-by: Ping-Ke Shih Link: https://lore.kernel.org/r/tencent_AD09A61BC4DA92AD1EB0790F5C850E544D07@qq.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtw89/debug.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/debug.c b/drivers/net/wireless/realtek/rtw89/debug.c index 50701c55ed60..ec0af903961f 100644 --- a/drivers/net/wireless/realtek/rtw89/debug.c +++ b/drivers/net/wireless/realtek/rtw89/debug.c @@ -2130,17 +2130,18 @@ static ssize_t rtw89_debug_priv_send_h2c_set(struct file *filp, struct rtw89_debugfs_priv *debugfs_priv = filp->private_data; struct rtw89_dev *rtwdev = debugfs_priv->rtwdev; u8 *h2c; + int ret; u16 h2c_len = count / 2; h2c = rtw89_hex2bin_user(rtwdev, user_buf, count); if (IS_ERR(h2c)) return -EFAULT; - rtw89_fw_h2c_raw(rtwdev, h2c, h2c_len); + ret = rtw89_fw_h2c_raw(rtwdev, h2c, h2c_len); kfree(h2c); - return count; + return ret ? ret : count; } static int From 4b33836824052c91cfa812f1222cdfa0ff8daa41 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Sat, 22 Apr 2023 12:56:11 -0300 Subject: [PATCH 061/224] net/sched: sch_qfq: refactor parsing of netlink parameters [ Upstream commit 25369891fcef373540f8b4e0b3bccf77a04490d5 ] Two parameters can be transformed into netlink policies and validated while parsing the netlink message. Reviewed-by: Simon Horman Acked-by: Jamal Hadi Salim Signed-off-by: Pedro Tammela Signed-off-by: David S. Miller Stable-dep-of: 3e337087c3b5 ("net/sched: sch_qfq: account for stab overhead in qfq_enqueue") Signed-off-by: Sasha Levin --- net/sched/sch_qfq.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 02098a02943e..2f3629c85158 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -113,6 +113,7 @@ #define QFQ_MTU_SHIFT 16 /* to support TSO/GSO */ #define QFQ_MIN_LMAX 512 /* see qfq_slot_insert */ +#define QFQ_MAX_LMAX (1UL << QFQ_MTU_SHIFT) #define QFQ_MAX_AGG_CLASSES 8 /* max num classes per aggregate allowed */ @@ -214,9 +215,14 @@ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid) return container_of(clc, struct qfq_class, common); } +static struct netlink_range_validation lmax_range = { + .min = QFQ_MIN_LMAX, + .max = QFQ_MAX_LMAX, +}; + static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = { - [TCA_QFQ_WEIGHT] = { .type = NLA_U32 }, - [TCA_QFQ_LMAX] = { .type = NLA_U32 }, + [TCA_QFQ_WEIGHT] = NLA_POLICY_RANGE(NLA_U32, 1, QFQ_MAX_WEIGHT), + [TCA_QFQ_LMAX] = NLA_POLICY_FULL_RANGE(NLA_U32, &lmax_range), }; /* @@ -408,17 +414,13 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, } err = nla_parse_nested_deprecated(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], - qfq_policy, NULL); + qfq_policy, extack); if (err < 0) return err; - if (tb[TCA_QFQ_WEIGHT]) { + if (tb[TCA_QFQ_WEIGHT]) weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]); - if (!weight || weight > (1UL << QFQ_MAX_WSHIFT)) { - pr_notice("qfq: invalid weight %u\n", weight); - return -EINVAL; - } - } else + else weight = 1; if (tb[TCA_QFQ_LMAX]) @@ -426,11 +428,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, else lmax = psched_mtu(qdisc_dev(sch)); - if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) { - pr_notice("qfq: invalid max length %u\n", lmax); - return -EINVAL; - } - inv_w = ONE_FP / weight; weight = ONE_FP / inv_w; From 70feebdbfad85772ab3ef152812729cab5c6c426 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Tue, 11 Jul 2023 18:01:02 -0300 Subject: [PATCH 062/224] net/sched: sch_qfq: account for stab overhead in qfq_enqueue [ Upstream commit 3e337087c3b5805fe0b8a46ba622a962880b5d64 ] Lion says: ------- In the QFQ scheduler a similar issue to CVE-2023-31436 persists. Consider the following code in net/sched/sch_qfq.c: static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb), gso_segs; // ... if (unlikely(cl->agg->lmax < len)) { pr_debug("qfq: increasing maxpkt from %u to %u for class %u", cl->agg->lmax, len, cl->common.classid); err = qfq_change_agg(sch, cl, cl->agg->class_weight, len); if (err) { cl->qstats.drops++; return qdisc_drop(skb, sch, to_free); } // ... } Similarly to CVE-2023-31436, "lmax" is increased without any bounds checks according to the packet length "len". Usually this would not impose a problem because packet sizes are naturally limited. This is however not the actual packet length, rather the "qdisc_pkt_len(skb)" which might apply size transformations according to "struct qdisc_size_table" as created by "qdisc_get_stab()" in net/sched/sch_api.c if the TCA_STAB option was set when modifying the qdisc. A user may choose virtually any size using such a table. As a result the same issue as in CVE-2023-31436 can occur, allowing heap out-of-bounds read / writes in the kmalloc-8192 cache. ------- We can create the issue with the following commands: tc qdisc add dev $DEV root handle 1: stab mtu 2048 tsize 512 mpu 0 \ overhead 999999999 linklayer ethernet qfq tc class add dev $DEV parent 1: classid 1:1 htb rate 6mbit burst 15k tc filter add dev $DEV parent 1: matchall classid 1:1 ping -I $DEV 1.1.1.2 This is caused by incorrectly assuming that qdisc_pkt_len() returns a length within the QFQ_MIN_LMAX < len < QFQ_MAX_LMAX. Fixes: 462dbc9101ac ("pkt_sched: QFQ Plus: fair-queueing service at DRR cost") Reported-by: Lion Reviewed-by: Eric Dumazet Signed-off-by: Jamal Hadi Salim Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/sched/sch_qfq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 2f3629c85158..d5610e145da2 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -381,8 +381,13 @@ static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight, u32 lmax) { struct qfq_sched *q = qdisc_priv(sch); - struct qfq_aggregate *new_agg = qfq_find_agg(q, lmax, weight); + struct qfq_aggregate *new_agg; + /* 'lmax' can range from [QFQ_MIN_LMAX, pktlen + stab overhead] */ + if (lmax > QFQ_MAX_LMAX) + return -EINVAL; + + new_agg = qfq_find_agg(q, lmax, weight); if (new_agg == NULL) { /* create new aggregate */ new_agg = kzalloc(sizeof(*new_agg), GFP_ATOMIC); if (new_agg == NULL) From bf2f2c059f17c198e4d041348eb84db1152e7568 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 13 Jul 2023 17:26:20 +0800 Subject: [PATCH 063/224] nvme-pci: fix DMA direction of unmapping integrity data [ Upstream commit b8f6446b6853768cb99e7c201bddce69ca60c15e ] DMA direction should be taken in dma_unmap_page() for unmapping integrity data. Fix this DMA direction, and reported in Guangwu's test. Reported-by: Guangwu Zhang Fixes: 4aedb705437f ("nvme-pci: split metadata handling from nvme_map_data / nvme_unmap_data") Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 145fa7ef3f74..ce2e628f94a0 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1022,7 +1022,7 @@ static __always_inline void nvme_pci_unmap_rq(struct request *req) struct nvme_iod *iod = blk_mq_rq_to_pdu(req); dma_unmap_page(dev->dev, iod->meta_dma, - rq_integrity_vec(req)->bv_len, rq_data_dir(req)); + rq_integrity_vec(req)->bv_len, rq_dma_dir(req)); } if (blk_rq_nr_phys_segments(req)) From 000a9a72efa4a9df289bab9c9e8ba1639c72e0d6 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Mon, 10 Oct 2022 13:15:33 +0300 Subject: [PATCH 064/224] fs/ntfs3: Check fields while reading commit 0e8235d28f3a0e9eda9f02ff67ee566d5f42b66b upstream. Added new functions index_hdr_check and index_buf_check. Now we check all stuff for correctness while reading from disk. Also fixed bug with stale nfs data. Reported-by: van fantasy Signed-off-by: Konstantin Komarov Fixes: 82cae269cfa95 ("fs/ntfs3: Add initialization of super block") Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- fs/ntfs3/index.c | 84 ++++++++++++++++++++++++++++++---- fs/ntfs3/inode.c | 18 ++++---- fs/ntfs3/ntfs_fs.h | 4 +- fs/ntfs3/run.c | 7 ++- fs/ntfs3/xattr.c | 109 +++++++++++++++++++++++++++++---------------- 5 files changed, 164 insertions(+), 58 deletions(-) diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index 98491abf95b9..9e9a9ffd9295 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -605,11 +605,58 @@ static const struct NTFS_DE *hdr_insert_head(struct INDEX_HDR *hdr, return e; } +/* + * index_hdr_check + * + * return true if INDEX_HDR is valid + */ +static bool index_hdr_check(const struct INDEX_HDR *hdr, u32 bytes) +{ + u32 end = le32_to_cpu(hdr->used); + u32 tot = le32_to_cpu(hdr->total); + u32 off = le32_to_cpu(hdr->de_off); + + if (!IS_ALIGNED(off, 8) || tot > bytes || end > tot || + off + sizeof(struct NTFS_DE) > end) { + /* incorrect index buffer. */ + return false; + } + + return true; +} + +/* + * index_buf_check + * + * return true if INDEX_BUFFER seems is valid + */ +static bool index_buf_check(const struct INDEX_BUFFER *ib, u32 bytes, + const CLST *vbn) +{ + const struct NTFS_RECORD_HEADER *rhdr = &ib->rhdr; + u16 fo = le16_to_cpu(rhdr->fix_off); + u16 fn = le16_to_cpu(rhdr->fix_num); + + if (bytes <= offsetof(struct INDEX_BUFFER, ihdr) || + rhdr->sign != NTFS_INDX_SIGNATURE || + fo < sizeof(struct INDEX_BUFFER) + /* Check index buffer vbn. */ + || (vbn && *vbn != le64_to_cpu(ib->vbn)) || (fo % sizeof(short)) || + fo + fn * sizeof(short) >= bytes || + fn != ((bytes >> SECTOR_SHIFT) + 1)) { + /* incorrect index buffer. */ + return false; + } + + return index_hdr_check(&ib->ihdr, + bytes - offsetof(struct INDEX_BUFFER, ihdr)); +} + void fnd_clear(struct ntfs_fnd *fnd) { int i; - for (i = 0; i < fnd->level; i++) { + for (i = fnd->level - 1; i >= 0; i--) { struct indx_node *n = fnd->nodes[i]; if (!n) @@ -828,9 +875,16 @@ int indx_init(struct ntfs_index *indx, struct ntfs_sb_info *sbi, u32 t32; const struct INDEX_ROOT *root = resident_data(attr); + t32 = le32_to_cpu(attr->res.data_size); + if (t32 <= offsetof(struct INDEX_ROOT, ihdr) || + !index_hdr_check(&root->ihdr, + t32 - offsetof(struct INDEX_ROOT, ihdr))) { + goto out; + } + /* Check root fields. */ if (!root->index_block_clst) - return -EINVAL; + goto out; indx->type = type; indx->idx2vbn_bits = __ffs(root->index_block_clst); @@ -842,19 +896,19 @@ int indx_init(struct ntfs_index *indx, struct ntfs_sb_info *sbi, if (t32 < sbi->cluster_size) { /* Index record is smaller than a cluster, use 512 blocks. */ if (t32 != root->index_block_clst * SECTOR_SIZE) - return -EINVAL; + goto out; /* Check alignment to a cluster. */ if ((sbi->cluster_size >> SECTOR_SHIFT) & (root->index_block_clst - 1)) { - return -EINVAL; + goto out; } indx->vbn2vbo_bits = SECTOR_SHIFT; } else { /* Index record must be a multiple of cluster size. */ if (t32 != root->index_block_clst << sbi->cluster_bits) - return -EINVAL; + goto out; indx->vbn2vbo_bits = sbi->cluster_bits; } @@ -862,7 +916,14 @@ int indx_init(struct ntfs_index *indx, struct ntfs_sb_info *sbi, init_rwsem(&indx->run_lock); indx->cmp = get_cmp_func(root); - return indx->cmp ? 0 : -EINVAL; + if (!indx->cmp) + goto out; + + return 0; + +out: + ntfs_set_state(sbi, NTFS_DIRTY_DIRTY); + return -EINVAL; } static struct indx_node *indx_new(struct ntfs_index *indx, @@ -1020,6 +1081,13 @@ int indx_read(struct ntfs_index *indx, struct ntfs_inode *ni, CLST vbn, goto out; ok: + if (!index_buf_check(ib, bytes, &vbn)) { + ntfs_inode_err(&ni->vfs_inode, "directory corrupted"); + ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_ERROR); + err = -EINVAL; + goto out; + } + if (err == -E_NTFS_FIXUP) { ntfs_write_bh(ni->mi.sbi, &ib->rhdr, &in->nb, 0); err = 0; @@ -1607,9 +1675,9 @@ static int indx_insert_into_root(struct ntfs_index *indx, struct ntfs_inode *ni, if (err) { /* Restore root. */ - if (mi_resize_attr(mi, attr, -ds_root)) + if (mi_resize_attr(mi, attr, -ds_root)) { memcpy(attr, a_root, asize); - else { + } else { /* Bug? */ ntfs_set_state(sbi, NTFS_DIRTY_ERROR); } diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index e52dfa5c7562..dc937089a464 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -81,7 +81,7 @@ static struct inode *ntfs_read_mft(struct inode *inode, le16_to_cpu(ref->seq), le16_to_cpu(rec->seq)); goto out; } else if (!is_rec_inuse(rec)) { - err = -EINVAL; + err = -ESTALE; ntfs_err(sb, "Inode r=%x is not in use!", (u32)ino); goto out; } @@ -92,8 +92,10 @@ static struct inode *ntfs_read_mft(struct inode *inode, goto out; } - if (!is_rec_base(rec)) - goto Ok; + if (!is_rec_base(rec)) { + err = -EINVAL; + goto out; + } /* Record should contain $I30 root. */ is_dir = rec->flags & RECORD_FLAG_DIR; @@ -472,7 +474,6 @@ end_enum: inode->i_flags |= S_NOSEC; } -Ok: if (ino == MFT_REC_MFT && !sb->s_root) sbi->mft.ni = NULL; @@ -526,6 +527,9 @@ struct inode *ntfs_iget5(struct super_block *sb, const struct MFT_REF *ref, _ntfs_bad_inode(inode); } + if (IS_ERR(inode) && name) + ntfs_set_state(sb->s_fs_info, NTFS_DIRTY_ERROR); + return inode; } @@ -1641,10 +1645,8 @@ out6: ntfs_remove_reparse(sbi, IO_REPARSE_TAG_SYMLINK, &new_de->ref); out5: - if (S_ISDIR(mode) || run_is_empty(&ni->file.run)) - goto out4; - - run_deallocate(sbi, &ni->file.run, false); + if (!S_ISDIR(mode)) + run_deallocate(sbi, &ni->file.run, false); out4: clear_rec_inuse(rec); diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index c5c022fef4e0..24227b2e1b2b 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -794,12 +794,12 @@ int run_pack(const struct runs_tree *run, CLST svcn, CLST len, u8 *run_buf, u32 run_buf_size, CLST *packed_vcns); int run_unpack(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, CLST svcn, CLST evcn, CLST vcn, const u8 *run_buf, - u32 run_buf_size); + int run_buf_size); #ifdef NTFS3_CHECK_FREE_CLST int run_unpack_ex(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, CLST svcn, CLST evcn, CLST vcn, const u8 *run_buf, - u32 run_buf_size); + int run_buf_size); #else #define run_unpack_ex run_unpack #endif diff --git a/fs/ntfs3/run.c b/fs/ntfs3/run.c index aaaa0d3d35a2..12d8682f33b5 100644 --- a/fs/ntfs3/run.c +++ b/fs/ntfs3/run.c @@ -919,12 +919,15 @@ out: */ int run_unpack(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, CLST svcn, CLST evcn, CLST vcn, const u8 *run_buf, - u32 run_buf_size) + int run_buf_size) { u64 prev_lcn, vcn64, lcn, next_vcn; const u8 *run_last, *run_0; bool is_mft = ino == MFT_REC_MFT; + if (run_buf_size < 0) + return -EINVAL; + /* Check for empty. */ if (evcn + 1 == svcn) return 0; @@ -1046,7 +1049,7 @@ int run_unpack(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, */ int run_unpack_ex(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, CLST svcn, CLST evcn, CLST vcn, const u8 *run_buf, - u32 run_buf_size) + int run_buf_size) { int ret, err; CLST next_vcn, lcn, len; diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index 88866bcd1a21..f5d3092f478c 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -42,28 +42,26 @@ static inline size_t packed_ea_size(const struct EA_FULL *ea) * Assume there is at least one xattr in the list. */ static inline bool find_ea(const struct EA_FULL *ea_all, u32 bytes, - const char *name, u8 name_len, u32 *off) + const char *name, u8 name_len, u32 *off, u32 *ea_sz) { - *off = 0; + u32 ea_size; - if (!ea_all || !bytes) + *off = 0; + if (!ea_all) return false; - for (;;) { + for (; *off < bytes; *off += ea_size) { const struct EA_FULL *ea = Add2Ptr(ea_all, *off); - u32 next_off = *off + unpacked_ea_size(ea); - - if (next_off > bytes) - return false; - + ea_size = unpacked_ea_size(ea); if (ea->name_len == name_len && - !memcmp(ea->name, name, name_len)) + !memcmp(ea->name, name, name_len)) { + if (ea_sz) + *ea_sz = ea_size; return true; - - *off = next_off; - if (next_off >= bytes) - return false; + } } + + return false; } /* @@ -74,12 +72,12 @@ static inline bool find_ea(const struct EA_FULL *ea_all, u32 bytes, static int ntfs_read_ea(struct ntfs_inode *ni, struct EA_FULL **ea, size_t add_bytes, const struct EA_INFO **info) { - int err; + int err = -EINVAL; struct ntfs_sb_info *sbi = ni->mi.sbi; struct ATTR_LIST_ENTRY *le = NULL; struct ATTRIB *attr_info, *attr_ea; void *ea_p; - u32 size; + u32 size, off, ea_size; static_assert(le32_to_cpu(ATTR_EA_INFO) < le32_to_cpu(ATTR_EA)); @@ -96,24 +94,31 @@ static int ntfs_read_ea(struct ntfs_inode *ni, struct EA_FULL **ea, *info = resident_data_ex(attr_info, sizeof(struct EA_INFO)); if (!*info) - return -EINVAL; + goto out; /* Check Ea limit. */ size = le32_to_cpu((*info)->size); - if (size > sbi->ea_max_size) - return -EFBIG; + if (size > sbi->ea_max_size) { + err = -EFBIG; + goto out; + } - if (attr_size(attr_ea) > sbi->ea_max_size) - return -EFBIG; + if (attr_size(attr_ea) > sbi->ea_max_size) { + err = -EFBIG; + goto out; + } + + if (!size) { + /* EA info persists, but xattr is empty. Looks like EA problem. */ + goto out; + } /* Allocate memory for packed Ea. */ ea_p = kmalloc(size_add(size, add_bytes), GFP_NOFS); if (!ea_p) return -ENOMEM; - if (!size) { - /* EA info persists, but xattr is empty. Looks like EA problem. */ - } else if (attr_ea->non_res) { + if (attr_ea->non_res) { struct runs_tree run; run_init(&run); @@ -124,24 +129,52 @@ static int ntfs_read_ea(struct ntfs_inode *ni, struct EA_FULL **ea, run_close(&run); if (err) - goto out; + goto out1; } else { void *p = resident_data_ex(attr_ea, size); - if (!p) { - err = -EINVAL; - goto out; - } + if (!p) + goto out1; memcpy(ea_p, p, size); } memset(Add2Ptr(ea_p, size), 0, add_bytes); + + /* Check all attributes for consistency. */ + for (off = 0; off < size; off += ea_size) { + const struct EA_FULL *ef = Add2Ptr(ea_p, off); + u32 bytes = size - off; + + /* Check if we can use field ea->size. */ + if (bytes < sizeof(ef->size)) + goto out1; + + if (ef->size) { + ea_size = le32_to_cpu(ef->size); + if (ea_size > bytes) + goto out1; + continue; + } + + /* Check if we can use fields ef->name_len and ef->elength. */ + if (bytes < offsetof(struct EA_FULL, name)) + goto out1; + + ea_size = ALIGN(struct_size(ef, name, + 1 + ef->name_len + + le16_to_cpu(ef->elength)), + 4); + if (ea_size > bytes) + goto out1; + } + *ea = ea_p; return 0; -out: +out1: kfree(ea_p); - *ea = NULL; +out: + ntfs_set_state(sbi, NTFS_DIRTY_DIRTY); return err; } @@ -163,6 +196,7 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer, const struct EA_FULL *ea; u32 off, size; int err; + int ea_size; size_t ret; err = ntfs_read_ea(ni, &ea_all, 0, &info); @@ -175,8 +209,9 @@ static ssize_t ntfs_list_ea(struct ntfs_inode *ni, char *buffer, size = le32_to_cpu(info->size); /* Enumerate all xattrs. */ - for (ret = 0, off = 0; off < size; off += unpacked_ea_size(ea)) { + for (ret = 0, off = 0; off < size; off += ea_size) { ea = Add2Ptr(ea_all, off); + ea_size = unpacked_ea_size(ea); if (!ea->name_len) break; @@ -230,7 +265,8 @@ static int ntfs_get_ea(struct inode *inode, const char *name, size_t name_len, goto out; /* Enumerate all xattrs. */ - if (!find_ea(ea_all, le32_to_cpu(info->size), name, name_len, &off)) { + if (!find_ea(ea_all, le32_to_cpu(info->size), name, name_len, &off, + NULL)) { err = -ENODATA; goto out; } @@ -272,7 +308,7 @@ static noinline int ntfs_set_ea(struct inode *inode, const char *name, struct EA_FULL *new_ea; struct EA_FULL *ea_all = NULL; size_t add, new_pack; - u32 off, size; + u32 off, size, ea_sz; __le16 size_pack; struct ATTRIB *attr; struct ATTR_LIST_ENTRY *le; @@ -307,9 +343,8 @@ static noinline int ntfs_set_ea(struct inode *inode, const char *name, size_pack = ea_info.size_pack; } - if (info && find_ea(ea_all, size, name, name_len, &off)) { + if (info && find_ea(ea_all, size, name, name_len, &off, &ea_sz)) { struct EA_FULL *ea; - size_t ea_sz; if (flags & XATTR_CREATE) { err = -EEXIST; @@ -332,8 +367,6 @@ static noinline int ntfs_set_ea(struct inode *inode, const char *name, if (ea->flags & FILE_NEED_EA) le16_add_cpu(&ea_info.count, -1); - ea_sz = unpacked_ea_size(ea); - le16_add_cpu(&ea_info.size_pack, 0 - packed_ea_size(ea)); memmove(ea, Add2Ptr(ea, ea_sz), size - off - ea_sz); From 06b3f0bf418a7b6c266a589ef0516e6a99bc4fab Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Mon, 17 Jul 2023 11:09:03 +0800 Subject: [PATCH 065/224] ovl: let helper ovl_i_path_real() return the realinode [ Upstream commit b2dd05f107b11966e26fe52a313b418364cf497b ] Let helper ovl_i_path_real() return the realinode to prepare for checking non-null realinode in RCU walking path. [msz] Use d_inode_rcu() since we are depending on the consitency between dentry and inode being non-NULL in an RCU setting. There are some changes from upstream commit: 1. Context conflicts caused by 73db6a063c785bc ("ovl: port to vfs{g,u}id_t and associated helpers") is handled. Signed-off-by: Zhihao Cheng Signed-off-by: Amir Goldstein Fixes: ffa5723c6d25 ("ovl: store lower path in ovl_inode") Cc: # v5.19 Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/overlayfs.h | 2 +- fs/overlayfs/util.c | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 052226aa7de0..a3c59ac015ee 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -369,7 +369,7 @@ enum ovl_path_type ovl_path_type(struct dentry *dentry); void ovl_path_upper(struct dentry *dentry, struct path *path); void ovl_path_lower(struct dentry *dentry, struct path *path); void ovl_path_lowerdata(struct dentry *dentry, struct path *path); -void ovl_i_path_real(struct inode *inode, struct path *path); +struct inode *ovl_i_path_real(struct inode *inode, struct path *path); enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); enum ovl_path_type ovl_path_realdata(struct dentry *dentry, struct path *path); struct dentry *ovl_dentry_upper(struct dentry *dentry); diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 850e8d1bf829..0d8f96168e6c 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -266,7 +266,7 @@ struct dentry *ovl_i_dentry_upper(struct inode *inode) return ovl_upperdentry_dereference(OVL_I(inode)); } -void ovl_i_path_real(struct inode *inode, struct path *path) +struct inode *ovl_i_path_real(struct inode *inode, struct path *path) { path->dentry = ovl_i_dentry_upper(inode); if (!path->dentry) { @@ -275,6 +275,8 @@ void ovl_i_path_real(struct inode *inode, struct path *path) } else { path->mnt = ovl_upper_mnt(OVL_FS(inode->i_sb)); } + + return path->dentry ? d_inode_rcu(path->dentry) : NULL; } struct inode *ovl_inode_upper(struct inode *inode) @@ -1121,8 +1123,7 @@ void ovl_copyattr(struct inode *inode) struct inode *realinode; struct user_namespace *real_mnt_userns; - ovl_i_path_real(inode, &realpath); - realinode = d_inode(realpath.dentry); + realinode = ovl_i_path_real(inode, &realpath); real_mnt_userns = mnt_user_ns(realpath.mnt); inode->i_uid = i_uid_into_mnt(real_mnt_userns, realinode); From c4a5fb1ae5d3f02d3227afde2b9339994389463d Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Mon, 17 Jul 2023 11:09:04 +0800 Subject: [PATCH 066/224] ovl: fix null pointer dereference in ovl_get_acl_rcu() [ Upstream commit f4e19e595cc2e76a8a58413eb19d3d9c51328b53 ] Following process: P1 P2 path_openat link_path_walk may_lookup inode_permission(rcu) ovl_permission acl_permission_check check_acl get_cached_acl_rcu ovl_get_inode_acl realinode = ovl_inode_real(ovl_inode) drop_cache __dentry_kill(ovl_dentry) iput(ovl_inode) ovl_destroy_inode(ovl_inode) dput(oi->__upperdentry) dentry_kill(upperdentry) dentry_unlink_inode upperdentry->d_inode = NULL ovl_inode_upper upperdentry = ovl_i_dentry_upper(ovl_inode) d_inode(upperdentry) // returns NULL IS_POSIXACL(realinode) // NULL pointer dereference , will trigger an null pointer dereference at realinode: [ 205.472797] BUG: kernel NULL pointer dereference, address: 0000000000000028 [ 205.476701] CPU: 2 PID: 2713 Comm: ls Not tainted 6.3.0-12064-g2edfa098e750-dirty #1216 [ 205.478754] RIP: 0010:do_ovl_get_acl+0x5d/0x300 [ 205.489584] Call Trace: [ 205.489812] [ 205.490014] ovl_get_inode_acl+0x26/0x30 [ 205.490466] get_cached_acl_rcu+0x61/0xa0 [ 205.490908] generic_permission+0x1bf/0x4e0 [ 205.491447] ovl_permission+0x79/0x1b0 [ 205.491917] inode_permission+0x15e/0x2c0 [ 205.492425] link_path_walk+0x115/0x550 [ 205.493311] path_lookupat.isra.0+0xb2/0x200 [ 205.493803] filename_lookup+0xda/0x240 [ 205.495747] vfs_fstatat+0x7b/0xb0 Fetch a reproducer in [Link]. Use the helper ovl_i_path_realinode() to get realinode and then do non-nullptr checking. There are some changes from upstream commit: 1. Corrusponds to do_ovl_get_acl() in 6.1 is ovl_get_acl() 2. Context conflicts caused by 6c0a8bfb84af8f3 ("ovl: implement get acl method") is handled. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217404 Fixes: 332f606b32b6 ("ovl: enable RCU'd ->get_acl()") Cc: # v5.15 Signed-off-by: Zhihao Cheng Suggested-by: Christian Brauner Suggested-by: Amir Goldstein Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/overlayfs/inode.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 9e61511de7a7..677649b34965 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -497,20 +497,20 @@ static void ovl_idmap_posix_acl(struct inode *realinode, */ struct posix_acl *ovl_get_acl(struct inode *inode, int type, bool rcu) { - struct inode *realinode = ovl_inode_real(inode); + struct inode *realinode; struct posix_acl *acl, *clone; struct path realpath; - if (!IS_POSIXACL(realinode)) - return NULL; - /* Careful in RCU walk mode */ - ovl_i_path_real(inode, &realpath); - if (!realpath.dentry) { + realinode = ovl_i_path_real(inode, &realpath); + if (!realinode) { WARN_ON(!rcu); return ERR_PTR(-ECHILD); } + if (!IS_POSIXACL(realinode)) + return NULL; + if (rcu) { acl = get_cached_acl_rcu(realinode, type); } else { From 40e2ed0e562a4f040174eb77678fec61ef7b4e7c Mon Sep 17 00:00:00 2001 From: Winston Wen Date: Mon, 26 Jun 2023 11:42:57 +0800 Subject: [PATCH 067/224] cifs: fix session state check in smb2_find_smb_ses commit 66be5c48ee1b5b8c919cc329fe6d32e16badaa40 upstream. Chech the session state and skip it if it's exiting. Signed-off-by: Winston Wen Reviewed-by: Shyam Prasad N Cc: stable@vger.kernel.org Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/smb2transport.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 790acf65a092..22954a9c7a6c 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -153,7 +153,14 @@ smb2_find_smb_ses_unlocked(struct TCP_Server_Info *server, __u64 ses_id) list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { if (ses->Suid != ses_id) continue; + + spin_lock(&ses->ses_lock); + if (ses->ses_status == SES_EXITING) { + spin_unlock(&ses->ses_lock); + continue; + } ++ses->ses_count; + spin_unlock(&ses->ses_lock); return ses; } From 7c880188c71066449a76de71de772198a0c30a7c Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 10 Jul 2023 11:10:17 +0200 Subject: [PATCH 068/224] drm/client: Send hotplug event after registering a client MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 27655b9bb9f0d9c32b8de8bec649b676898c52d5 upstream. Generate a hotplug event after registering a client to allow the client to configure its display. Remove the hotplug calls from the existing clients for fbdev emulation. This change fixes a concurrency bug between registering a client and receiving events from the DRM core. The bug is present in the fbdev emulation of all drivers. The fbdev emulation currently generates a hotplug event before registering the client to the device. For each new output, the DRM core sends an additional hotplug event to each registered client. If the DRM core detects first output between sending the artificial hotplug and registering the device, the output's hotplug event gets lost. If this is the first output, the fbdev console display remains dark. This has been observed with amdgpu and fbdev-generic. Fix this by adding hotplug generation directly to the client's register helper drm_client_register(). Registering the client and receiving events are serialized by struct drm_device.clientlist_mutex. So an output is either configured by the initial hotplug event, or the client has already been registered. The bug was originally added in commit 6e3f17ee73f7 ("drm/fb-helper: generic: Call drm_client_add() after setup is done"), in which adding a client and receiving a hotplug event switched order. It was hidden, as most hardware and drivers have at least on static output configured. Other drivers didn't use the internal DRM client or still had struct drm_mode_config_funcs.output_poll_changed set. That callback handled hotplug events as well. After not setting the callback in amdgpu in commit 0e3172bac3f4 ("drm/amdgpu: Don't set struct drm_driver.output_poll_changed"), amdgpu did not show a framebuffer console if output events got lost. The bug got copy-pasted from fbdev-generic into the other fbdev emulation. Reported-by: Moritz Duge Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2649 Fixes: 6e3f17ee73f7 ("drm/fb-helper: generic: Call drm_client_add() after setup is done") Fixes: 8ab59da26bc0 ("drm/fb-helper: Move generic fbdev emulation into separate source file") Fixes: b79fe9abd58b ("drm/fbdev-dma: Implement fbdev emulation for GEM DMA helpers") Fixes: 63c381552f69 ("drm/armada: Implement fbdev emulation as in-kernel client") Fixes: 49953b70e7d3 ("drm/exynos: Implement fbdev emulation as in-kernel client") Fixes: 8f1aaccb04b7 ("drm/gma500: Implement client-based fbdev emulation") Fixes: 940b869c2f2f ("drm/msm: Implement fbdev emulation as in-kernel client") Fixes: 9e69bcd88e45 ("drm/omapdrm: Implement fbdev emulation as in-kernel client") Fixes: e317a69fe891 ("drm/radeon: Implement client-based fbdev emulation") Fixes: 71ec16f45ef8 ("drm/tegra: Implement fbdev emulation as in-kernel client") Fixes: 0e3172bac3f4 ("drm/amdgpu: Don't set struct drm_driver.output_poll_changed") Signed-off-by: Thomas Zimmermann Tested-by: Moritz Duge Tested-by: Torsten Krah Tested-by: Paul Schyska Cc: Daniel Vetter Cc: David Airlie Cc: Noralf Trønnes Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Javier Martinez Canillas Cc: Russell King Cc: Inki Dae Cc: Seung-Woo Kim Cc: Kyungmin Park Cc: Krzysztof Kozlowski Cc: Patrik Jakobsson Cc: Rob Clark Cc: Abhinav Kumar Cc: Dmitry Baryshkov Cc: Tomi Valkeinen Cc: Alex Deucher Cc: "Christian König" Cc: "Pan, Xinhui" Cc: Thierry Reding Cc: Mikko Perttunen Cc: dri-devel@lists.freedesktop.org Cc: linux-kernel@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-samsung-soc@vger.kernel.org Cc: linux-arm-msm@vger.kernel.org Cc: freedreno@lists.freedesktop.org Cc: amd-gfx@lists.freedesktop.org Cc: linux-tegra@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: # v5.2+ Reviewed-by: Javier Martinez Canillas Reviewed-by: Dmitry Baryshkov # msm Link: https://patchwork.freedesktop.org/patch/msgid/20230710091029.27503-1-tzimmermann@suse.de (cherry picked from commit 27655b9bb9f0d9c32b8de8bec649b676898c52d5) [ Dropped changes to drivers/gpu/drm/armada/armada_fbdev.c as 174c3c38e3a2 drm/armada: Initialize fbdev DRM client was introduced in 6.5-rc1. Dropped changes to exynos, msm, omapdrm, radeon, tegra drivers as missing code these commits introduced: 99286486d674 drm/exynos: Initialize fbdev DRM client 841ef552b141 drm/msm: Initialize fbdev DRM client 9e69bcd88e45 drm/omapdrm: Implement fbdev emulation as in-kernel client e317a69fe891 drm/radeon: Implement client-based fbdev emulation 9b926bcf2636 drm/radeon: Only build fbdev if DRM_FBDEV_EMULATION is set 25dda38e0b07 drm/tegra: Initialize fbdev DRM client 8f1aaccb04b7 drm/gma500: Implement client-based fbdev emulation b79fe9abd58b drm/fbdev-dma: Implement fbdev emulation for GEM DMA helpers Move code for drm-fbdev-generic.c to matching file in 6.1.y because these commits haven't happened in 6.1.y. 8ab59da26bc0 drm/fb-helper: Move generic fbdev emulation into separate source file b9c93f4ec737 drm/fbdev-generic: Rename symbols ] Cc: alexandru.gagniuc@hp.com Link: https://lore.kernel.org/stable/SJ0PR84MB20882EEA1ABB36F60E845E378F5AA@SJ0PR84MB2088.NAMPRD84.PROD.OUTLOOK.COM/ Signed-off-by: Mario Limonciello Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_client.c | 21 +++++++++++++++++++++ drivers/gpu/drm/drm_fb_helper.c | 4 ---- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c index 2b230b4d6942..dcbeeb68ca64 100644 --- a/drivers/gpu/drm/drm_client.c +++ b/drivers/gpu/drm/drm_client.c @@ -122,13 +122,34 @@ EXPORT_SYMBOL(drm_client_init); * drm_client_register() it is no longer permissible to call drm_client_release() * directly (outside the unregister callback), instead cleanup will happen * automatically on driver unload. + * + * Registering a client generates a hotplug event that allows the client + * to set up its display from pre-existing outputs. The client must have + * initialized its state to able to handle the hotplug event successfully. */ void drm_client_register(struct drm_client_dev *client) { struct drm_device *dev = client->dev; + int ret; mutex_lock(&dev->clientlist_mutex); list_add(&client->list, &dev->clientlist); + + if (client->funcs && client->funcs->hotplug) { + /* + * Perform an initial hotplug event to pick up the + * display configuration for the client. This step + * has to be performed *after* registering the client + * in the list of clients, or a concurrent hotplug + * event might be lost; leaving the display off. + * + * Hold the clientlist_mutex as for a regular hotplug + * event. + */ + ret = client->funcs->hotplug(client); + if (ret) + drm_dbg_kms(dev, "client hotplug ret=%d\n", ret); + } mutex_unlock(&dev->clientlist_mutex); } EXPORT_SYMBOL(drm_client_register); diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 76e46713b2f0..442746d9777a 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -2634,10 +2634,6 @@ void drm_fbdev_generic_setup(struct drm_device *dev, preferred_bpp = 32; fb_helper->preferred_bpp = preferred_bpp; - ret = drm_fbdev_client_hotplug(&fb_helper->client); - if (ret) - drm_dbg_kms(dev, "client hotplug ret=%d\n", ret); - drm_client_register(&fb_helper->client); } EXPORT_SYMBOL(drm_fbdev_generic_setup); From 4596c812916a582e16aedfb243aaee8d010c6220 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 7 Jun 2023 12:14:00 -0400 Subject: [PATCH 069/224] drm/amdgpu/sdma4: set align mask to 255 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e5df16d9428f5c6d2d0b1eff244d6c330ba9ef3a upstream. The wptr needs to be incremented at at least 64 dword intervals, use 256 to align with windows. This should fix potential hangs with unaligned updates. Reviewed-by: Felix Kuehling Reviewed-by: Aaron Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher (cherry picked from commit e5df16d9428f5c6d2d0b1eff244d6c330ba9ef3a) The path `drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c` doesn't exist in 6.1.y, only modify the file that does exist. Signed-off-by: Mario Limonciello Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 5b251d009467..97b033dfe9e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -2330,7 +2330,7 @@ const struct amd_ip_funcs sdma_v4_0_ip_funcs = { static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { .type = AMDGPU_RING_TYPE_SDMA, - .align_mask = 0xf, + .align_mask = 0xff, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, .secure_submission_supported = true, @@ -2400,7 +2400,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs_2nd_mmhub = { static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = { .type = AMDGPU_RING_TYPE_SDMA, - .align_mask = 0xf, + .align_mask = 0xff, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, .secure_submission_supported = true, From c8c703befd2fb2ebbcc9cedbdc98953b52453a35 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Thu, 15 Jun 2023 10:56:55 +0800 Subject: [PATCH 070/224] drm/amd/pm: revise the ASPM settings for thunderbolt attached scenario commit fd21987274463a439c074b8f3c93d3b132e4c031 upstream. Also, correct the comment for NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT as 0x0000000E stands for 400ms instead of 4ms. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index aa761ff3a5fa..7ba47fc1917b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -346,7 +346,7 @@ static void nbio_v2_3_init_registers(struct amdgpu_device *adev) #define NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT 0x00000000 // off by default, no gains over L1 #define NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT 0x00000009 // 1=1us, 9=1ms -#define NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT 0x0000000E // 4ms +#define NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT 0x0000000E // 400ms static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev, bool enable) @@ -479,9 +479,12 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) WREG32_SOC15(NBIO, 0, mmRCC_BIF_STRAP5, data); def = data = RREG32_PCIE(smnPCIE_LC_CNTL); - data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK; - data |= 0x9 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; - data |= 0x1 << PCIE_LC_CNTL__LC_PMI_TO_L1_DIS__SHIFT; + data |= NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT; + if (pci_is_thunderbolt_attached(adev->pdev)) + data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; + else + data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; + data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; if (def != data) WREG32_PCIE(smnPCIE_LC_CNTL, data); From d7d53c669da90181827e42583582192646a4d933 Mon Sep 17 00:00:00 2001 From: lyndonli Date: Mon, 21 Nov 2022 09:10:20 +0800 Subject: [PATCH 071/224] drm/amdgpu: add the fan abnormal detection feature commit ef5fca9f7294509ee5013af9e879edc5837c1d6c upstream. Update the SW CTF limit from existing register when there's a fan failure detected via SMU interrupt. Signed-off-by: lyndonli Reviewed-by: Hawking Zhang Reviewed-by: Kenneth Feng Reviewed-by: Evan Quan Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 1 + .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 28 +++++++++++++++++++ .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 1 + 3 files changed, 30 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 44bbf17e4bef..3bc4128a22ac 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -168,6 +168,7 @@ struct smu_temperature_range { int mem_crit_max; int mem_emergency_max; int software_shutdown_temp; + int software_shutdown_temp_offset; }; struct smu_state_validation_block { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 5143b4df2cc1..d1f5bd225fed 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -1381,6 +1381,7 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev, */ uint32_t ctxid = entry->src_data[0]; uint32_t data; + uint32_t high; if (client_id == SOC15_IH_CLIENTID_THM) { switch (src_id) { @@ -1437,6 +1438,33 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev, schedule_work(&smu->throttling_logging_work); break; + case 0x8: + high = smu->thermal_range.software_shutdown_temp + + smu->thermal_range.software_shutdown_temp_offset; + high = min(SMU_THERMAL_MAXIMUM_ALERT_TEMP, high); + dev_emerg(adev->dev, "Reduce soft CTF limit to %d (by an offset %d)\n", + high, + smu->thermal_range.software_shutdown_temp_offset); + + data = RREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL); + data = REG_SET_FIELD(data, THM_THERMAL_INT_CTRL, + DIG_THERM_INTH, + (high & 0xff)); + data = data & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK); + WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL, data); + break; + case 0x9: + high = min(SMU_THERMAL_MAXIMUM_ALERT_TEMP, + smu->thermal_range.software_shutdown_temp); + dev_emerg(adev->dev, "Recover soft CTF limit to %d\n", high); + + data = RREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL); + data = REG_SET_FIELD(data, THM_THERMAL_INT_CTRL, + DIG_THERM_INTH, + (high & 0xff)); + data = data & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK); + WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL, data); + break; } } } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index bd61518bb7b1..084597d0ae4e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -1288,6 +1288,7 @@ static int smu_v13_0_7_get_thermal_temperature_range(struct smu_context *smu, range->mem_emergency_max = (pptable->SkuTable.TemperatureLimit[TEMP_MEM] + CTF_OFFSET_MEM)* SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; range->software_shutdown_temp = powerplay_table->software_shutdown_temp; + range->software_shutdown_temp_offset = pptable->SkuTable.FanAbnormalTempLimitOffset; return 0; } From e8b6b7b8132500ecb241f7f685398028017ec0d3 Mon Sep 17 00:00:00 2001 From: Luben Tuikov Date: Mon, 21 Nov 2022 12:18:36 -0500 Subject: [PATCH 072/224] drm/amdgpu: Fix minmax warning commit abd51738fe754a684ec44b7a9eca1981e1704ad9 upstream. Fix minmax warning by using min_t() macro and explicitly specifying the assignment type. Cc: Alex Deucher Signed-off-by: Luben Tuikov Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index d1f5bd225fed..cd627ee8bcea 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -1441,7 +1441,9 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev, case 0x8: high = smu->thermal_range.software_shutdown_temp + smu->thermal_range.software_shutdown_temp_offset; - high = min(SMU_THERMAL_MAXIMUM_ALERT_TEMP, high); + high = min_t(typeof(high), + SMU_THERMAL_MAXIMUM_ALERT_TEMP, + high); dev_emerg(adev->dev, "Reduce soft CTF limit to %d (by an offset %d)\n", high, smu->thermal_range.software_shutdown_temp_offset); @@ -1454,8 +1456,9 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev, WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL, data); break; case 0x9: - high = min(SMU_THERMAL_MAXIMUM_ALERT_TEMP, - smu->thermal_range.software_shutdown_temp); + high = min_t(typeof(high), + SMU_THERMAL_MAXIMUM_ALERT_TEMP, + smu->thermal_range.software_shutdown_temp); dev_emerg(adev->dev, "Recover soft CTF limit to %d\n", high); data = RREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL); From 13e8af958cfa97d93d9824b863208c0bb0977361 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Tue, 20 Jun 2023 11:41:40 +0800 Subject: [PATCH 073/224] drm/amd/pm: add abnormal fan detection for smu 13.0.0 commit 2da0036ea99bccb27f7fe3cf2aa2900860e9be46 upstream. add abnormal fan detection for smu 13.0.0 Signed-off-by: Kenneth Feng Reviewed-by: Evan Quan Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 54fc42dad775..e5e03455622a 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -1281,6 +1281,7 @@ static int smu_v13_0_0_get_thermal_temperature_range(struct smu_context *smu, range->mem_emergency_max = (pptable->SkuTable.TemperatureLimit[TEMP_MEM] + CTF_OFFSET_MEM)* SMU_TEMPERATURE_UNITS_PER_CENTIGRADES; range->software_shutdown_temp = powerplay_table->software_shutdown_temp; + range->software_shutdown_temp_offset = pptable->SkuTable.FanAbnormalTempLimitOffset; return 0; } From 2cb10f4e6ccade106b7d9341c160f1e71660ae1e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 5 May 2023 12:16:54 -0700 Subject: [PATCH 074/224] f2fs: fix the wrong condition to determine atomic context commit 633c8b9409f564ce4b7f7944c595ffac27ed1ff4 upstream. Should use !in_task for irq context. Cc: stable@vger.kernel.org Fixes: 1aa161e43106 ("f2fs: fix scheduling while atomic in decompression path") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index e50d5848c100..fb75ff7b3448 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -764,7 +764,7 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task) ret = -EFSCORRUPTED; /* Avoid f2fs_commit_super in irq context */ - if (in_task) + if (!in_task) f2fs_save_errors(sbi, ERROR_FAIL_DECOMPRESSION); else f2fs_handle_error(sbi, ERROR_FAIL_DECOMPRESSION); From a996fec74c3884c309a238f2e1521f91771f095f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 28 Jun 2023 01:00:56 -0700 Subject: [PATCH 075/224] f2fs: fix deadlock in i_xattr_sem and inode page lock commit 5eda1ad1aaffdfebdecf7a164e586060a210f74f upstream. Thread #1: [122554.641906][ T92] f2fs_getxattr+0xd4/0x5fc -> waiting for f2fs_down_read(&F2FS_I(inode)->i_xattr_sem); [122554.641927][ T92] __f2fs_get_acl+0x50/0x284 [122554.641948][ T92] f2fs_init_acl+0x84/0x54c [122554.641969][ T92] f2fs_init_inode_metadata+0x460/0x5f0 [122554.641990][ T92] f2fs_add_inline_entry+0x11c/0x350 -> Locked dir->inode_page by f2fs_get_node_page() [122554.642009][ T92] f2fs_do_add_link+0x100/0x1e4 [122554.642025][ T92] f2fs_create+0xf4/0x22c [122554.642047][ T92] vfs_create+0x130/0x1f4 Thread #2: [123996.386358][ T92] __get_node_page+0x8c/0x504 -> waiting for dir->inode_page lock [123996.386383][ T92] read_all_xattrs+0x11c/0x1f4 [123996.386405][ T92] __f2fs_setxattr+0xcc/0x528 [123996.386424][ T92] f2fs_setxattr+0x158/0x1f4 -> f2fs_down_write(&F2FS_I(inode)->i_xattr_sem); [123996.386443][ T92] __f2fs_set_acl+0x328/0x430 [123996.386618][ T92] f2fs_set_acl+0x38/0x50 [123996.386642][ T92] posix_acl_chmod+0xc8/0x1c8 [123996.386669][ T92] f2fs_setattr+0x5e0/0x6bc [123996.386689][ T92] notify_change+0x4d8/0x580 [123996.386717][ T92] chmod_common+0xd8/0x184 [123996.386748][ T92] do_fchmodat+0x60/0x124 [123996.386766][ T92] __arm64_sys_fchmodat+0x28/0x3c Cc: Fixes: 27161f13e3c3 "f2fs: avoid race in between read xattr & write xattr" Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Greg Kroah-Hartman --- fs/f2fs/dir.c | 9 ++++++++- fs/f2fs/xattr.c | 6 ++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 21960a899b6a..bf5ba75b75d2 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -806,8 +806,15 @@ int f2fs_add_dentry(struct inode *dir, const struct f2fs_filename *fname, { int err = -EAGAIN; - if (f2fs_has_inline_dentry(dir)) + if (f2fs_has_inline_dentry(dir)) { + /* + * Should get i_xattr_sem to keep the lock order: + * i_xattr_sem -> inode_page lock used by f2fs_setxattr. + */ + f2fs_down_read(&F2FS_I(dir)->i_xattr_sem); err = f2fs_add_inline_entry(dir, fname, inode, ino, mode); + f2fs_up_read(&F2FS_I(dir)->i_xattr_sem); + } if (err == -EAGAIN) err = f2fs_add_regular_entry(dir, fname, inode, ino, mode); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index dc2e8637189e..db3b641f2158 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -527,10 +527,12 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, if (len > F2FS_NAME_LEN) return -ERANGE; - f2fs_down_read(&F2FS_I(inode)->i_xattr_sem); + if (!ipage) + f2fs_down_read(&F2FS_I(inode)->i_xattr_sem); error = lookup_all_xattrs(inode, ipage, index, len, name, &entry, &base_addr, &base_size, &is_inline); - f2fs_up_read(&F2FS_I(inode)->i_xattr_sem); + if (!ipage) + f2fs_up_read(&F2FS_I(inode)->i_xattr_sem); if (error) return error; From 03590f9be9b94b701159fef39f04956890c56049 Mon Sep 17 00:00:00 2001 From: Basavaraj Natikar Date: Thu, 8 Dec 2022 15:07:04 +0530 Subject: [PATCH 076/224] pinctrl: amd: Add Z-state wake control bits commit df72b4a692b60d3e5d99d9ef662b2d03c44bb9c0 upstream. GPIO registers include Bit 27 for WakeCntrlZ used to enable wake in Z state. Hence add Z-state wake control bits to debugfs output to debug and analyze Z-states problems. Signed-off-by: Basavaraj Natikar Suggested-by: Mario Limonciello Tested-by: Guruvendra Punugupati Link: https://lore.kernel.org/r/20221208093704.1151928-1-Basavaraj.Natikar@amd.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-amd.c | 7 +++++++ drivers/pinctrl/pinctrl-amd.h | 1 + 2 files changed, 8 insertions(+) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 32c3edaf9038..9236a132c7ba 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -218,6 +218,7 @@ static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc) char *orientation; char debounce_value[40]; char *debounce_enable; + char *wake_cntrlz; for (bank = 0; bank < gpio_dev->hwbank_num; bank++) { unsigned int time = 0; @@ -305,6 +306,12 @@ static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc) wake_cntrl2 = " ∅"; seq_printf(s, "S4/S5 %s| ", wake_cntrl2); + if (pin_reg & BIT(WAKECNTRL_Z_OFF)) + wake_cntrlz = "⏰"; + else + wake_cntrlz = " ∅"; + seq_printf(s, "Z %s| ", wake_cntrlz); + if (pin_reg & BIT(PULL_UP_ENABLE_OFF)) { pull_up_enable = "+"; if (pin_reg & BIT(PULL_UP_SEL_OFF)) diff --git a/drivers/pinctrl/pinctrl-amd.h b/drivers/pinctrl/pinctrl-amd.h index c8635998465d..81ae8319a1f0 100644 --- a/drivers/pinctrl/pinctrl-amd.h +++ b/drivers/pinctrl/pinctrl-amd.h @@ -42,6 +42,7 @@ #define OUTPUT_ENABLE_OFF 23 #define SW_CNTRL_IN_OFF 24 #define SW_CNTRL_EN_OFF 25 +#define WAKECNTRL_Z_OFF 27 #define INTERRUPT_STS_OFF 28 #define WAKE_STS_OFF 29 From a56afed6d5a416a7595fd9e66edc96aafbea0c6a Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 28 Mar 2023 12:42:30 -0500 Subject: [PATCH 077/224] pinctrl: amd: Adjust debugfs output commit 75358cf3319d5fed595946019deda5c2c26a203d upstream. More fields are to be added, so to keep the display from being too busy, adjust it. 1) Add a header to all columns 2) Except for interrupt, when fields have no data show empty 3) Remove otherwise blank whitespace Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20230328174231.8924-2-mario.limonciello@amd.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-amd.c | 76 ++++++++++++++--------------------- 1 file changed, 30 insertions(+), 46 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 9236a132c7ba..822f29440f15 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -206,15 +206,12 @@ static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc) char *level_trig; char *active_level; - char *interrupt_enable; char *interrupt_mask; char *wake_cntrl0; char *wake_cntrl1; char *wake_cntrl2; char *pin_sts; char *pull_up_sel; - char *pull_up_enable; - char *pull_down_enable; char *orientation; char debounce_value[40]; char *debounce_enable; @@ -246,6 +243,7 @@ static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc) continue; } seq_printf(s, "GPIO bank%d\n", bank); + seq_puts(s, "gpio\tint|active|trigger|S0i3| S3|S4/S5| Z|wake|pull| orient| debounce|reg\n"); for (; i < pin_num; i++) { seq_printf(s, "#%d\t", i); raw_spin_lock_irqsave(&gpio_dev->lock, flags); @@ -255,7 +253,6 @@ static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc) if (pin_reg & BIT(INTERRUPT_ENABLE_OFF)) { u8 level = (pin_reg >> ACTIVE_LEVEL_OFF) & ACTIVE_LEVEL_MASK; - interrupt_enable = "+"; if (level == ACTIVE_LEVEL_HIGH) active_level = "↑"; @@ -272,65 +269,54 @@ static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc) else level_trig = " edge"; - } else { - interrupt_enable = "∅"; - active_level = "∅"; - level_trig = " ∅"; - } + if (pin_reg & BIT(INTERRUPT_MASK_OFF)) + interrupt_mask = "😛"; + else + interrupt_mask = "😷"; - if (pin_reg & BIT(INTERRUPT_MASK_OFF)) - interrupt_mask = "😛"; - else - interrupt_mask = "😷"; - seq_printf(s, "int %s (%s)| active-%s| %s-⚡| ", - interrupt_enable, + seq_printf(s, "%s| %s| %s|", interrupt_mask, active_level, level_trig); + } else + seq_puts(s, " ∅| | |"); if (pin_reg & BIT(WAKE_CNTRL_OFF_S0I3)) wake_cntrl0 = "⏰"; else - wake_cntrl0 = " ∅"; - seq_printf(s, "S0i3 %s| ", wake_cntrl0); + wake_cntrl0 = " "; + seq_printf(s, " %s| ", wake_cntrl0); if (pin_reg & BIT(WAKE_CNTRL_OFF_S3)) wake_cntrl1 = "⏰"; else - wake_cntrl1 = " ∅"; - seq_printf(s, "S3 %s| ", wake_cntrl1); + wake_cntrl1 = " "; + seq_printf(s, "%s|", wake_cntrl1); if (pin_reg & BIT(WAKE_CNTRL_OFF_S4)) wake_cntrl2 = "⏰"; else - wake_cntrl2 = " ∅"; - seq_printf(s, "S4/S5 %s| ", wake_cntrl2); + wake_cntrl2 = " "; + seq_printf(s, " %s|", wake_cntrl2); if (pin_reg & BIT(WAKECNTRL_Z_OFF)) wake_cntrlz = "⏰"; else - wake_cntrlz = " ∅"; - seq_printf(s, "Z %s| ", wake_cntrlz); + wake_cntrlz = " "; + seq_printf(s, "%s|", wake_cntrlz); if (pin_reg & BIT(PULL_UP_ENABLE_OFF)) { - pull_up_enable = "+"; if (pin_reg & BIT(PULL_UP_SEL_OFF)) pull_up_sel = "8k"; else pull_up_sel = "4k"; - } else { - pull_up_enable = "∅"; - pull_up_sel = " "; + seq_printf(s, "%s ↑|", + pull_up_sel); + } else if (pin_reg & BIT(PULL_DOWN_ENABLE_OFF)) { + seq_puts(s, " ↓|"); + } else { + seq_puts(s, " |"); } - seq_printf(s, "pull-↑ %s (%s)| ", - pull_up_enable, - pull_up_sel); - - if (pin_reg & BIT(PULL_DOWN_ENABLE_OFF)) - pull_down_enable = "+"; - else - pull_down_enable = "∅"; - seq_printf(s, "pull-↓ %s| ", pull_down_enable); if (pin_reg & BIT(OUTPUT_ENABLE_OFF)) { pin_sts = "output"; @@ -345,7 +331,7 @@ static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc) else orientation = "↓"; } - seq_printf(s, "%s %s| ", pin_sts, orientation); + seq_printf(s, "%s %s|", pin_sts, orientation); db_cntrl = (DB_CNTRl_MASK << DB_CNTRL_OFF) & pin_reg; if (db_cntrl) { @@ -364,19 +350,17 @@ static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc) unit = 61; } if ((DB_TYPE_REMOVE_GLITCH << DB_CNTRL_OFF) == db_cntrl) - debounce_enable = "b +"; + debounce_enable = "b"; else if ((DB_TYPE_PRESERVE_LOW_GLITCH << DB_CNTRL_OFF) == db_cntrl) - debounce_enable = "↓ +"; + debounce_enable = "↓"; else - debounce_enable = "↑ +"; - + debounce_enable = "↑"; + snprintf(debounce_value, sizeof(debounce_value), "%06u", time * unit); + seq_printf(s, "%s (🕑 %sus)|", debounce_enable, debounce_value); } else { - debounce_enable = " ∅"; - time = 0; + seq_puts(s, " |"); } - snprintf(debounce_value, sizeof(debounce_value), "%u", time * unit); - seq_printf(s, "debounce %s (🕑 %sus)| ", debounce_enable, debounce_value); - seq_printf(s, " 0x%x\n", pin_reg); + seq_printf(s, "0x%x\n", pin_reg); } } } From 4484ce0e4928d52bf15cb7b44c4965aebc55d7eb Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 28 Mar 2023 12:42:31 -0500 Subject: [PATCH 078/224] pinctrl: amd: Add fields for interrupt status and wake status commit 010f493d90ee1dbc32fa1ce51398f20d494c20c2 upstream. If the firmware has misconfigured a GPIO it may cause interrupt status or wake status bits to be set and not asserted. Add these to debug output to catch this case. Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20230328174231.8924-3-mario.limonciello@amd.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-amd.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 822f29440f15..c250110f6775 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -211,6 +211,8 @@ static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc) char *wake_cntrl1; char *wake_cntrl2; char *pin_sts; + char *interrupt_sts; + char *wake_sts; char *pull_up_sel; char *orientation; char debounce_value[40]; @@ -243,7 +245,7 @@ static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc) continue; } seq_printf(s, "GPIO bank%d\n", bank); - seq_puts(s, "gpio\tint|active|trigger|S0i3| S3|S4/S5| Z|wake|pull| orient| debounce|reg\n"); + seq_puts(s, "gpio\t int|active|trigger|S0i3| S3|S4/S5| Z|wake|pull| orient| debounce|reg\n"); for (; i < pin_num; i++) { seq_printf(s, "#%d\t", i); raw_spin_lock_irqsave(&gpio_dev->lock, flags); @@ -274,12 +276,18 @@ static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc) else interrupt_mask = "😷"; - seq_printf(s, "%s| %s| %s|", + if (pin_reg & BIT(INTERRUPT_STS_OFF)) + interrupt_sts = "🔥"; + else + interrupt_sts = " "; + + seq_printf(s, "%s %s| %s| %s|", + interrupt_sts, interrupt_mask, active_level, level_trig); } else - seq_puts(s, " ∅| | |"); + seq_puts(s, " ∅| | |"); if (pin_reg & BIT(WAKE_CNTRL_OFF_S0I3)) wake_cntrl0 = "⏰"; @@ -305,6 +313,12 @@ static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc) wake_cntrlz = " "; seq_printf(s, "%s|", wake_cntrlz); + if (pin_reg & BIT(WAKE_STS_OFF)) + wake_sts = "🔥"; + else + wake_sts = " "; + seq_printf(s, " %s|", wake_sts); + if (pin_reg & BIT(PULL_UP_ENABLE_OFF)) { if (pin_reg & BIT(PULL_UP_SEL_OFF)) pull_up_sel = "8k"; From 8a2d8e17c731dee8c5e149df970d20215bf52776 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 21 Apr 2023 07:06:21 -0500 Subject: [PATCH 079/224] pinctrl: amd: Detect internal GPIO0 debounce handling commit 968ab9261627fa305307e3935ca1a32fcddd36cb upstream. commit 4e5a04be88fe ("pinctrl: amd: disable and mask interrupts on probe") had a mistake in loop iteration 63 that it would clear offset 0xFC instead of 0x100. Offset 0xFC is actually `WAKE_INT_MASTER_REG`. This was clearing bits 13 and 15 from the register which significantly changed the expected handling for some platforms for GPIO0. commit b26cd9325be4 ("pinctrl: amd: Disable and mask interrupts on resume") actually fixed this bug, but lead to regressions on Lenovo Z13 and some other systems. This is because there was no handling in the driver for bit 15 debounce behavior. Quoting a public BKDG: ``` EnWinBlueBtn. Read-write. Reset: 0. 0=GPIO0 detect debounced power button; Power button override is 4 seconds. 1=GPIO0 detect debounced power button in S3/S5/S0i3, and detect "pressed less than 2 seconds" and "pressed 2~10 seconds" in S0; Power button override is 10 seconds ``` Cross referencing the same master register in Windows it's obvious that Windows doesn't use debounce values in this configuration. So align the Linux driver to do this as well. This fixes wake on lid when WAKE_INT_MASTER_REG is properly programmed. Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=217315 Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20230421120625.3366-2-mario.limonciello@amd.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-amd.c | 7 +++++++ drivers/pinctrl/pinctrl-amd.h | 1 + 2 files changed, 8 insertions(+) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index c250110f6775..6b9ae92017d4 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -125,6 +125,12 @@ static int amd_gpio_set_debounce(struct gpio_chip *gc, unsigned offset, struct amd_gpio *gpio_dev = gpiochip_get_data(gc); raw_spin_lock_irqsave(&gpio_dev->lock, flags); + + /* Use special handling for Pin0 debounce */ + pin_reg = readl(gpio_dev->base + WAKE_INT_MASTER_REG); + if (pin_reg & INTERNAL_GPIO0_DEBOUNCE) + debounce = 0; + pin_reg = readl(gpio_dev->base + offset * 4); if (debounce) { @@ -219,6 +225,7 @@ static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc) char *debounce_enable; char *wake_cntrlz; + seq_printf(s, "WAKE_INT_MASTER_REG: 0x%08x\n", readl(gpio_dev->base + WAKE_INT_MASTER_REG)); for (bank = 0; bank < gpio_dev->hwbank_num; bank++) { unsigned int time = 0; unsigned int unit = 0; diff --git a/drivers/pinctrl/pinctrl-amd.h b/drivers/pinctrl/pinctrl-amd.h index 81ae8319a1f0..1cf2d06bbd8c 100644 --- a/drivers/pinctrl/pinctrl-amd.h +++ b/drivers/pinctrl/pinctrl-amd.h @@ -17,6 +17,7 @@ #define AMD_GPIO_PINS_BANK3 32 #define WAKE_INT_MASTER_REG 0xfc +#define INTERNAL_GPIO0_DEBOUNCE (1 << 15) #define EOI_MASK (1 << 29) #define WAKE_INT_STATUS_REG0 0x2f8 From 15165187947cb48f75979d88608f3609fe2acb66 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 21 Apr 2023 07:06:22 -0500 Subject: [PATCH 080/224] pinctrl: amd: Fix mistake in handling clearing pins at startup commit a855724dc08b8cb0c13ab1e065a4922f1e5a7552 upstream. commit 4e5a04be88fe ("pinctrl: amd: disable and mask interrupts on probe") had a mistake in loop iteration 63 that it would clear offset 0xFC instead of 0x100. Offset 0xFC is actually `WAKE_INT_MASTER_REG`. This was clearing bits 13 and 15 from the register which significantly changed the expected handling for some platforms for GPIO0. Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=217315 Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20230421120625.3366-3-mario.limonciello@amd.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-amd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 6b9ae92017d4..24465010397b 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -897,9 +897,9 @@ static void amd_gpio_irq_init(struct amd_gpio *gpio_dev) raw_spin_lock_irqsave(&gpio_dev->lock, flags); - pin_reg = readl(gpio_dev->base + i * 4); + pin_reg = readl(gpio_dev->base + pin * 4); pin_reg &= ~mask; - writel(pin_reg, gpio_dev->base + i * 4); + writel(pin_reg, gpio_dev->base + pin * 4); raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); } From 1cd1a0151fdeab2683125ef98d313eaf7878b178 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kornel=20Dul=C4=99ba?= Date: Fri, 21 Apr 2023 07:06:23 -0500 Subject: [PATCH 081/224] pinctrl: amd: Detect and mask spurious interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0cf9e48ff22e15f3f0882991f33d23ccc5ae1d01 upstream. Leverage gpiochip_line_is_irq to check whether a pin has an irq associated with it. The previous check ("irq == 0") didn't make much sense. The irq variable refers to the pinctrl irq, and has nothing do to with an individual pin. On some systems, during suspend/resume cycle, the firmware leaves an interrupt enabled on a pin that is not used by the kernel. Without this patch that caused an interrupt storm. Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=217315 Signed-off-by: Kornel Dulęba Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20230421120625.3366-4-mario.limonciello@amd.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-amd.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 24465010397b..675c9826b78a 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -660,21 +660,21 @@ static bool do_amd_gpio_irq_handler(int irq, void *dev_id) * We must read the pin register again, in case the * value was changed while executing * generic_handle_domain_irq() above. - * If we didn't find a mapping for the interrupt, - * disable it in order to avoid a system hang caused - * by an interrupt storm. + * If the line is not an irq, disable it in order to + * avoid a system hang caused by an interrupt storm. */ raw_spin_lock_irqsave(&gpio_dev->lock, flags); regval = readl(regs + i); - if (irq == 0) { - regval &= ~BIT(INTERRUPT_ENABLE_OFF); + if (!gpiochip_line_is_irq(gc, irqnr + i)) { + regval &= ~BIT(INTERRUPT_MASK_OFF); dev_dbg(&gpio_dev->pdev->dev, "Disabling spurious GPIO IRQ %d\n", irqnr + i); + } else { + ret = true; } writel(regval, regs + i); raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); - ret = true; } } /* did not cause wake on resume context for shared IRQ */ From 57f6d48af46f0ffd23ef0dc463c8625d8855f657 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 21 Apr 2023 07:06:24 -0500 Subject: [PATCH 082/224] pinctrl: amd: Revert "pinctrl: amd: disable and mask interrupts on probe" commit 65f6c7c91cb2ebacbf155e0f881f81e79f90d138 upstream. commit 4e5a04be88fe ("pinctrl: amd: disable and mask interrupts on probe") was well intentioned to mask a firmware issue on a surface laptop, but it has a few problems: 1. It had a bug in the loop handling for iteration 63 that lead to other problems with GPIO0 handling. 2. It disables interrupts that are used internally by the SOC but masked by default. 3. It masked a real firmware problem in some chromebooks that should have been caught during development but wasn't. There has been a lot of other development around s2idle; particularly around handling of the spurious wakeups. If there is still a problem on the original reported surface laptop it should be avoided by adding a quirk to gpiolib-acpi for that system instead. Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20230421120625.3366-5-mario.limonciello@amd.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-amd.c | 31 ------------------------------- 1 file changed, 31 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 675c9826b78a..e9fef2391b38 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -877,34 +877,6 @@ static const struct pinconf_ops amd_pinconf_ops = { .pin_config_group_set = amd_pinconf_group_set, }; -static void amd_gpio_irq_init(struct amd_gpio *gpio_dev) -{ - struct pinctrl_desc *desc = gpio_dev->pctrl->desc; - unsigned long flags; - u32 pin_reg, mask; - int i; - - mask = BIT(WAKE_CNTRL_OFF_S0I3) | BIT(WAKE_CNTRL_OFF_S3) | - BIT(INTERRUPT_MASK_OFF) | BIT(INTERRUPT_ENABLE_OFF) | - BIT(WAKE_CNTRL_OFF_S4); - - for (i = 0; i < desc->npins; i++) { - int pin = desc->pins[i].number; - const struct pin_desc *pd = pin_desc_get(gpio_dev->pctrl, pin); - - if (!pd) - continue; - - raw_spin_lock_irqsave(&gpio_dev->lock, flags); - - pin_reg = readl(gpio_dev->base + pin * 4); - pin_reg &= ~mask; - writel(pin_reg, gpio_dev->base + pin * 4); - - raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); - } -} - #ifdef CONFIG_PM_SLEEP static bool amd_gpio_should_save(struct amd_gpio *gpio_dev, unsigned int pin) { @@ -1142,9 +1114,6 @@ static int amd_gpio_probe(struct platform_device *pdev) return PTR_ERR(gpio_dev->pctrl); } - /* Disable and mask interrupts */ - amd_gpio_irq_init(gpio_dev); - girq = &gpio_dev->gc.irq; gpio_irq_chip_set_chip(girq, &amd_gpio_irqchip); /* This will let us handle the parent IRQ in the driver */ From 3cadcab402e1e18a18b76ae3de557534a811c27c Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 5 Jul 2023 08:30:02 -0500 Subject: [PATCH 083/224] pinctrl: amd: Only use special debounce behavior for GPIO 0 commit 0d5ace1a07f7e846d0f6d972af60d05515599d0b upstream. It's uncommon to use debounce on any other pin, but technically we should only set debounce to 0 when working off GPIO0. Cc: stable@vger.kernel.org Tested-by: Jan Visser Fixes: 968ab9261627 ("pinctrl: amd: Detect internal GPIO0 debounce handling") Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20230705133005.577-2-mario.limonciello@amd.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-amd.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index e9fef2391b38..b3a3bb155dea 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -127,9 +127,11 @@ static int amd_gpio_set_debounce(struct gpio_chip *gc, unsigned offset, raw_spin_lock_irqsave(&gpio_dev->lock, flags); /* Use special handling for Pin0 debounce */ - pin_reg = readl(gpio_dev->base + WAKE_INT_MASTER_REG); - if (pin_reg & INTERNAL_GPIO0_DEBOUNCE) - debounce = 0; + if (offset == 0) { + pin_reg = readl(gpio_dev->base + WAKE_INT_MASTER_REG); + if (pin_reg & INTERNAL_GPIO0_DEBOUNCE) + debounce = 0; + } pin_reg = readl(gpio_dev->base + offset * 4); From 326b3f17bee88f8f05a3371b95b82caaf2ce1cac Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 5 Jul 2023 08:30:03 -0500 Subject: [PATCH 084/224] pinctrl: amd: Use amd_pinconf_set() for all config options commit 635a750d958e158e17af0f524bedc484b27fbb93 upstream. On ASUS TUF A16 it is reported that the ITE5570 ACPI device connected to GPIO 7 is causing an interrupt storm. This issue doesn't happen on Windows. Comparing the GPIO register configuration between Windows and Linux bit 20 has been configured as a pull up on Windows, but not on Linux. Checking GPIO declaration from the firmware it is clear it *should* have been a pull up on Linux as well. ``` GpioInt (Level, ActiveLow, Exclusive, PullUp, 0x0000, "\\_SB.GPIO", 0x00, ResourceConsumer, ,) { // Pin list 0x0007 } ``` On Linux amd_gpio_set_config() is currently only used for programming the debounce. Actually the GPIO core calls it with all the arguments that are supported by a GPIO, pinctrl-amd just responds `-ENOTSUPP`. To solve this issue expand amd_gpio_set_config() to support the other arguments amd_pinconf_set() supports, namely `PIN_CONFIG_BIAS_PULL_DOWN`, `PIN_CONFIG_BIAS_PULL_UP`, and `PIN_CONFIG_DRIVE_STRENGTH`. Reported-by: Nik P Reported-by: Nathan Schulte Reported-by: Friedrich Vock Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217336 Reported-by: dridri85@gmail.com Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217493 Link: https://lore.kernel.org/linux-input/20230530154058.17594-1-friedrich.vock@gmx.de/ Tested-by: Jan Visser Fixes: 2956b5d94a76 ("pinctrl / gpio: Introduce .set_config() callback for GPIO chips") Signed-off-by: Mario Limonciello Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230705133005.577-3-mario.limonciello@amd.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-amd.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index b3a3bb155dea..300e735ca57c 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -188,18 +188,6 @@ static int amd_gpio_set_debounce(struct gpio_chip *gc, unsigned offset, return ret; } -static int amd_gpio_set_config(struct gpio_chip *gc, unsigned offset, - unsigned long config) -{ - u32 debounce; - - if (pinconf_to_config_param(config) != PIN_CONFIG_INPUT_DEBOUNCE) - return -ENOTSUPP; - - debounce = pinconf_to_config_argument(config); - return amd_gpio_set_debounce(gc, offset, debounce); -} - #ifdef CONFIG_DEBUG_FS static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc) { @@ -782,7 +770,7 @@ static int amd_pinconf_get(struct pinctrl_dev *pctldev, } static int amd_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin, - unsigned long *configs, unsigned num_configs) + unsigned long *configs, unsigned int num_configs) { int i; u32 arg; @@ -872,6 +860,20 @@ static int amd_pinconf_group_set(struct pinctrl_dev *pctldev, return 0; } +static int amd_gpio_set_config(struct gpio_chip *gc, unsigned int pin, + unsigned long config) +{ + struct amd_gpio *gpio_dev = gpiochip_get_data(gc); + + if (pinconf_to_config_param(config) == PIN_CONFIG_INPUT_DEBOUNCE) { + u32 debounce = pinconf_to_config_argument(config); + + return amd_gpio_set_debounce(gc, pin, debounce); + } + + return amd_pinconf_set(gpio_dev->pctrl, pin, &config, 1); +} + static const struct pinconf_ops amd_pinconf_ops = { .pin_config_get = amd_pinconf_get, .pin_config_set = amd_pinconf_set, From dce19c966d4da491c820c576be2962e15424aaf3 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 5 Jul 2023 08:30:04 -0500 Subject: [PATCH 085/224] pinctrl: amd: Drop pull up select configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3f62312d04d4c68aace9cd06fc135e09573325f3 upstream. pinctrl-amd currently tries to program bit 19 of all GPIOs to select either a 4kΩ or 8hΩ pull up, but this isn't what bit 19 does. Bit 19 is marked as reserved, even in the latest platforms documentation. Drop this programming functionality. Tested-by: Jan Visser Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20230705133005.577-4-mario.limonciello@amd.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-amd.c | 16 ++++------------ drivers/pinctrl/pinctrl-amd.h | 1 - 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 300e735ca57c..89b813cfa31c 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -209,7 +209,6 @@ static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc) char *pin_sts; char *interrupt_sts; char *wake_sts; - char *pull_up_sel; char *orientation; char debounce_value[40]; char *debounce_enable; @@ -317,14 +316,9 @@ static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc) seq_printf(s, " %s|", wake_sts); if (pin_reg & BIT(PULL_UP_ENABLE_OFF)) { - if (pin_reg & BIT(PULL_UP_SEL_OFF)) - pull_up_sel = "8k"; - else - pull_up_sel = "4k"; - seq_printf(s, "%s ↑|", - pull_up_sel); + seq_puts(s, " ↑ |"); } else if (pin_reg & BIT(PULL_DOWN_ENABLE_OFF)) { - seq_puts(s, " ↓|"); + seq_puts(s, " ↓ |"); } else { seq_puts(s, " |"); } @@ -751,7 +745,7 @@ static int amd_pinconf_get(struct pinctrl_dev *pctldev, break; case PIN_CONFIG_BIAS_PULL_UP: - arg = (pin_reg >> PULL_UP_SEL_OFF) & (BIT(0) | BIT(1)); + arg = (pin_reg >> PULL_UP_ENABLE_OFF) & BIT(0); break; case PIN_CONFIG_DRIVE_STRENGTH: @@ -798,10 +792,8 @@ static int amd_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin, break; case PIN_CONFIG_BIAS_PULL_UP: - pin_reg &= ~BIT(PULL_UP_SEL_OFF); - pin_reg |= (arg & BIT(0)) << PULL_UP_SEL_OFF; pin_reg &= ~BIT(PULL_UP_ENABLE_OFF); - pin_reg |= ((arg>>1) & BIT(0)) << PULL_UP_ENABLE_OFF; + pin_reg |= (arg & BIT(0)) << PULL_UP_ENABLE_OFF; break; case PIN_CONFIG_DRIVE_STRENGTH: diff --git a/drivers/pinctrl/pinctrl-amd.h b/drivers/pinctrl/pinctrl-amd.h index 1cf2d06bbd8c..34c5c3e71fb2 100644 --- a/drivers/pinctrl/pinctrl-amd.h +++ b/drivers/pinctrl/pinctrl-amd.h @@ -36,7 +36,6 @@ #define WAKE_CNTRL_OFF_S4 15 #define PIN_STS_OFF 16 #define DRV_STRENGTH_SEL_OFF 17 -#define PULL_UP_SEL_OFF 19 #define PULL_UP_ENABLE_OFF 20 #define PULL_DOWN_ENABLE_OFF 21 #define OUTPUT_VALUE_OFF 22 From 6d8488509ebad2ef1e4bad2184bbdf2f3543b61d Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 5 Jul 2023 08:30:05 -0500 Subject: [PATCH 086/224] pinctrl: amd: Unify debounce handling into amd_pinconf_set() commit 283c5ce7da0a676f46539094d40067ad17c4f294 upstream. Debounce handling is done in two different entry points in the driver. Unify this to make sure that it's always handled the same. Tested-by: Jan Visser Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20230705133005.577-5-mario.limonciello@amd.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-amd.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 89b813cfa31c..a8df77e80549 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -115,16 +115,12 @@ static void amd_gpio_set_value(struct gpio_chip *gc, unsigned offset, int value) raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); } -static int amd_gpio_set_debounce(struct gpio_chip *gc, unsigned offset, - unsigned debounce) +static int amd_gpio_set_debounce(struct amd_gpio *gpio_dev, unsigned int offset, + unsigned int debounce) { u32 time; u32 pin_reg; int ret = 0; - unsigned long flags; - struct amd_gpio *gpio_dev = gpiochip_get_data(gc); - - raw_spin_lock_irqsave(&gpio_dev->lock, flags); /* Use special handling for Pin0 debounce */ if (offset == 0) { @@ -183,7 +179,6 @@ static int amd_gpio_set_debounce(struct gpio_chip *gc, unsigned offset, pin_reg &= ~(DB_CNTRl_MASK << DB_CNTRL_OFF); } writel(pin_reg, gpio_dev->base + offset * 4); - raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); return ret; } @@ -782,9 +777,8 @@ static int amd_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin, switch (param) { case PIN_CONFIG_INPUT_DEBOUNCE: - pin_reg &= ~DB_TMR_OUT_MASK; - pin_reg |= arg & DB_TMR_OUT_MASK; - break; + ret = amd_gpio_set_debounce(gpio_dev, pin, arg); + goto out_unlock; case PIN_CONFIG_BIAS_PULL_DOWN: pin_reg &= ~BIT(PULL_DOWN_ENABLE_OFF); @@ -811,6 +805,7 @@ static int amd_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin, writel(pin_reg, gpio_dev->base + pin*4); } +out_unlock: raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); return ret; @@ -857,12 +852,6 @@ static int amd_gpio_set_config(struct gpio_chip *gc, unsigned int pin, { struct amd_gpio *gpio_dev = gpiochip_get_data(gc); - if (pinconf_to_config_param(config) == PIN_CONFIG_INPUT_DEBOUNCE) { - u32 debounce = pinconf_to_config_argument(config); - - return amd_gpio_set_debounce(gc, pin, debounce); - } - return amd_pinconf_set(gpio_dev->pctrl, pin, &config, 1); } From 00283137001ff4752d49bb1093df4eae67c6f493 Mon Sep 17 00:00:00 2001 From: Valentin David Date: Mon, 10 Jul 2023 22:27:49 +0200 Subject: [PATCH 087/224] tpm: Do not remap from ACPI resources again for Pluton TPM commit b1c1b98962d17a922989aa3b2822946bbb5c091f upstream. For Pluton TPM devices, it was assumed that there was no ACPI memory regions. This is not true for ASUS ROG Ally. ACPI advertises 0xfd500000-0xfd5fffff. Since remapping is already done in `crb_map_pluton`, remapping again in `crb_map_io` causes EBUSY error: [ 3.510453] tpm_crb MSFT0101:00: can't request region for resource [mem 0xfd500000-0xfd5fffff] [ 3.510463] tpm_crb: probe of MSFT0101:00 failed with error -16 Cc: stable@vger.kernel.org # v6.3+ Fixes: 4d2732882703 ("tpm_crb: Add support for CRB devices based on Pluton") Signed-off-by: Valentin David Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_crb.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c index 916f4ff246c1..cbbedf52607c 100644 --- a/drivers/char/tpm/tpm_crb.c +++ b/drivers/char/tpm/tpm_crb.c @@ -563,15 +563,18 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv, u32 rsp_size; int ret; - INIT_LIST_HEAD(&acpi_resource_list); - ret = acpi_dev_get_resources(device, &acpi_resource_list, - crb_check_resource, iores_array); - if (ret < 0) - return ret; - acpi_dev_free_resource_list(&acpi_resource_list); - - /* Pluton doesn't appear to define ACPI memory regions */ + /* + * Pluton sometimes does not define ACPI memory regions. + * Mapping is then done in crb_map_pluton + */ if (priv->sm != ACPI_TPM2_COMMAND_BUFFER_WITH_PLUTON) { + INIT_LIST_HEAD(&acpi_resource_list); + ret = acpi_dev_get_resources(device, &acpi_resource_list, + crb_check_resource, iores_array); + if (ret < 0) + return ret; + acpi_dev_free_resource_list(&acpi_resource_list); + if (resource_type(iores_array) != IORESOURCE_MEM) { dev_err(dev, FW_BUG "TPM2 ACPI table does not define a memory resource\n"); return -EINVAL; From 99b998fb9d7d2d2d9dbb3e19db2d0ade02f5a604 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 16 May 2023 01:25:54 +0300 Subject: [PATCH 088/224] tpm: tpm_vtpm_proxy: fix a race condition in /dev/vtpmx creation commit f4032d615f90970d6c3ac1d9c0bce3351eb4445c upstream. /dev/vtpmx is made visible before 'workqueue' is initialized, which can lead to a memory corruption in the worst case scenario. Address this by initializing 'workqueue' as the very first step of the driver initialization. Cc: stable@vger.kernel.org Fixes: 6f99612e2500 ("tpm: Proxy driver for supporting multiple emulated TPMs") Reviewed-by: Stefan Berger Signed-off-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_vtpm_proxy.c | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/drivers/char/tpm/tpm_vtpm_proxy.c b/drivers/char/tpm/tpm_vtpm_proxy.c index 5c865987ba5c..30e953988cab 100644 --- a/drivers/char/tpm/tpm_vtpm_proxy.c +++ b/drivers/char/tpm/tpm_vtpm_proxy.c @@ -683,37 +683,21 @@ static struct miscdevice vtpmx_miscdev = { .fops = &vtpmx_fops, }; -static int vtpmx_init(void) -{ - return misc_register(&vtpmx_miscdev); -} - -static void vtpmx_cleanup(void) -{ - misc_deregister(&vtpmx_miscdev); -} - static int __init vtpm_module_init(void) { int rc; - rc = vtpmx_init(); - if (rc) { - pr_err("couldn't create vtpmx device\n"); - return rc; - } - workqueue = create_workqueue("tpm-vtpm"); if (!workqueue) { pr_err("couldn't create workqueue\n"); - rc = -ENOMEM; - goto err_vtpmx_cleanup; + return -ENOMEM; } - return 0; - -err_vtpmx_cleanup: - vtpmx_cleanup(); + rc = misc_register(&vtpmx_miscdev); + if (rc) { + pr_err("couldn't create vtpmx device\n"); + destroy_workqueue(workqueue); + } return rc; } @@ -721,7 +705,7 @@ err_vtpmx_cleanup: static void __exit vtpm_module_exit(void) { destroy_workqueue(workqueue); - vtpmx_cleanup(); + misc_deregister(&vtpmx_miscdev); } module_init(vtpm_module_init); From f5a734a689e80c24671456ee3898dc0d68cfe51a Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Wed, 24 May 2023 17:40:39 +0200 Subject: [PATCH 089/224] tpm: tis_i2c: Limit read bursts to I2C_SMBUS_BLOCK_MAX (32) bytes commit f3b70b6e3390bfdf18fdd7d278a72a12784fdcce upstream. Underlying I2C bus drivers not always support longer transfers and imx-lpi2c for instance doesn't. SLB 9673 offers 427-bytes packets. Visible symptoms are: tpm tpm0: Error left over data tpm tpm0: tpm_transmit: tpm_recv: error -5 tpm_tis_i2c: probe of 1-002e failed with error -5 Cc: stable@vger.kernel.org # v5.20+ Fixes: bbc23a07b072 ("tpm: Add tpm_tis_i2c backend for tpm_tis_core") Tested-by: Michael Haener Signed-off-by: Alexander Sverdlin Reviewed-by: Jarkko Sakkinen Reviewed-by: Jerry Snitselaar Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_tis_i2c.c | 35 ++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/char/tpm/tpm_tis_i2c.c b/drivers/char/tpm/tpm_tis_i2c.c index f3a7251c8e38..8422af2abf88 100644 --- a/drivers/char/tpm/tpm_tis_i2c.c +++ b/drivers/char/tpm/tpm_tis_i2c.c @@ -189,21 +189,28 @@ static int tpm_tis_i2c_read_bytes(struct tpm_tis_data *data, u32 addr, u16 len, int ret; for (i = 0; i < TPM_RETRY; i++) { - /* write register */ - msg.len = sizeof(reg); - msg.buf = ® - msg.flags = 0; - ret = tpm_tis_i2c_retry_transfer_until_ack(data, &msg); - if (ret < 0) - return ret; + u16 read = 0; - /* read data */ - msg.buf = result; - msg.len = len; - msg.flags = I2C_M_RD; - ret = tpm_tis_i2c_retry_transfer_until_ack(data, &msg); - if (ret < 0) - return ret; + while (read < len) { + /* write register */ + msg.len = sizeof(reg); + msg.buf = ® + msg.flags = 0; + ret = tpm_tis_i2c_retry_transfer_until_ack(data, &msg); + if (ret < 0) + return ret; + + /* read data */ + msg.buf = result + read; + msg.len = len - read; + msg.flags = I2C_M_RD; + if (msg.len > I2C_SMBUS_BLOCK_MAX) + msg.len = I2C_SMBUS_BLOCK_MAX; + ret = tpm_tis_i2c_retry_transfer_until_ack(data, &msg); + if (ret < 0) + return ret; + read += msg.len; + } ret = tpm_tis_i2c_sanity_check_read(reg, len, result); if (ret == 0) From ad249709d2747c127b02d456f058990c0ad3ae6d Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Wed, 24 May 2023 17:40:40 +0200 Subject: [PATCH 090/224] tpm: tis_i2c: Limit write bursts to I2C_SMBUS_BLOCK_MAX (32) bytes commit 83e7e5d89f04d1c417492940f7922bc8416a8cc4 upstream. Underlying I2C bus drivers not always support longer transfers and imx-lpi2c for instance doesn't. The fix is symmetric to previous patch which fixed the read direction. Cc: stable@vger.kernel.org # v5.20+ Fixes: bbc23a07b072 ("tpm: Add tpm_tis_i2c backend for tpm_tis_core") Tested-by: Michael Haener Signed-off-by: Alexander Sverdlin Reviewed-by: Jarkko Sakkinen Reviewed-by: Jerry Snitselaar Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_tis_i2c.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/char/tpm/tpm_tis_i2c.c b/drivers/char/tpm/tpm_tis_i2c.c index 8422af2abf88..9586e0857a3e 100644 --- a/drivers/char/tpm/tpm_tis_i2c.c +++ b/drivers/char/tpm/tpm_tis_i2c.c @@ -230,19 +230,27 @@ static int tpm_tis_i2c_write_bytes(struct tpm_tis_data *data, u32 addr, u16 len, struct i2c_msg msg = { .addr = phy->i2c_client->addr }; u8 reg = tpm_tis_i2c_address_to_register(addr); int ret; + u16 wrote = 0; if (len > TPM_BUFSIZE - 1) return -EIO; - /* write register and data in one go */ phy->io_buf[0] = reg; - memcpy(phy->io_buf + sizeof(reg), value, len); - - msg.len = sizeof(reg) + len; msg.buf = phy->io_buf; - ret = tpm_tis_i2c_retry_transfer_until_ack(data, &msg); - if (ret < 0) - return ret; + while (wrote < len) { + /* write register and data in one go */ + msg.len = sizeof(reg) + len - wrote; + if (msg.len > I2C_SMBUS_BLOCK_MAX) + msg.len = I2C_SMBUS_BLOCK_MAX; + + memcpy(phy->io_buf + sizeof(reg), value + wrote, + msg.len - sizeof(reg)); + + ret = tpm_tis_i2c_retry_transfer_until_ack(data, &msg); + if (ret < 0) + return ret; + wrote += msg.len - sizeof(reg); + } return 0; } From bb4e824d6b11276c54f525c91e293f692a873e2b Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Thu, 29 Jun 2023 13:41:47 -0700 Subject: [PATCH 091/224] tpm: return false from tpm_amd_is_rng_defective on non-x86 platforms commit ecff6813d2bcf0c670881a9ba3f51cb032dd405a upstream. tpm_amd_is_rng_defective is for dealing with an issue related to the AMD firmware TPM, so on non-x86 architectures just have it inline and return false. Cc: stable@vger.kernel.org # v6.3+ Reported-by: Sachin Sant Reported-by: Aneesh Kumar K. V Closes: https://lore.kernel.org/lkml/99B81401-DB46-49B9-B321-CF832B50CAC3@linux.ibm.com/ Fixes: f1324bbc4011 ("tpm: disable hwrng for fTPM on some AMD designs") Signed-off-by: Jerry Snitselaar Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm-chip.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c index 95d847c9de79..d7ef4d0a7409 100644 --- a/drivers/char/tpm/tpm-chip.c +++ b/drivers/char/tpm/tpm-chip.c @@ -515,6 +515,7 @@ static int tpm_add_legacy_sysfs(struct tpm_chip *chip) * 6.x.y.z series: 6.0.18.6 + * 3.x.y.z series: 3.57.y.5 + */ +#ifdef CONFIG_X86 static bool tpm_amd_is_rng_defective(struct tpm_chip *chip) { u32 val1, val2; @@ -563,6 +564,12 @@ release: return true; } +#else +static inline bool tpm_amd_is_rng_defective(struct tpm_chip *chip) +{ + return false; +} +#endif /* CONFIG_X86 */ static int tpm_hwrng_read(struct hwrng *rng, void *data, size_t max, bool wait) { From de67dadd5cb3428d68cd10f86f71f08b3f71a11d Mon Sep 17 00:00:00 2001 From: Arseniy Krasnov Date: Thu, 15 Jun 2023 11:08:15 +0300 Subject: [PATCH 092/224] mtd: rawnand: meson: fix unaligned DMA buffers handling commit 98480a181a08ceeede417e5b28f6d0429d8ae156 upstream. Meson NAND controller requires 8 bytes alignment for DMA addresses, otherwise it "aligns" passed address by itself thus accessing invalid location in the provided buffer. This patch makes unaligned buffers to be reallocated to become valid. Fixes: 8fae856c5350 ("mtd: rawnand: meson: add support for Amlogic NAND flash controller") Cc: Signed-off-by: Arseniy Krasnov Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20230615080815.3291006-1-AVKrasnov@sberdevices.ru Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/raw/meson_nand.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c index 074e14225c06..029a2a302aa6 100644 --- a/drivers/mtd/nand/raw/meson_nand.c +++ b/drivers/mtd/nand/raw/meson_nand.c @@ -76,6 +76,7 @@ #define GENCMDIADDRH(aih, addr) ((aih) | (((addr) >> 16) & 0xffff)) #define DMA_DIR(dir) ((dir) ? NFC_CMD_N2M : NFC_CMD_M2N) +#define DMA_ADDR_ALIGN 8 #define ECC_CHECK_RETURN_FF (-1) @@ -842,6 +843,9 @@ static int meson_nfc_read_oob(struct nand_chip *nand, int page) static bool meson_nfc_is_buffer_dma_safe(const void *buffer) { + if ((uintptr_t)buffer % DMA_ADDR_ALIGN) + return false; + if (virt_addr_valid(buffer) && (!object_is_on_stack(buffer))) return true; return false; From eac0aac07f6af47b8ba57c8064bf04ed6df73d1d Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 22 Jun 2023 03:31:07 -0700 Subject: [PATCH 093/224] net: bcmgenet: Ensure MDIO unregistration has clocks enabled commit 1b5ea7ffb7a3bdfffb4b7f40ce0d20a3372ee405 upstream. With support for Ethernet PHY LEDs having been added, while unregistering a MDIO bus and its child device liks PHYs there may be "late" accesses to the MDIO bus. One typical use case is setting the PHY LEDs brightness to OFF for instance. We need to ensure that the MDIO bus controller remains entirely functional since it runs off the main GENET adapter clock. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20230617155500.4005881-1-andrew@lunn.ch/ Fixes: 9a4e79697009 ("net: bcmgenet: utilize generic Broadcom UniMAC MDIO controller driver") Signed-off-by: Florian Fainelli Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20230622103107.1760280-1-florian.fainelli@broadcom.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/genet/bcmmii.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index bf9e246784b6..1fe8038587ac 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -664,5 +664,7 @@ void bcmgenet_mii_exit(struct net_device *dev) if (of_phy_is_fixed_link(dn)) of_phy_deregister_fixed_link(dn); of_node_put(priv->phy_dn); + clk_prepare_enable(priv->clk); platform_device_unregister(priv->mii_pdev); + clk_disable_unprepare(priv->clk); } From 6d806841f111d4ccb3a70789141b0979a5afac26 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 21 Jun 2023 06:38:48 +0200 Subject: [PATCH 094/224] net: phy: dp83td510: fix kernel stall during netboot in DP83TD510E PHY driver commit fc0649395dca81f2b3b02d9b248acb38cbcee55c upstream. Fix an issue where the kernel would stall during netboot, showing the "sched: RT throttling activated" message. This stall was triggered by the behavior of the mii_interrupt bit (Bit 7 - DP83TD510E_STS_MII_INT) in the DP83TD510E's PHY_STS Register (Address = 0x10). The DP83TD510E datasheet (2020) states that the bit clears on write, however, in practice, the bit clears on read. This discrepancy had significant implications on the driver's interrupt handling. The PHY_STS Register was used by handle_interrupt() to check for pending interrupts and by read_status() to get the current link status. The call to read_status() was unintentionally clearing the mii_interrupt status bit without deasserting the IRQ pin, causing handle_interrupt() to miss other pending interrupts. This issue was most apparent during netboot. The fix refrains from using the PHY_STS Register for interrupt handling. Instead, we now solely rely on the INTERRUPT_REG_1 Register (Address = 0x12) and INTERRUPT_REG_2 Register (Address = 0x13) for this purpose. These registers directly influence the IRQ pin state and are latched high until read. Note: The INTERRUPT_REG_2 Register (Address = 0x13) exists and can also be used for interrupt handling, specifically for "Aneg page received interrupt" and "Polarity change interrupt". However, these features are currently not supported by this driver. Fixes: 165cd04fe253 ("net: phy: dp83td510: Add support for the DP83TD510 Ethernet PHY") Cc: Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20230621043848.3806124-1-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/dp83td510.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/drivers/net/phy/dp83td510.c b/drivers/net/phy/dp83td510.c index 3cd9a77f9532..d7616b13c594 100644 --- a/drivers/net/phy/dp83td510.c +++ b/drivers/net/phy/dp83td510.c @@ -12,6 +12,11 @@ /* MDIO_MMD_VEND2 registers */ #define DP83TD510E_PHY_STS 0x10 +/* Bit 7 - mii_interrupt, active high. Clears on read. + * Note: Clearing does not necessarily deactivate IRQ pin if interrupts pending. + * This differs from the DP83TD510E datasheet (2020) which states this bit + * clears on write 0. + */ #define DP83TD510E_STS_MII_INT BIT(7) #define DP83TD510E_LINK_STATUS BIT(0) @@ -53,12 +58,6 @@ static int dp83td510_config_intr(struct phy_device *phydev) int ret; if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { - /* Clear any pending interrupts */ - ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_PHY_STS, - 0x0); - if (ret) - return ret; - ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_INTERRUPT_REG_1, DP83TD510E_INT1_LINK_EN); @@ -81,10 +80,6 @@ static int dp83td510_config_intr(struct phy_device *phydev) DP83TD510E_GENCFG_INT_EN); if (ret) return ret; - - /* Clear any pending interrupts */ - ret = phy_write_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_PHY_STS, - 0x0); } return ret; @@ -94,14 +89,6 @@ static irqreturn_t dp83td510_handle_interrupt(struct phy_device *phydev) { int ret; - ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_PHY_STS); - if (ret < 0) { - phy_error(phydev); - return IRQ_NONE; - } else if (!(ret & DP83TD510E_STS_MII_INT)) { - return IRQ_NONE; - } - /* Read the current enabled interrupts */ ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_INTERRUPT_REG_1); if (ret < 0) { From a4336343ea36540760b68cd71ea0864ca7ad3e98 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 9 May 2023 16:57:20 +0200 Subject: [PATCH 095/224] kasan: add kasan_tag_mismatch prototype commit fb646a4cd3f0ff27d19911bef7b6622263723df6 upstream. The kasan sw-tags implementation contains one function that is only called from assembler and has no prototype in a header. This causes a W=1 warning: mm/kasan/sw_tags.c:171:6: warning: no previous prototype for 'kasan_tag_mismatch' [-Wmissing-prototypes] 171 | void kasan_tag_mismatch(unsigned long addr, unsigned long access_info, Add a prototype in the local header to get a clean build. Link: https://lkml.kernel.org/r/20230509145735.9263-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Marco Elver Cc: Vincenzo Frascino Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/kasan/kasan.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index abbcc1b0eec5..a898f05a2afd 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -629,4 +629,7 @@ void __hwasan_storeN_noabort(unsigned long addr, size_t size); void __hwasan_tag_memory(unsigned long addr, u8 tag, unsigned long size); +void kasan_tag_mismatch(unsigned long addr, unsigned long access_info, + unsigned long ret_ip); + #endif /* __MM_KASAN_KASAN_H */ From 5aea2ac374560ee9bb13e32f4c06143fe95d330d Mon Sep 17 00:00:00 2001 From: sunliming Date: Mon, 26 Jun 2023 19:13:42 +0800 Subject: [PATCH 096/224] tracing/user_events: Fix incorrect return value for writing operation when events are disabled commit f6d026eea390d59787a6cdc2ef5c983d02e029d0 upstream. The writing operation return the count of writes regardless of whether events are enabled or disabled. Switch it to return -EBADF to indicates that the event is disabled. Link: https://lkml.kernel.org/r/20230626111344.19136-2-sunliming@kylinos.cn Cc: stable@vger.kernel.org 7f5a08c79df35 ("user_events: Add minimal support for trace_event into ftrace") Acked-by: Beau Belgrave Signed-off-by: sunliming Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_user.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 625cab4b9d94..492837609294 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -1456,7 +1456,8 @@ static ssize_t user_events_write_core(struct file *file, struct iov_iter *i) if (unlikely(faulted)) return -EFAULT; - } + } else + return -EBADF; return ret; } From 16eceb395994f2eb004938d4b58e405e7dd4dd94 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Tue, 30 May 2023 11:44:36 +0530 Subject: [PATCH 097/224] powerpc: Fail build if using recordmcount with binutils v2.37 commit 25ea739ea1d4d3de41acc4f4eb2d1a97eee0eb75 upstream. binutils v2.37 drops unused section symbols, which prevents recordmcount from capturing mcount locations in sections that have no non-weak symbols. This results in a build failure with a message such as: Cannot find symbol for section 12: .text.perf_callchain_kernel. kernel/events/callchain.o: failed The change to binutils was reverted for v2.38, so this behavior is specific to binutils v2.37: https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=c09c8b42021180eee9495bd50d8b35e683d3901b Objtool is able to cope with such sections, so this issue is specific to recordmcount. Fail the build and print a warning if binutils v2.37 is detected and if we are using recordmcount. Cc: stable@vger.kernel.org Suggested-by: Joel Stanley Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://msgid.link/20230530061436.56925-1-naveen@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/Makefile | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 894d48cd0492..054844153b1f 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -402,3 +402,11 @@ checkbin: echo -n '*** Please use a different binutils version.' ; \ false ; \ fi + @if test "x${CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT}" = "xy" -a \ + "x${CONFIG_LD_IS_BFD}" = "xy" -a \ + "${CONFIG_LD_VERSION}" = "23700" ; then \ + echo -n '*** binutils 2.37 drops unused section symbols, which recordmcount ' ; \ + echo 'is unable to handle.' ; \ + echo '*** Please use a different binutils version.' ; \ + false ; \ + fi From 23ab732b961f04c20c318613f65adbf413f29e29 Mon Sep 17 00:00:00 2001 From: Ekansh Gupta Date: Wed, 14 Jun 2023 17:24:45 +0530 Subject: [PATCH 098/224] misc: fastrpc: Create fastrpc scalar with correct buffer count commit 0b4e32df3e09406b835d8230b9331273f2805058 upstream. A process can spawn a PD on DSP with some attributes that can be associated with the PD during spawn and run. The invocation corresponding to the create request with attributes has total 4 buffers at the DSP side implementation. If this number is not correct, the invocation is expected to fail on DSP. Added change to use correct number of buffer count for creating fastrpc scalar. Fixes: d73f71c7c6ee ("misc: fastrpc: Add support for create remote init process") Cc: stable Tested-by: Ekansh Gupta Signed-off-by: Ekansh Gupta Message-ID: <1686743685-21715-1-git-send-email-quic_ekangupt@quicinc.com> Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index e5cabb901213..13518cac076c 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -1260,7 +1260,7 @@ static int fastrpc_init_create_process(struct fastrpc_user *fl, sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_CREATE, 4, 0); if (init.attrs) - sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_CREATE_ATTR, 6, 0); + sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_CREATE_ATTR, 4, 0); err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, sc, args); From 484b8fb1ffb5c640ee12da429fc526b55929c3cf Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 17 May 2023 17:49:45 +1000 Subject: [PATCH 099/224] powerpc/security: Fix Speculation_Store_Bypass reporting on Power10 commit 5bcedc5931e7bd6928a2d8207078d4cb476b3b55 upstream. Nageswara reported that /proc/self/status was showing "vulnerable" for the Speculation_Store_Bypass feature on Power10, eg: $ grep Speculation_Store_Bypass: /proc/self/status Speculation_Store_Bypass: vulnerable But at the same time the sysfs files, and lscpu, were showing "Not affected". This turns out to simply be a bug in the reporting of the Speculation_Store_Bypass, aka. PR_SPEC_STORE_BYPASS, case. When SEC_FTR_STF_BARRIER was added, so that firmware could communicate the vulnerability was not present, the code in ssb_prctl_get() was not updated to check the new flag. So add the check for SEC_FTR_STF_BARRIER being disabled. Rather than adding the new check to the existing if block and expanding the comment to cover both cases, rewrite the three cases to be separate so they can be commented separately for clarity. Fixes: 84ed26fd00c5 ("powerpc/security: Add a security feature for STF barrier") Cc: stable@vger.kernel.org # v5.14+ Reported-by: Nageswara R Sastry Tested-by: Nageswara R Sastry Reviewed-by: Russell Currey Signed-off-by: Michael Ellerman Link: https://msgid.link/20230517074945.53188-1-mpe@ellerman.id.au Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/kernel/security.c | 37 +++++++++++++++++----------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 206475e3e0b4..4856e1a5161c 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -364,26 +364,27 @@ ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute * static int ssb_prctl_get(struct task_struct *task) { - if (stf_enabled_flush_types == STF_BARRIER_NONE) - /* - * We don't have an explicit signal from firmware that we're - * vulnerable or not, we only have certain CPU revisions that - * are known to be vulnerable. - * - * We assume that if we're on another CPU, where the barrier is - * NONE, then we are not vulnerable. - */ + /* + * The STF_BARRIER feature is on by default, so if it's off that means + * firmware has explicitly said the CPU is not vulnerable via either + * the hypercall or device tree. + */ + if (!security_ftr_enabled(SEC_FTR_STF_BARRIER)) return PR_SPEC_NOT_AFFECTED; - else - /* - * If we do have a barrier type then we are vulnerable. The - * barrier is not a global or per-process mitigation, so the - * only value we can report here is PR_SPEC_ENABLE, which - * appears as "vulnerable" in /proc. - */ - return PR_SPEC_ENABLE; - return -EINVAL; + /* + * If the system's CPU has no known barrier (see setup_stf_barrier()) + * then assume that the CPU is not vulnerable. + */ + if (stf_enabled_flush_types == STF_BARRIER_NONE) + return PR_SPEC_NOT_AFFECTED; + + /* + * Otherwise the CPU is vulnerable. The barrier is not a global or + * per-process mitigation, so the only value that can be reported here + * is PR_SPEC_ENABLE, which appears as "vulnerable" in /proc. + */ + return PR_SPEC_ENABLE; } int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) From 58d1c81307d25263a8b31777bc874987ea5b847b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 17 May 2023 22:30:33 +1000 Subject: [PATCH 100/224] powerpc/64s: Fix native_hpte_remove() to be irq-safe commit 8bbe9fee5848371d4af101be445303cac8d880c5 upstream. Lockdep warns that the use of the hpte_lock in native_hpte_remove() is not safe against an IRQ coming in: ================================ WARNING: inconsistent lock state 6.4.0-rc2-g0c54f4d30ecc #1 Not tainted -------------------------------- inconsistent {IN-SOFTIRQ-W} -> {SOFTIRQ-ON-W} usage. qemu-system-ppc/93865 [HC0[0]:SC0[0]:HE1:SE1] takes: c0000000021f5180 (hpte_lock){+.?.}-{0:0}, at: native_lock_hpte+0x8/0xd0 {IN-SOFTIRQ-W} state was registered at: lock_acquire+0x134/0x3f0 native_lock_hpte+0x44/0xd0 native_hpte_insert+0xd4/0x2a0 __hash_page_64K+0x218/0x4f0 hash_page_mm+0x464/0x840 do_hash_fault+0x11c/0x260 data_access_common_virt+0x210/0x220 __ip_select_ident+0x140/0x150 ... net_rx_action+0x3bc/0x440 __do_softirq+0x180/0x534 ... sys_sendmmsg+0x34/0x50 system_call_exception+0x128/0x320 system_call_common+0x160/0x2e4 ... Possible unsafe locking scenario: CPU0 ---- lock(hpte_lock); lock(hpte_lock); *** DEADLOCK *** ... Call Trace: dump_stack_lvl+0x98/0xe0 (unreliable) print_usage_bug.part.0+0x250/0x278 mark_lock+0xc9c/0xd30 __lock_acquire+0x440/0x1ca0 lock_acquire+0x134/0x3f0 native_lock_hpte+0x44/0xd0 native_hpte_remove+0xb0/0x190 kvmppc_mmu_map_page+0x650/0x698 [kvm_pr] kvmppc_handle_pagefault+0x534/0x6e8 [kvm_pr] kvmppc_handle_exit_pr+0x6d8/0xe90 [kvm_pr] after_sprg3_load+0x80/0x90 [kvm_pr] kvmppc_vcpu_run_pr+0x108/0x270 [kvm_pr] kvmppc_vcpu_run+0x34/0x48 [kvm] kvm_arch_vcpu_ioctl_run+0x340/0x470 [kvm] kvm_vcpu_ioctl+0x338/0x8b8 [kvm] sys_ioctl+0x7c4/0x13e0 system_call_exception+0x128/0x320 system_call_common+0x160/0x2e4 I suspect kvm_pr is the only caller that doesn't already have IRQs disabled, which is why this hasn't been reported previously. Fix it by disabling IRQs in native_hpte_remove(). Fixes: 35159b5717fa ("powerpc/64s: make HPTE lock and native_tlbie_lock irq-safe") Cc: stable@vger.kernel.org # v6.1+ Signed-off-by: Michael Ellerman Link: https://msgid.link/20230517123033.18430-1-mpe@ellerman.id.au Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/mm/book3s64/hash_native.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/mm/book3s64/hash_native.c b/arch/powerpc/mm/book3s64/hash_native.c index 9342e79870df..430d1d935a7c 100644 --- a/arch/powerpc/mm/book3s64/hash_native.c +++ b/arch/powerpc/mm/book3s64/hash_native.c @@ -328,10 +328,12 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn, static long native_hpte_remove(unsigned long hpte_group) { + unsigned long hpte_v, flags; struct hash_pte *hptep; int i; int slot_offset; - unsigned long hpte_v; + + local_irq_save(flags); DBG_LOW(" remove(group=%lx)\n", hpte_group); @@ -356,13 +358,16 @@ static long native_hpte_remove(unsigned long hpte_group) slot_offset &= 0x7; } - if (i == HPTES_PER_GROUP) - return -1; + if (i == HPTES_PER_GROUP) { + i = -1; + goto out; + } /* Invalidate the hpte. NOTE: this also unlocks it */ release_hpte_lock(); hptep->v = 0; - +out: + local_irq_restore(flags); return i; } From d56b7a43a2e03f72d696d353c207ee086f7cf555 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 26 Jun 2023 15:50:14 +0800 Subject: [PATCH 101/224] MIPS: Loongson: Fix cpu_probe_loongson() again commit 65fee014dc41a774bcd94896f3fb380bc39d8dda upstream. Commit 7db5e9e9e5e6c10d7d ("MIPS: loongson64: fix FTLB configuration") move decode_configs() from the beginning of cpu_probe_loongson() to the end in order to fix FTLB configuration. However, it breaks the CPUCFG decoding because decode_configs() use "c->options = xxxx" rather than "c->options |= xxxx", all information get from CPUCFG by decode_cpucfg() is lost. This causes error when creating a KVM guest on Loongson-3A4000: Exception Code: 4 not handled @ PC: 0000000087ad5981, inst: 0xcb7a1898 BadVaddr: 0x0 Status: 0x0 Fix this by moving the c->cputype setting to the beginning and moving decode_configs() after that. Fixes: 7db5e9e9e5e6c10d7d ("MIPS: loongson64: fix FTLB configuration") Cc: stable@vger.kernel.org Cc: Huang Pei Signed-off-by: Huacai Chen Signed-off-by: Thomas Bogendoerfer Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/cpu-probe.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 6f5d82595877..482af80b8179 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -1675,7 +1675,10 @@ static inline void decode_cpucfg(struct cpuinfo_mips *c) static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) { + c->cputype = CPU_LOONGSON64; + /* All Loongson processors covered here define ExcCode 16 as GSExc. */ + decode_configs(c); c->options |= MIPS_CPU_GSEXCEX; switch (c->processor_id & PRID_IMP_MASK) { @@ -1685,7 +1688,6 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) case PRID_REV_LOONGSON2K_R1_1: case PRID_REV_LOONGSON2K_R1_2: case PRID_REV_LOONGSON2K_R1_3: - c->cputype = CPU_LOONGSON64; __cpu_name[cpu] = "Loongson-2K"; set_elf_platform(cpu, "gs264e"); set_isa(c, MIPS_CPU_ISA_M64R2); @@ -1698,14 +1700,12 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) switch (c->processor_id & PRID_REV_MASK) { case PRID_REV_LOONGSON3A_R2_0: case PRID_REV_LOONGSON3A_R2_1: - c->cputype = CPU_LOONGSON64; __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3a"); set_isa(c, MIPS_CPU_ISA_M64R2); break; case PRID_REV_LOONGSON3A_R3_0: case PRID_REV_LOONGSON3A_R3_1: - c->cputype = CPU_LOONGSON64; __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3a"); set_isa(c, MIPS_CPU_ISA_M64R2); @@ -1725,7 +1725,6 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) c->ases &= ~MIPS_ASE_VZ; /* VZ of Loongson-3A2000/3000 is incomplete */ break; case PRID_IMP_LOONGSON_64G: - c->cputype = CPU_LOONGSON64; __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3a"); set_isa(c, MIPS_CPU_ISA_M64R2); @@ -1735,8 +1734,6 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) panic("Unknown Loongson Processor ID!"); break; } - - decode_configs(c); } #else static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) { } From bd9cf2a5f9e1b2229ad22f21de6f6ad1a9c8858e Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Wed, 28 Jun 2023 19:08:17 +0800 Subject: [PATCH 102/224] MIPS: KVM: Fix NULL pointer dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e4de2057698636c0ee709e545d19b169d2069fa3 upstream. After commit 45c7e8af4a5e3f0bea4ac209 ("MIPS: Remove KVM_TE support") we get a NULL pointer dereference when creating a KVM guest: [ 146.243409] Starting KVM with MIPS VZ extensions [ 149.849151] CPU 3 Unable to handle kernel paging request at virtual address 0000000000000300, epc == ffffffffc06356ec, ra == ffffffffc063568c [ 149.849177] Oops[#1]: [ 149.849182] CPU: 3 PID: 2265 Comm: qemu-system-mip Not tainted 6.4.0-rc3+ #1671 [ 149.849188] Hardware name: THTF CX TL630 Series/THTF-LS3A4000-7A1000-ML4A, BIOS KL4.1F.TF.D.166.201225.R 12/25/2020 [ 149.849192] $ 0 : 0000000000000000 000000007400cce0 0000000000400004 ffffffff8119c740 [ 149.849209] $ 4 : 000000007400cce1 000000007400cce1 0000000000000000 0000000000000000 [ 149.849221] $ 8 : 000000240058bb36 ffffffff81421ac0 0000000000000000 0000000000400dc0 [ 149.849233] $12 : 9800000102a07cc8 ffffffff80e40e38 0000000000000001 0000000000400dc0 [ 149.849245] $16 : 0000000000000000 9800000106cd0000 9800000106cd0000 9800000100cce000 [ 149.849257] $20 : ffffffffc0632b28 ffffffffc05b31b0 9800000100ccca00 0000000000400000 [ 149.849269] $24 : 9800000106cd09ce ffffffff802f69d0 [ 149.849281] $28 : 9800000102a04000 9800000102a07cd0 98000001106a8000 ffffffffc063568c [ 149.849293] Hi : 00000335b2111e66 [ 149.849295] Lo : 6668d90061ae0ae9 [ 149.849298] epc : ffffffffc06356ec kvm_vz_vcpu_setup+0xc4/0x328 [kvm] [ 149.849324] ra : ffffffffc063568c kvm_vz_vcpu_setup+0x64/0x328 [kvm] [ 149.849336] Status: 7400cce3 KX SX UX KERNEL EXL IE [ 149.849351] Cause : 1000000c (ExcCode 03) [ 149.849354] BadVA : 0000000000000300 [ 149.849357] PrId : 0014c004 (ICT Loongson-3) [ 149.849360] Modules linked in: kvm nfnetlink_queue nfnetlink_log nfnetlink fuse sha256_generic libsha256 cfg80211 rfkill binfmt_misc vfat fat snd_hda_codec_hdmi input_leds led_class snd_hda_intel snd_intel_dspcfg snd_hda_codec snd_hda_core snd_pcm snd_timer snd serio_raw xhci_pci radeon drm_suballoc_helper drm_display_helper xhci_hcd ip_tables x_tables [ 149.849432] Process qemu-system-mip (pid: 2265, threadinfo=00000000ae2982d2, task=0000000038e09ad4, tls=000000ffeba16030) [ 149.849439] Stack : 9800000000000003 9800000100ccca00 9800000100ccc000 ffffffffc062cef4 [ 149.849453] 9800000102a07d18 c89b63a7ab338e00 0000000000000000 ffffffff811a0000 [ 149.849465] 0000000000000000 9800000106cd0000 ffffffff80e59938 98000001106a8920 [ 149.849476] ffffffff80e57f30 ffffffffc062854c ffffffff811a0000 9800000102bf4240 [ 149.849488] ffffffffc05b0000 ffffffff80e3a798 000000ff78000000 000000ff78000010 [ 149.849500] 0000000000000255 98000001021f7de0 98000001023f0078 ffffffff81434000 [ 149.849511] 0000000000000000 0000000000000000 9800000102ae0000 980000025e92ae28 [ 149.849523] 0000000000000000 c89b63a7ab338e00 0000000000000001 ffffffff8119dce0 [ 149.849535] 000000ff78000010 ffffffff804f3d3c 9800000102a07eb0 0000000000000255 [ 149.849546] 0000000000000000 ffffffff8049460c 000000ff78000010 0000000000000255 [ 149.849558] ... [ 149.849565] Call Trace: [ 149.849567] [] kvm_vz_vcpu_setup+0xc4/0x328 [kvm] [ 149.849586] [] kvm_arch_vcpu_create+0x184/0x228 [kvm] [ 149.849605] [] kvm_vm_ioctl+0x64c/0xf28 [kvm] [ 149.849623] [] sys_ioctl+0xc8/0x118 [ 149.849631] [] syscall_common+0x34/0x58 The root cause is the deletion of kvm_mips_commpage_init() leaves vcpu ->arch.cop0 NULL. So fix it by making cop0 from a pointer to an embedded object. Fixes: 45c7e8af4a5e3f0bea4ac209 ("MIPS: Remove KVM_TE support") Cc: stable@vger.kernel.org Reported-by: Yu Zhao Suggested-by: Thomas Bogendoerfer Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Huacai Chen Signed-off-by: Thomas Bogendoerfer Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/kvm_host.h | 6 +++--- arch/mips/kvm/emulate.c | 22 +++++++++++----------- arch/mips/kvm/mips.c | 16 ++++++++-------- arch/mips/kvm/trace.h | 8 ++++---- arch/mips/kvm/vz.c | 20 ++++++++++---------- 5 files changed, 36 insertions(+), 36 deletions(-) diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 5cedb28e8a40..8042a87b8511 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -317,7 +317,7 @@ struct kvm_vcpu_arch { unsigned int aux_inuse; /* COP0 State */ - struct mips_coproc *cop0; + struct mips_coproc cop0; /* Resume PC after MMIO completion */ unsigned long io_pc; @@ -698,7 +698,7 @@ static inline bool kvm_mips_guest_can_have_fpu(struct kvm_vcpu_arch *vcpu) static inline bool kvm_mips_guest_has_fpu(struct kvm_vcpu_arch *vcpu) { return kvm_mips_guest_can_have_fpu(vcpu) && - kvm_read_c0_guest_config1(vcpu->cop0) & MIPS_CONF1_FP; + kvm_read_c0_guest_config1(&vcpu->cop0) & MIPS_CONF1_FP; } static inline bool kvm_mips_guest_can_have_msa(struct kvm_vcpu_arch *vcpu) @@ -710,7 +710,7 @@ static inline bool kvm_mips_guest_can_have_msa(struct kvm_vcpu_arch *vcpu) static inline bool kvm_mips_guest_has_msa(struct kvm_vcpu_arch *vcpu) { return kvm_mips_guest_can_have_msa(vcpu) && - kvm_read_c0_guest_config3(vcpu->cop0) & MIPS_CONF3_MSA; + kvm_read_c0_guest_config3(&vcpu->cop0) & MIPS_CONF3_MSA; } struct kvm_mips_callbacks { diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index edaec93a1a1f..e64372b8f66a 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -312,7 +312,7 @@ int kvm_get_badinstrp(u32 *opc, struct kvm_vcpu *vcpu, u32 *out) */ int kvm_mips_count_disabled(struct kvm_vcpu *vcpu) { - struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_coproc *cop0 = &vcpu->arch.cop0; return (vcpu->arch.count_ctl & KVM_REG_MIPS_COUNT_CTL_DC) || (kvm_read_c0_guest_cause(cop0) & CAUSEF_DC); @@ -384,7 +384,7 @@ static inline ktime_t kvm_mips_count_time(struct kvm_vcpu *vcpu) */ static u32 kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now) { - struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_coproc *cop0 = &vcpu->arch.cop0; ktime_t expires, threshold; u32 count, compare; int running; @@ -444,7 +444,7 @@ static u32 kvm_mips_read_count_running(struct kvm_vcpu *vcpu, ktime_t now) */ u32 kvm_mips_read_count(struct kvm_vcpu *vcpu) { - struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_coproc *cop0 = &vcpu->arch.cop0; /* If count disabled just read static copy of count */ if (kvm_mips_count_disabled(vcpu)) @@ -502,7 +502,7 @@ ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, u32 *count) static void kvm_mips_resume_hrtimer(struct kvm_vcpu *vcpu, ktime_t now, u32 count) { - struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_coproc *cop0 = &vcpu->arch.cop0; u32 compare; u64 delta; ktime_t expire; @@ -603,7 +603,7 @@ resume: */ void kvm_mips_write_count(struct kvm_vcpu *vcpu, u32 count) { - struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_coproc *cop0 = &vcpu->arch.cop0; ktime_t now; /* Calculate bias */ @@ -649,7 +649,7 @@ void kvm_mips_init_count(struct kvm_vcpu *vcpu, unsigned long count_hz) */ int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz) { - struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_coproc *cop0 = &vcpu->arch.cop0; int dc; ktime_t now; u32 count; @@ -696,7 +696,7 @@ int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz) */ void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack) { - struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_coproc *cop0 = &vcpu->arch.cop0; int dc; u32 old_compare = kvm_read_c0_guest_compare(cop0); s32 delta = compare - old_compare; @@ -779,7 +779,7 @@ void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack) */ static ktime_t kvm_mips_count_disable(struct kvm_vcpu *vcpu) { - struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_coproc *cop0 = &vcpu->arch.cop0; u32 count; ktime_t now; @@ -806,7 +806,7 @@ static ktime_t kvm_mips_count_disable(struct kvm_vcpu *vcpu) */ void kvm_mips_count_disable_cause(struct kvm_vcpu *vcpu) { - struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_coproc *cop0 = &vcpu->arch.cop0; kvm_set_c0_guest_cause(cop0, CAUSEF_DC); if (!(vcpu->arch.count_ctl & KVM_REG_MIPS_COUNT_CTL_DC)) @@ -826,7 +826,7 @@ void kvm_mips_count_disable_cause(struct kvm_vcpu *vcpu) */ void kvm_mips_count_enable_cause(struct kvm_vcpu *vcpu) { - struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_coproc *cop0 = &vcpu->arch.cop0; u32 count; kvm_clear_c0_guest_cause(cop0, CAUSEF_DC); @@ -852,7 +852,7 @@ void kvm_mips_count_enable_cause(struct kvm_vcpu *vcpu) */ int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl) { - struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_coproc *cop0 = &vcpu->arch.cop0; s64 changed = count_ctl ^ vcpu->arch.count_ctl; s64 delta; ktime_t expire, now; diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index a25e0b73ee70..a4f4407b0a33 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -659,7 +659,7 @@ static int kvm_mips_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices) static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { - struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_coproc *cop0 = &vcpu->arch.cop0; struct mips_fpu_struct *fpu = &vcpu->arch.fpu; int ret; s64 v; @@ -771,7 +771,7 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { - struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_coproc *cop0 = &vcpu->arch.cop0; struct mips_fpu_struct *fpu = &vcpu->arch.fpu; s64 v; s64 vs[2]; @@ -1111,7 +1111,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { return kvm_mips_pending_timer(vcpu) || - kvm_read_c0_guest_cause(vcpu->arch.cop0) & C_TI; + kvm_read_c0_guest_cause(&vcpu->arch.cop0) & C_TI; } int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu) @@ -1135,7 +1135,7 @@ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu) kvm_debug("\thi: 0x%08lx\n", vcpu->arch.hi); kvm_debug("\tlo: 0x%08lx\n", vcpu->arch.lo); - cop0 = vcpu->arch.cop0; + cop0 = &vcpu->arch.cop0; kvm_debug("\tStatus: 0x%08x, Cause: 0x%08x\n", kvm_read_c0_guest_status(cop0), kvm_read_c0_guest_cause(cop0)); @@ -1257,7 +1257,7 @@ static int __kvm_mips_handle_exit(struct kvm_vcpu *vcpu) case EXCCODE_TLBS: kvm_debug("TLB ST fault: cause %#x, status %#x, PC: %p, BadVaddr: %#lx\n", - cause, kvm_read_c0_guest_status(vcpu->arch.cop0), opc, + cause, kvm_read_c0_guest_status(&vcpu->arch.cop0), opc, badvaddr); ++vcpu->stat.tlbmiss_st_exits; @@ -1329,7 +1329,7 @@ static int __kvm_mips_handle_exit(struct kvm_vcpu *vcpu) kvm_get_badinstr(opc, vcpu, &inst); kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#x\n", exccode, opc, inst, badvaddr, - kvm_read_c0_guest_status(vcpu->arch.cop0)); + kvm_read_c0_guest_status(&vcpu->arch.cop0)); kvm_arch_vcpu_dump_regs(vcpu); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ret = RESUME_HOST; @@ -1402,7 +1402,7 @@ int noinstr kvm_mips_handle_exit(struct kvm_vcpu *vcpu) /* Enable FPU for guest and restore context */ void kvm_own_fpu(struct kvm_vcpu *vcpu) { - struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_coproc *cop0 = &vcpu->arch.cop0; unsigned int sr, cfg5; preempt_disable(); @@ -1446,7 +1446,7 @@ void kvm_own_fpu(struct kvm_vcpu *vcpu) /* Enable MSA for guest and restore context */ void kvm_own_msa(struct kvm_vcpu *vcpu) { - struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_coproc *cop0 = &vcpu->arch.cop0; unsigned int sr, cfg5; preempt_disable(); diff --git a/arch/mips/kvm/trace.h b/arch/mips/kvm/trace.h index a8c7fd7bf6d2..136c3535a1cb 100644 --- a/arch/mips/kvm/trace.h +++ b/arch/mips/kvm/trace.h @@ -322,11 +322,11 @@ TRACE_EVENT_FN(kvm_guest_mode_change, ), TP_fast_assign( - __entry->epc = kvm_read_c0_guest_epc(vcpu->arch.cop0); + __entry->epc = kvm_read_c0_guest_epc(&vcpu->arch.cop0); __entry->pc = vcpu->arch.pc; - __entry->badvaddr = kvm_read_c0_guest_badvaddr(vcpu->arch.cop0); - __entry->status = kvm_read_c0_guest_status(vcpu->arch.cop0); - __entry->cause = kvm_read_c0_guest_cause(vcpu->arch.cop0); + __entry->badvaddr = kvm_read_c0_guest_badvaddr(&vcpu->arch.cop0); + __entry->status = kvm_read_c0_guest_status(&vcpu->arch.cop0); + __entry->cause = kvm_read_c0_guest_cause(&vcpu->arch.cop0); ), TP_printk("EPC: 0x%08lx PC: 0x%08lx Status: 0x%08x Cause: 0x%08x BadVAddr: 0x%08lx", diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c index c706f5890a05..c8f7d793bc63 100644 --- a/arch/mips/kvm/vz.c +++ b/arch/mips/kvm/vz.c @@ -422,7 +422,7 @@ static void _kvm_vz_restore_htimer(struct kvm_vcpu *vcpu, */ static void kvm_vz_restore_timer(struct kvm_vcpu *vcpu) { - struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_coproc *cop0 = &vcpu->arch.cop0; u32 cause, compare; compare = kvm_read_sw_gc0_compare(cop0); @@ -517,7 +517,7 @@ static void _kvm_vz_save_htimer(struct kvm_vcpu *vcpu, */ static void kvm_vz_save_timer(struct kvm_vcpu *vcpu) { - struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_coproc *cop0 = &vcpu->arch.cop0; u32 gctl0, compare, cause; gctl0 = read_c0_guestctl0(); @@ -863,7 +863,7 @@ static unsigned long mips_process_maar(unsigned int op, unsigned long val) static void kvm_write_maari(struct kvm_vcpu *vcpu, unsigned long val) { - struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_coproc *cop0 = &vcpu->arch.cop0; val &= MIPS_MAARI_INDEX; if (val == MIPS_MAARI_INDEX) @@ -876,7 +876,7 @@ static enum emulation_result kvm_vz_gpsi_cop0(union mips_instruction inst, u32 *opc, u32 cause, struct kvm_vcpu *vcpu) { - struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_coproc *cop0 = &vcpu->arch.cop0; enum emulation_result er = EMULATE_DONE; u32 rt, rd, sel; unsigned long curr_pc; @@ -1911,7 +1911,7 @@ static int kvm_vz_get_one_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, s64 *v) { - struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_coproc *cop0 = &vcpu->arch.cop0; unsigned int idx; switch (reg->id) { @@ -2081,7 +2081,7 @@ static int kvm_vz_get_one_reg(struct kvm_vcpu *vcpu, case KVM_REG_MIPS_CP0_MAARI: if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar) return -EINVAL; - *v = kvm_read_sw_gc0_maari(vcpu->arch.cop0); + *v = kvm_read_sw_gc0_maari(&vcpu->arch.cop0); break; #ifdef CONFIG_64BIT case KVM_REG_MIPS_CP0_XCONTEXT: @@ -2135,7 +2135,7 @@ static int kvm_vz_set_one_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, s64 v) { - struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_coproc *cop0 = &vcpu->arch.cop0; unsigned int idx; int ret = 0; unsigned int cur, change; @@ -2562,7 +2562,7 @@ static void kvm_vz_vcpu_load_tlb(struct kvm_vcpu *vcpu, int cpu) static int kvm_vz_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { - struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_coproc *cop0 = &vcpu->arch.cop0; bool migrated, all; /* @@ -2704,7 +2704,7 @@ static int kvm_vz_vcpu_load(struct kvm_vcpu *vcpu, int cpu) static int kvm_vz_vcpu_put(struct kvm_vcpu *vcpu, int cpu) { - struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_coproc *cop0 = &vcpu->arch.cop0; if (current->flags & PF_VCPU) kvm_vz_vcpu_save_wired(vcpu); @@ -3076,7 +3076,7 @@ static void kvm_vz_vcpu_uninit(struct kvm_vcpu *vcpu) static int kvm_vz_vcpu_setup(struct kvm_vcpu *vcpu) { - struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_coproc *cop0 = &vcpu->arch.cop0; unsigned long count_hz = 100*1000*1000; /* default to 100 MHz */ /* From 0a90e70efa6ac91ccdf069213c47b7c7d3e189d7 Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Wed, 15 Mar 2023 09:31:23 +0800 Subject: [PATCH 103/224] ext4: Fix reusing stale buffer heads from last failed mounting commit 26fb5290240dc31cae99b8b4dd2af7f46dfcba6b upstream. Following process makes ext4 load stale buffer heads from last failed mounting in a new mounting operation: mount_bdev ext4_fill_super | ext4_load_and_init_journal | ext4_load_journal | jbd2_journal_load | load_superblock | journal_get_superblock | set_buffer_verified(bh) // buffer head is verified | jbd2_journal_recover // failed caused by EIO | goto failed_mount3a // skip 'sb->s_root' initialization deactivate_locked_super kill_block_super generic_shutdown_super if (sb->s_root) // false, skip ext4_put_super->invalidate_bdev-> // invalidate_mapping_pages->mapping_evict_folio-> // filemap_release_folio->try_to_free_buffers, which // cannot drop buffer head. blkdev_put blkdev_put_whole if (atomic_dec_and_test(&bdev->bd_openers)) // false, systemd-udev happens to open the device. Then // blkdev_flush_mapping->kill_bdev->truncate_inode_pages-> // truncate_inode_folio->truncate_cleanup_folio-> // folio_invalidate->block_invalidate_folio-> // filemap_release_folio->try_to_free_buffers will be skipped, // dropping buffer head is missed again. Second mount: ext4_fill_super ext4_load_and_init_journal ext4_load_journal ext4_get_journal jbd2_journal_init_inode journal_init_common bh = getblk_unmovable bh = __find_get_block // Found stale bh in last failed mounting journal->j_sb_buffer = bh jbd2_journal_load load_superblock journal_get_superblock if (buffer_verified(bh)) // true, skip journal->j_format_version = 2, value is 0 jbd2_journal_recover do_one_pass next_log_block += count_tags(journal, bh) // According to journal_tag_bytes(), 'tag_bytes' calculating is // affected by jbd2_has_feature_csum3(), jbd2_has_feature_csum3() // returns false because 'j->j_format_version >= 2' is not true, // then we get wrong next_log_block. The do_one_pass may exit // early whenoccuring non JBD2_MAGIC_NUMBER in 'next_log_block'. The filesystem is corrupted here, journal is partially replayed, and new journal sequence number actually is already used by last mounting. The invalidate_bdev() can drop all buffer heads even racing with bare reading block device(eg. systemd-udev), so we can fix it by invalidating bdev in error handling path in __ext4_fill_super(). Fetch a reproducer in [Link]. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217171 Fixes: 25ed6e8a54df ("jbd2: enable journal clients to enable v2 checksumming") Cc: stable@vger.kernel.org # v3.5 Signed-off-by: Zhihao Cheng Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20230315013128.3911115-2-chengzhihao1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 760249d9152d..9bdd5261e92e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1129,6 +1129,12 @@ static void ext4_blkdev_remove(struct ext4_sb_info *sbi) struct block_device *bdev; bdev = sbi->s_journal_bdev; if (bdev) { + /* + * Invalidate the journal device's buffers. We don't want them + * floating about in memory - the physical journal device may + * hotswapped, and it breaks the `ro-after' testing code. + */ + invalidate_bdev(bdev); ext4_blkdev_put(bdev); sbi->s_journal_bdev = NULL; } @@ -1274,13 +1280,7 @@ static void ext4_put_super(struct super_block *sb) sync_blockdev(sb->s_bdev); invalidate_bdev(sb->s_bdev); if (sbi->s_journal_bdev && sbi->s_journal_bdev != sb->s_bdev) { - /* - * Invalidate the journal device's buffers. We don't want them - * floating about in memory - the physical journal device may - * hotswapped, and it breaks the `ro-after' testing code. - */ sync_blockdev(sbi->s_journal_bdev); - invalidate_bdev(sbi->s_journal_bdev); ext4_blkdev_remove(sbi); } @@ -5634,6 +5634,7 @@ failed_mount: brelse(sbi->s_sbh); ext4_blkdev_remove(sbi); out_fail: + invalidate_bdev(sb->s_bdev); sb->s_fs_info = NULL; return err ? err : ret; } From 782166ac858a44c321a591fb57b8a4a93c094cc4 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Sat, 3 Jun 2023 23:03:18 +0800 Subject: [PATCH 104/224] ext4: fix wrong unit use in ext4_mb_clear_bb commit 247c3d214c23dfeeeb892e91a82ac1188bdaec9f upstream. Function ext4_issue_discard need count in cluster. Pass count_clusters instead of count to fix the mismatch. Signed-off-by: Kemeng Shi Cc: stable@kernel.org Reviewed-by: Ojaswin Mujoo Link: https://lore.kernel.org/r/20230603150327.3596033-11-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mballoc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 32d88757a780..8613f87c96d8 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -6073,8 +6073,8 @@ do_more: * them with group lock_held */ if (test_opt(sb, DISCARD)) { - err = ext4_issue_discard(sb, block_group, bit, count, - NULL); + err = ext4_issue_discard(sb, block_group, bit, + count_clusters, NULL); if (err && err != -EOPNOTSUPP) ext4_msg(sb, KERN_WARNING, "discard request in" " group:%u block:%d count:%lu failed" From 2038d35749c74994a9cf0a4bbd206a8c976591ba Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Sat, 3 Jun 2023 23:03:16 +0800 Subject: [PATCH 105/224] ext4: get block from bh in ext4_free_blocks for fast commit replay commit 11b6890be0084ad4df0e06d89a9fdcc948472c65 upstream. ext4_free_blocks will retrieve block from bh if block parameter is zero. Retrieve block before ext4_free_blocks_simple to avoid potentially passing wrong block to ext4_free_blocks_simple. Signed-off-by: Kemeng Shi Cc: stable@kernel.org Reviewed-by: Ojaswin Mujoo Link: https://lore.kernel.org/r/20230603150327.3596033-9-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mballoc.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 8613f87c96d8..9ada3a395255 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -6158,12 +6158,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, sbi = EXT4_SB(sb); - if (sbi->s_mount_state & EXT4_FC_REPLAY) { - ext4_free_blocks_simple(inode, block, count); - return; - } - - might_sleep(); if (bh) { if (block) BUG_ON(block != bh->b_blocknr); @@ -6171,6 +6165,13 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, block = bh->b_blocknr; } + if (sbi->s_mount_state & EXT4_FC_REPLAY) { + ext4_free_blocks_simple(inode, block, count); + return; + } + + might_sleep(); + if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && !ext4_inode_block_valid(inode, block, count)) { ext4_error(sb, "Freeing blocks not in datazone - " From e861961f3a503909d7377f8f41364464dce5fb4f Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Sat, 3 Jun 2023 23:03:19 +0800 Subject: [PATCH 106/224] ext4: fix wrong unit use in ext4_mb_new_blocks commit 2ec6d0a5ea72689a79e6f725fd8b443a788ae279 upstream. Function ext4_free_blocks_simple needs count in cluster. Function ext4_free_blocks accepts count in block. Convert count to cluster to fix the mismatch. Signed-off-by: Kemeng Shi Cc: stable@kernel.org Reviewed-by: Ojaswin Mujoo Link: https://lore.kernel.org/r/20230603150327.3596033-12-shikemeng@huaweicloud.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/mballoc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 9ada3a395255..88ed64ebae3e 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -6166,7 +6166,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, } if (sbi->s_mount_state & EXT4_FC_REPLAY) { - ext4_free_blocks_simple(inode, block, count); + ext4_free_blocks_simple(inode, block, EXT4_NUM_B2C(sbi, count)); return; } From 029c6b106f49e22bf3fa792174246b4be79b5f40 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 6 Jun 2023 15:32:03 +0800 Subject: [PATCH 107/224] ext4: fix to check return value of freeze_bdev() in ext4_shutdown() commit c4d13222afd8a64bf11bc7ec68645496ee8b54b9 upstream. freeze_bdev() can fail due to a lot of reasons, it needs to check its reason before later process. Fixes: 783d94854499 ("ext4: add EXT4_IOC_GOINGDOWN ioctl") Cc: stable@kernel.org Signed-off-by: Chao Yu Link: https://lore.kernel.org/r/20230606073203.1310389-1-chao@kernel.org Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ioctl.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 8c2b1ff5e695..3784f7041649 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -800,6 +800,7 @@ static int ext4_shutdown(struct super_block *sb, unsigned long arg) { struct ext4_sb_info *sbi = EXT4_SB(sb); __u32 flags; + int ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -818,7 +819,9 @@ static int ext4_shutdown(struct super_block *sb, unsigned long arg) switch (flags) { case EXT4_GOING_FLAGS_DEFAULT: - freeze_bdev(sb->s_bdev); + ret = freeze_bdev(sb->s_bdev); + if (ret) + return ret; set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags); thaw_bdev(sb->s_bdev); break; From deef86fa3005cbb61ae8aa5729324c09b3f4ba73 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Mon, 27 Mar 2023 22:16:29 +0800 Subject: [PATCH 108/224] ext4: turn quotas off if mount failed after enabling quotas commit d13f99632748462c32fc95d729f5e754bab06064 upstream. Yi found during a review of the patch "ext4: don't BUG on inconsistent journal feature" that when ext4_mark_recovery_complete() returns an error value, the error handling path does not turn off the enabled quotas, which triggers the following kmemleak: ================================================================ unreferenced object 0xffff8cf68678e7c0 (size 64): comm "mount", pid 746, jiffies 4294871231 (age 11.540s) hex dump (first 32 bytes): 00 90 ef 82 f6 8c ff ff 00 00 00 00 41 01 00 00 ............A... c7 00 00 00 bd 00 00 00 0a 00 00 00 48 00 00 00 ............H... backtrace: [<00000000c561ef24>] __kmem_cache_alloc_node+0x4d4/0x880 [<00000000d4e621d7>] kmalloc_trace+0x39/0x140 [<00000000837eee74>] v2_read_file_info+0x18a/0x3a0 [<0000000088f6c877>] dquot_load_quota_sb+0x2ed/0x770 [<00000000340a4782>] dquot_load_quota_inode+0xc6/0x1c0 [<0000000089a18bd5>] ext4_enable_quotas+0x17e/0x3a0 [ext4] [<000000003a0268fa>] __ext4_fill_super+0x3448/0x3910 [ext4] [<00000000b0f2a8a8>] ext4_fill_super+0x13d/0x340 [ext4] [<000000004a9489c4>] get_tree_bdev+0x1dc/0x370 [<000000006e723bf1>] ext4_get_tree+0x1d/0x30 [ext4] [<00000000c7cb663d>] vfs_get_tree+0x31/0x160 [<00000000320e1bed>] do_new_mount+0x1d5/0x480 [<00000000c074654c>] path_mount+0x22e/0xbe0 [<0000000003e97a8e>] do_mount+0x95/0xc0 [<000000002f3d3736>] __x64_sys_mount+0xc4/0x160 [<0000000027d2140c>] do_syscall_64+0x3f/0x90 ================================================================ To solve this problem, we add a "failed_mount10" tag, and call ext4_quota_off_umount() in this tag to release the enabled qoutas. Fixes: 11215630aada ("ext4: don't BUG on inconsistent journal feature") Cc: stable@kernel.org Signed-off-by: Zhang Yi Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20230327141630.156875-2-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9bdd5261e92e..601e097e1720 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5544,7 +5544,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) ext4_msg(sb, KERN_INFO, "recovery complete"); err = ext4_mark_recovery_complete(sb, es); if (err) - goto failed_mount9; + goto failed_mount10; } if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev)) @@ -5563,7 +5563,9 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) return 0; -failed_mount9: +failed_mount10: + ext4_quota_off_umount(sb); +failed_mount9: __maybe_unused ext4_release_orphan_info(sb); failed_mount8: ext4_unregister_sysfs(sb); From 95d49f79e94d4fa8105c880a266789609f3e791a Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Mon, 24 Apr 2023 11:38:35 +0800 Subject: [PATCH 109/224] ext4: only update i_reserved_data_blocks on successful block allocation commit de25d6e9610a8b30cce9bbb19b50615d02ebca02 upstream. In our fault injection test, we create an ext4 file, migrate it to non-extent based file, then punch a hole and finally trigger a WARN_ON in the ext4_da_update_reserve_space(): EXT4-fs warning (device sda): ext4_da_update_reserve_space:369: ino 14, used 11 with only 10 reserved data blocks When writing back a non-extent based file, if we enable delalloc, the number of reserved blocks will be subtracted from the number of blocks mapped by ext4_ind_map_blocks(), and the extent status tree will be updated. We update the extent status tree by first removing the old extent_status and then inserting the new extent_status. If the block range we remove happens to be in an extent, then we need to allocate another extent_status with ext4_es_alloc_extent(). use old to remove to add new |----------|------------|------------| old extent_status The problem is that the allocation of a new extent_status failed due to a fault injection, and __es_shrink() did not get free memory, resulting in a return of -ENOMEM. Then do_writepages() retries after receiving -ENOMEM, we map to the same extent again, and the number of reserved blocks is again subtracted from the number of blocks in that extent. Since the blocks in the same extent are subtracted twice, we end up triggering WARN_ON at ext4_da_update_reserve_space() because used > ei->i_reserved_data_blocks. For non-extent based file, we update the number of reserved blocks after ext4_ind_map_blocks() is executed, which causes a problem that when we call ext4_ind_map_blocks() to create a block, it doesn't always create a block, but we always reduce the number of reserved blocks. So we move the logic for updating reserved blocks to ext4_ind_map_blocks() to ensure that the number of reserved blocks is updated only after we do succeed in allocating some new blocks. Fixes: 5f634d064c70 ("ext4: Fix quota accounting error with fallocate") Cc: stable@kernel.org Signed-off-by: Baokun Li Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20230424033846.4732-2-libaokun1@huawei.com Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/indirect.c | 8 ++++++++ fs/ext4/inode.c | 10 ---------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index c68bebe7ff4b..a9f3716119d3 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -651,6 +651,14 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, ext4_update_inode_fsync_trans(handle, inode, 1); count = ar.len; + + /* + * Update reserved blocks/metadata blocks after successful block + * allocation which had been deferred till now. + */ + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) + ext4_da_update_reserve_space(inode, count, 1); + got_it: map->m_flags |= EXT4_MAP_MAPPED; map->m_pblk = le32_to_cpu(chain[depth-1].key); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 80d5a859ab14..5aa3003cfc68 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -660,16 +660,6 @@ found: */ ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE); } - - /* - * Update reserved blocks/metadata blocks after successful - * block allocation which had been deferred till now. We don't - * support fallocate for non extent files. So we can update - * reserve space here. - */ - if ((retval > 0) && - (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) - ext4_da_update_reserve_space(inode, retval, 1); } if (retval > 0) { From 5dc507de0c8d89f79d84fbe4b638eb4e4cb089ca Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 29 May 2023 17:44:29 -0400 Subject: [PATCH 110/224] fs: dlm: revert check required context while close commit c6b6d6dcc7f32767d57740e0552337c8de40610b upstream. This patch reverts commit 2c3fa6ae4d52 ("dlm: check required context while close"). The function dlm_midcomms_close(), which will call later dlm_lowcomms_close(), is called when the cluster manager tells the node got fenced which means on midcomms/lowcomms layer to disconnect the node from the cluster communication. The node can rejoin the cluster later. This patch was ensuring no new message were able to be triggered when we are in the close() function context. This was done by checking if the lockspace has been stopped. However there is a missing check that we only need to check specific lockspaces where the fenced node is member of. This is currently complicated because there is no way to easily check if a node is part of a specific lockspace without stopping the recovery. For now we just revert this commit as it is just a check to finding possible leaks of stopping lockspaces before close() is called. Cc: stable@vger.kernel.org Fixes: 2c3fa6ae4d52 ("dlm: check required context while close") Signed-off-by: Alexander Aring Signed-off-by: David Teigland Signed-off-by: Greg Kroah-Hartman --- fs/dlm/lockspace.c | 12 ------------ fs/dlm/lockspace.h | 1 - fs/dlm/midcomms.c | 3 --- 3 files changed, 16 deletions(-) diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 7b29ea7bfb41..23cf9b8f31b7 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -958,15 +958,3 @@ void dlm_stop_lockspaces(void) log_print("dlm user daemon left %d lockspaces", count); } -void dlm_stop_lockspaces_check(void) -{ - struct dlm_ls *ls; - - spin_lock(&lslist_lock); - list_for_each_entry(ls, &lslist, ls_list) { - if (WARN_ON(!rwsem_is_locked(&ls->ls_in_recovery) || - !dlm_locking_stopped(ls))) - break; - } - spin_unlock(&lslist_lock); -} diff --git a/fs/dlm/lockspace.h b/fs/dlm/lockspace.h index 03f4a4a3a871..47ebd4411926 100644 --- a/fs/dlm/lockspace.h +++ b/fs/dlm/lockspace.h @@ -27,7 +27,6 @@ struct dlm_ls *dlm_find_lockspace_local(void *id); struct dlm_ls *dlm_find_lockspace_device(int minor); void dlm_put_lockspace(struct dlm_ls *ls); void dlm_stop_lockspaces(void); -void dlm_stop_lockspaces_check(void); int dlm_new_user_lockspace(const char *name, const char *cluster, uint32_t flags, int lvblen, const struct dlm_lockspace_ops *ops, diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c index b2a25a33a148..71e426da48c3 100644 --- a/fs/dlm/midcomms.c +++ b/fs/dlm/midcomms.c @@ -136,7 +136,6 @@ #include #include "dlm_internal.h" -#include "lockspace.h" #include "lowcomms.h" #include "config.h" #include "memory.h" @@ -1458,8 +1457,6 @@ int dlm_midcomms_close(int nodeid) if (nodeid == dlm_our_nodeid()) return 0; - dlm_stop_lockspaces_check(); - idx = srcu_read_lock(&nodes_srcu); /* Abort pending close/remove operation */ node = nodeid2node(nodeid, 0); From 33f8dff6e1cbba5c2ec85fa5649c0a759a7e685c Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Fri, 26 May 2023 13:55:11 +0200 Subject: [PATCH 111/224] soc: qcom: mdt_loader: Fix unconditional call to scm_pas_mem_setup commit bcb889891371c3cf767f2b9e8768cfe2fdd3810f upstream. Commit ebeb20a9cd3f ("soc: qcom: mdt_loader: Always invoke PAS mem_setup") dropped the relocate check and made pas_mem_setup run unconditionally. The code was later moved with commit f4e526ff7e38 ("soc: qcom: mdt_loader: Extract PAS operations") to qcom_mdt_pas_init() effectively losing track of what was actually done. The assumption that PAS mem_setup can be done anytime was effectively wrong, with no good reason and this caused regression on some SoC that use remoteproc to bringup ath11k. One example is IPQ8074 SoC that effectively broke resulting in remoteproc silently die and ath11k not working. On this SoC FW relocate is not enabled and PAS mem_setup was correctly skipped in previous kernel version resulting in correct bringup and function of remoteproc and ath11k. To fix the regression, reintroduce the relocate check in qcom_mdt_pas_init() and correctly skip PAS mem_setup where relocate is not enabled. Fixes: ebeb20a9cd3f ("soc: qcom: mdt_loader: Always invoke PAS mem_setup") Tested-by: Robert Marko Co-developed-by: Robert Marko Signed-off-by: Robert Marko Signed-off-by: Christian Marangi Cc: stable@vger.kernel.org Reviewed-by: Mukesh Ojha Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20230526115511.3328-1-ansuelsmth@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/soc/qcom/mdt_loader.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/soc/qcom/mdt_loader.c b/drivers/soc/qcom/mdt_loader.c index 3f11554df2f3..10235b36d131 100644 --- a/drivers/soc/qcom/mdt_loader.c +++ b/drivers/soc/qcom/mdt_loader.c @@ -210,6 +210,7 @@ int qcom_mdt_pas_init(struct device *dev, const struct firmware *fw, const struct elf32_hdr *ehdr; phys_addr_t min_addr = PHYS_ADDR_MAX; phys_addr_t max_addr = 0; + bool relocate = false; size_t metadata_len; void *metadata; int ret; @@ -224,6 +225,9 @@ int qcom_mdt_pas_init(struct device *dev, const struct firmware *fw, if (!mdt_phdr_valid(phdr)) continue; + if (phdr->p_flags & QCOM_MDT_RELOCATABLE) + relocate = true; + if (phdr->p_paddr < min_addr) min_addr = phdr->p_paddr; @@ -246,11 +250,13 @@ int qcom_mdt_pas_init(struct device *dev, const struct firmware *fw, goto out; } - ret = qcom_scm_pas_mem_setup(pas_id, mem_phys, max_addr - min_addr); - if (ret) { - /* Unable to set up relocation */ - dev_err(dev, "error %d setting up firmware %s\n", ret, fw_name); - goto out; + if (relocate) { + ret = qcom_scm_pas_mem_setup(pas_id, mem_phys, max_addr - min_addr); + if (ret) { + /* Unable to set up relocation */ + dev_err(dev, "error %d setting up firmware %s\n", ret, fw_name); + goto out; + } } out: From 9e54fd14bd143c261e52fde74355e85e9526c58c Mon Sep 17 00:00:00 2001 From: "Ritesh Harjani (IBM)" Date: Fri, 21 Apr 2023 15:16:11 +0530 Subject: [PATCH 112/224] ext2/dax: Fix ext2_setsize when len is page aligned commit fcced95b6ba2a507a83b8b3e0358a8ac16b13e35 upstream. PAGE_ALIGN(x) macro gives the next highest value which is multiple of pagesize. But if x is already page aligned then it simply returns x. So, if x passed is 0 in dax_zero_range() function, that means the length gets passed as 0 to ->iomap_begin(). In ext2 it then calls ext2_get_blocks -> max_blocks as 0 and hits bug_on here in ext2_get_blocks(). BUG_ON(maxblocks == 0); Instead we should be calling dax_truncate_page() here which takes care of it. i.e. it only calls dax_zero_range if the offset is not page/block aligned. This can be easily triggered with following on fsdax mounted pmem device. dd if=/dev/zero of=file count=1 bs=512 truncate -s 0 file [79.525838] EXT2-fs (pmem0): DAX enabled. Warning: EXPERIMENTAL, use at your own risk [79.529376] ext2 filesystem being mounted at /mnt1/test supports timestamps until 2038 (0x7fffffff) [93.793207] ------------[ cut here ]------------ [93.795102] kernel BUG at fs/ext2/inode.c:637! [93.796904] invalid opcode: 0000 [#1] PREEMPT SMP PTI [93.798659] CPU: 0 PID: 1192 Comm: truncate Not tainted 6.3.0-rc2-xfstests-00056-g131086faa369 #139 [93.806459] RIP: 0010:ext2_get_blocks.constprop.0+0x524/0x610 <...> [93.835298] Call Trace: [93.836253] [93.837103] ? lock_acquire+0xf8/0x110 [93.838479] ? d_lookup+0x69/0xd0 [93.839779] ext2_iomap_begin+0xa7/0x1c0 [93.841154] iomap_iter+0xc7/0x150 [93.842425] dax_zero_range+0x6e/0xa0 [93.843813] ext2_setsize+0x176/0x1b0 [93.845164] ext2_setattr+0x151/0x200 [93.846467] notify_change+0x341/0x4e0 [93.847805] ? lock_acquire+0xf8/0x110 [93.849143] ? do_truncate+0x74/0xe0 [93.850452] ? do_truncate+0x84/0xe0 [93.851739] do_truncate+0x84/0xe0 [93.852974] do_sys_ftruncate+0x2b4/0x2f0 [93.854404] do_syscall_64+0x3f/0x90 [93.855789] entry_SYSCALL_64_after_hwframe+0x72/0xdc CC: stable@vger.kernel.org Fixes: 2aa3048e03d3 ("iomap: switch iomap_zero_range to use iomap_iter") Reviewed-by: Darrick J. Wong Signed-off-by: Ritesh Harjani (IBM) Signed-off-by: Jan Kara Message-Id: <046a58317f29d9603d1068b2bbae47c2332c17ae.1682069716.git.ritesh.list@gmail.com> Signed-off-by: Greg Kroah-Hartman --- fs/ext2/inode.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 918ab2f9e4c0..5a32fcd55183 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1265,9 +1265,8 @@ static int ext2_setsize(struct inode *inode, loff_t newsize) inode_dio_wait(inode); if (IS_DAX(inode)) - error = dax_zero_range(inode, newsize, - PAGE_ALIGN(newsize) - newsize, NULL, - &ext2_iomap_ops); + error = dax_truncate_page(inode, newsize, NULL, + &ext2_iomap_ops); else error = block_truncate_page(inode->i_mapping, newsize, ext2_get_block); From c7feb54b113802d2aba98708769d3c33fb017254 Mon Sep 17 00:00:00 2001 From: Siddh Raman Pant Date: Tue, 20 Jun 2023 22:17:00 +0530 Subject: [PATCH 113/224] jfs: jfs_dmap: Validate db_l2nbperpage while mounting commit 11509910c599cbd04585ec35a6d5e1a0053d84c1 upstream. In jfs_dmap.c at line 381, BLKTODMAP is used to get a logical block number inside dbFree(). db_l2nbperpage, which is the log2 number of blocks per page, is passed as an argument to BLKTODMAP which uses it for shifting. Syzbot reported a shift out-of-bounds crash because db_l2nbperpage is too big. This happens because the large value is set without any validation in dbMount() at line 181. Thus, make sure that db_l2nbperpage is correct while mounting. Max number of blocks per page = Page size / Min block size => log2(Max num_block per page) = log2(Page size / Min block size) = log2(Page size) - log2(Min block size) => Max db_l2nbperpage = L2PSIZE - L2MINBLOCKSIZE Reported-and-tested-by: syzbot+d2cd27dcf8e04b232eb2@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?id=2a70a453331db32ed491f5cbb07e81bf2d225715 Cc: stable@vger.kernel.org Suggested-by: Dave Kleikamp Signed-off-by: Siddh Raman Pant Signed-off-by: Dave Kleikamp Signed-off-by: Greg Kroah-Hartman --- fs/jfs/jfs_dmap.c | 6 ++++++ fs/jfs/jfs_filsys.h | 2 ++ 2 files changed, 8 insertions(+) diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index a3eb1e826947..da6a2bc6bf02 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -178,7 +178,13 @@ int dbMount(struct inode *ipbmap) dbmp_le = (struct dbmap_disk *) mp->data; bmp->db_mapsize = le64_to_cpu(dbmp_le->dn_mapsize); bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree); + bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage); + if (bmp->db_l2nbperpage > L2PSIZE - L2MINBLOCKSIZE) { + err = -EINVAL; + goto err_release_metapage; + } + bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag); if (!bmp->db_numag) { err = -EINVAL; diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h index b5d702df7111..33ef13a0b110 100644 --- a/fs/jfs/jfs_filsys.h +++ b/fs/jfs/jfs_filsys.h @@ -122,7 +122,9 @@ #define NUM_INODE_PER_IAG INOSPERIAG #define MINBLOCKSIZE 512 +#define L2MINBLOCKSIZE 9 #define MAXBLOCKSIZE 4096 +#define L2MAXBLOCKSIZE 12 #define MAXFILESIZE ((s64)1 << 52) #define JFS_LINK_MAX 0xffffffff From ef709350ef0b42995f899c8ef7bf74690dca4d76 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Thu, 15 Jun 2023 15:49:59 +0100 Subject: [PATCH 114/224] hwrng: imx-rngc - fix the timeout for init and self check commit d744ae7477190967a3ddc289e2cd4ae59e8b1237 upstream. Fix the timeout that is used for the initialisation and for the self test. wait_for_completion_timeout expects a timeout in jiffies, but RNGC_TIMEOUT is in milliseconds. Call msecs_to_jiffies to do the conversion. Cc: stable@vger.kernel.org Fixes: 1d5449445bd0 ("hwrng: mx-rngc - add a driver for Freescale RNGC") Signed-off-by: Martin Kaiser Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/char/hw_random/imx-rngc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/char/hw_random/imx-rngc.c b/drivers/char/hw_random/imx-rngc.c index a1c24148ed31..75fb46298a87 100644 --- a/drivers/char/hw_random/imx-rngc.c +++ b/drivers/char/hw_random/imx-rngc.c @@ -110,7 +110,7 @@ static int imx_rngc_self_test(struct imx_rngc *rngc) cmd = readl(rngc->base + RNGC_COMMAND); writel(cmd | RNGC_CMD_SELF_TEST, rngc->base + RNGC_COMMAND); - ret = wait_for_completion_timeout(&rngc->rng_op_done, RNGC_TIMEOUT); + ret = wait_for_completion_timeout(&rngc->rng_op_done, msecs_to_jiffies(RNGC_TIMEOUT)); imx_rngc_irq_mask_clear(rngc); if (!ret) return -ETIMEDOUT; @@ -187,9 +187,7 @@ static int imx_rngc_init(struct hwrng *rng) cmd = readl(rngc->base + RNGC_COMMAND); writel(cmd | RNGC_CMD_SEED, rngc->base + RNGC_COMMAND); - ret = wait_for_completion_timeout(&rngc->rng_op_done, - RNGC_TIMEOUT); - + ret = wait_for_completion_timeout(&rngc->rng_op_done, msecs_to_jiffies(RNGC_TIMEOUT)); if (!ret) { ret = -ETIMEDOUT; goto err; From b933df9dda0127eee4fd2001236accf264c94697 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 26 Jun 2023 16:44:34 +0200 Subject: [PATCH 115/224] dm integrity: reduce vmalloc space footprint on 32-bit architectures commit 6d50eb4725934fd22f5eeccb401000687c790fd0 upstream. It was reported that dm-integrity runs out of vmalloc space on 32-bit architectures. On x86, there is only 128MiB vmalloc space and dm-integrity consumes it quickly because it has a 64MiB journal and 8MiB recalculate buffer. Fix this by reducing the size of the journal to 4MiB and the size of the recalculate buffer to 1MiB, so that multiple dm-integrity devices can be created and activated on 32-bit architectures. Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-integrity.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index a2b8f8781a99..fe7dad3ffa75 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -33,11 +33,11 @@ #define DEFAULT_BUFFER_SECTORS 128 #define DEFAULT_JOURNAL_WATERMARK 50 #define DEFAULT_SYNC_MSEC 10000 -#define DEFAULT_MAX_JOURNAL_SECTORS 131072 +#define DEFAULT_MAX_JOURNAL_SECTORS (IS_ENABLED(CONFIG_64BIT) ? 131072 : 8192) #define MIN_LOG2_INTERLEAVE_SECTORS 3 #define MAX_LOG2_INTERLEAVE_SECTORS 31 #define METADATA_WORKQUEUE_MAX_ACTIVE 16 -#define RECALC_SECTORS 32768 +#define RECALC_SECTORS (IS_ENABLED(CONFIG_64BIT) ? 32768 : 2048) #define RECALC_WRITE_SUPER 16 #define BITMAP_BLOCK_SIZE 4096 /* don't change it */ #define BITMAP_FLUSH_INTERVAL (10 * HZ) From 026e46d26aaf759c2ffe2e986a64d9778bd2247b Mon Sep 17 00:00:00 2001 From: Sathya Prakash Date: Thu, 1 Jun 2023 00:10:25 +0530 Subject: [PATCH 116/224] scsi: mpi3mr: Propagate sense data for admin queue SCSI I/O commit f762326b2baa86ae647e2ba6832bc87e238f68ad upstream. Copy the sense data to internal driver buffer when the firmware completes any SCSI I/O command sent through admin queue with sense data for further use. Fixes: 506bc1a0d6ba ("scsi: mpi3mr: Add support for MPT commands") Cc: Signed-off-by: Sathya Prakash Signed-off-by: Sumit Saxena Link: https://lore.kernel.org/r/20230531184025.3803-1-sumit.saxena@broadcom.com Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mpi3mr/mpi3mr_fw.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index 64355d0baa5f..d2c7de804b99 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -402,6 +402,11 @@ static void mpi3mr_process_admin_reply_desc(struct mpi3mr_ioc *mrioc, memcpy((u8 *)cmdptr->reply, (u8 *)def_reply, mrioc->reply_sz); } + if (sense_buf && cmdptr->sensebuf) { + cmdptr->is_sense = 1; + memcpy(cmdptr->sensebuf, sense_buf, + MPI3MR_SENSE_BUF_SZ); + } if (cmdptr->is_waiting) { complete(&cmdptr->done); cmdptr->is_waiting = 0; From 25cb64ecc3845a28f9e1f0a40fe9472dfd4e526b Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Mon, 12 Jun 2023 11:13:39 +0200 Subject: [PATCH 117/224] s390/zcrypt: do not retry administrative requests commit af40322e90d4e0093569eceb7d3a28ab635f3e75 upstream. All kind of administrative requests should not been retried. Some card firmware detects this and assumes a replay attack. This patch checks on failure if the low level functions indicate a retry (EAGAIN) and checks for the ADMIN flag set on the request message. If this both are true, the response code for this message is changed to EIO to make sure the zcrypt API layer does not attempt to retry the request. As of now the ADMIN flag is set for a request message when - for EP11 the field 'flags' of the EP11 CPRB struct has the leftmost bit set. - for CCA when the CPRB minor version is 'T3', 'T5', 'T6' or 'T7'. Please note that the do-not-retry only applies to a request which has been sent to the card (= has been successfully enqueued) but the reply indicates some kind of failure and by default it would be replied. It is totally fine to retry a request if a previous attempt to enqueue the msg into the firmware queue had some kind of failure and thus the card has never seen this request. Reported-by: Frank Uhlig Signed-off-by: Harald Freudenberger Reviewed-by: Holger Dengler Cc: stable@vger.kernel.org Signed-off-by: Alexander Gordeev Signed-off-by: Greg Kroah-Hartman --- drivers/s390/crypto/zcrypt_msgtype6.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c index 5ad251477593..f99a9ef42116 100644 --- a/drivers/s390/crypto/zcrypt_msgtype6.c +++ b/drivers/s390/crypto/zcrypt_msgtype6.c @@ -1188,6 +1188,9 @@ static long zcrypt_msgtype6_send_cprb(bool userspace, struct zcrypt_queue *zq, ap_cancel_message(zq->queue, ap_msg); } + if (rc == -EAGAIN && ap_msg->flags & AP_MSG_FLAG_ADMIN) + rc = -EIO; /* do not retry administrative requests */ + out: if (rc) ZCRYPT_DBF_DBG("%s send cprb at dev=%02x.%04x rc=%d\n", @@ -1308,6 +1311,9 @@ static long zcrypt_msgtype6_send_ep11_cprb(bool userspace, struct zcrypt_queue * ap_cancel_message(zq->queue, ap_msg); } + if (rc == -EAGAIN && ap_msg->flags & AP_MSG_FLAG_ADMIN) + rc = -EIO; /* do not retry administrative requests */ + out: if (rc) ZCRYPT_DBF_DBG("%s send cprb at dev=%02x.%04x rc=%d\n", From 3367d4be9b192afc6f1e3bd386ef0aab17ea3630 Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Wed, 14 Jun 2023 09:42:53 +0200 Subject: [PATCH 118/224] PCI/PM: Avoid putting EloPOS E2/S2/H2 PCIe Ports in D3cold commit 9e30fd26f43b89cb6b4e850a86caa2e50dedb454 upstream. The quirk for Elo i2 introduced in commit 92597f97a40b ("PCI/PM: Avoid putting Elo i2 PCIe Ports in D3cold") is also needed by EloPOS E2/S2/H2 which uses the same Continental Z2 board. Change the quirk to match the board instead of system. Link: https://bugzilla.kernel.org/show_bug.cgi?id=215715 Link: https://lore.kernel.org/r/20230614074253.22318-1-linux@zary.sk Signed-off-by: Ondrej Zary Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 98d841a7b45b..88c437249982 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2942,13 +2942,13 @@ static const struct dmi_system_id bridge_d3_blacklist[] = { { /* * Downstream device is not accessible after putting a root port - * into D3cold and back into D0 on Elo i2. + * into D3cold and back into D0 on Elo Continental Z2 board */ - .ident = "Elo i2", + .ident = "Elo Continental Z2", .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Elo Touch Solutions"), - DMI_MATCH(DMI_PRODUCT_NAME, "Elo i2"), - DMI_MATCH(DMI_PRODUCT_VERSION, "RevB"), + DMI_MATCH(DMI_BOARD_VENDOR, "Elo Touch Solutions"), + DMI_MATCH(DMI_BOARD_NAME, "Geminilake"), + DMI_MATCH(DMI_BOARD_VERSION, "Continental Z2"), }, }, #endif From 465c195e86f3d0ffd2e250c4b78a5a1f11cc1b0a Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Thu, 25 May 2023 16:32:48 +0100 Subject: [PATCH 119/224] PCI: Release resource invalidated by coalescing commit e54223275ba1bc6f704a6bab015fcd2ae4f72572 upstream. When contiguous windows are coalesced by pci_register_host_bridge(), the second resource is expanded to include the first, and the first is invalidated and consequently not added to the bus. However, it remains in the resource hierarchy. For example, these windows: fec00000-fec7ffff : PCI Bus 0000:00 fec80000-fecbffff : PCI Bus 0000:00 are coalesced into this, where the first resource remains in the tree with start/end zeroed out: 00000000-00000000 : PCI Bus 0000:00 fec00000-fecbffff : PCI Bus 0000:00 In some cases (e.g. the Xen scratch region), this causes future calls to allocate_resource() to choose an inappropriate location which the caller cannot handle. Fix by releasing the zeroed-out resource and removing it from the resource hierarchy. [bhelgaas: commit log] Fixes: 7c3855c423b1 ("PCI: Coalesce host bridge contiguous apertures") Link: https://lore.kernel.org/r/20230525153248.712779-1-ross.lagerwall@citrix.com Signed-off-by: Ross Lagerwall Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v5.16+ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/probe.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 90e676439170..7170516298b0 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -994,8 +994,10 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge) resource_list_for_each_entry_safe(window, n, &resources) { offset = window->offset; res = window->res; - if (!res->flags && !res->start && !res->end) + if (!res->flags && !res->start && !res->end) { + release_resource(res); continue; + } list_move_tail(&window->node, &bridge->windows); From c459365ec7ba182f8ed3fe18080ad19e606c67df Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 7 Jun 2023 18:18:47 +0100 Subject: [PATCH 120/224] PCI: Add function 1 DMA alias quirk for Marvell 88SE9235 commit 88d341716b83abd355558523186ca488918627ee upstream. Marvell's own product brief implies the 92xx series are a closely related family, and sure enough it turns out that 9235 seems to need the same quirk as the other three, although possibly only when certain ports are used. Link: https://lore.kernel.org/linux-iommu/2a699a99-545c-1324-e052-7d2f41fed1ae@yahoo.co.uk/ Link: https://lore.kernel.org/r/731507e05d70239aec96fcbfab6e65d8ce00edd2.1686157165.git.robin.murphy@arm.com Reported-by: Jason Adriaanse Signed-off-by: Robin Murphy Signed-off-by: Bjorn Helgaas Reviewed-by: Christoph Hellwig Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index ccc90656130a..472fa2c8ebce 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4174,6 +4174,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9220, /* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c49 */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9230, quirk_dma_func1_alias); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9235, + quirk_dma_func1_alias); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TTI, 0x0642, quirk_dma_func1_alias); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TTI, 0x0645, From cf0d7b72707dcaa21e4c685721072d1234a87162 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Mon, 19 Jun 2023 20:34:00 +0530 Subject: [PATCH 121/224] PCI: qcom: Disable write access to read only registers for IP v2.3.3 commit a33d700e8eea76c62120cb3dbf5e01328f18319a upstream. In the post init sequence of v2.9.0, write access to read only registers are not disabled after updating the registers. Fix it by disabling the access after register update. Link: https://lore.kernel.org/r/20230619150408.8468-2-manivannan.sadhasivam@linaro.org Fixes: 5d76117f070d ("PCI: qcom: Add support for IPQ8074 PCIe controller") Signed-off-by: Manivannan Sadhasivam Signed-off-by: Lorenzo Pieralisi Cc: Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/dwc/pcie-qcom.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 49905b2a9960..d24712a76ba7 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -1176,6 +1176,8 @@ static int qcom_pcie_post_init_2_3_3(struct qcom_pcie *pcie) writel(PCI_EXP_DEVCTL2_COMP_TMOUT_DIS, pci->dbi_base + offset + PCI_EXP_DEVCTL2); + dw_pcie_dbi_ro_wr_dis(pci); + return 0; } From bcd276f1431e058bafcbb5a85cc0034607f9db1b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 15 Apr 2023 11:35:28 +0900 Subject: [PATCH 122/224] PCI: epf-test: Fix DMA transfer completion initialization commit 4aca56f8eae8aa44867ddd6aa107e06f7613226f upstream. Reinitialize the transfer_complete DMA transfer completion before calling tx_submit(), to avoid seeing the DMA transfer complete before the completion is initialized, thus potentially losing the completion notification. Link: https://lore.kernel.org/r/20230415023542.77601-4-dlemoal@kernel.org Fixes: 8353813c88ef ("PCI: endpoint: Enable DMA tests for endpoints with DMA capabilities") Signed-off-by: Damien Le Moal Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/endpoint/functions/pci-epf-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c index f0c4d0f77453..bf12354b18d5 100644 --- a/drivers/pci/endpoint/functions/pci-epf-test.c +++ b/drivers/pci/endpoint/functions/pci-epf-test.c @@ -151,10 +151,10 @@ static int pci_epf_test_data_transfer(struct pci_epf_test *epf_test, return -EIO; } + reinit_completion(&epf_test->transfer_complete); tx->callback = pci_epf_test_dma_callback; tx->callback_param = epf_test; cookie = tx->tx_submit(tx); - reinit_completion(&epf_test->transfer_complete); ret = dma_submit_error(cookie); if (ret) { From 07d997ef1052c0df803ffbf48342c11eee562b48 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 15 Apr 2023 11:35:29 +0900 Subject: [PATCH 123/224] PCI: epf-test: Fix DMA transfer completion detection commit 933f31a2fe1f20e5b1ee065579f652cd1b317183 upstream. pci_epf_test_data_transfer() and pci_epf_test_dma_callback() are not handling DMA transfer completion correctly, leading to completion notifications to the RC side that are too early. This problem can be detected when the RC side is running an IOMMU with messages such as: pci-endpoint-test 0000:0b:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x001c address=0xfff00000 flags=0x0000] When running the pcitest.sh tests: the address used for a previous test transfer generates the above error while the next test transfer is running. Fix this by testing the DMA transfer status in pci_epf_test_dma_callback() and notifying the completion only when the transfer status is DMA_COMPLETE or DMA_ERROR. Furthermore, in pci_epf_test_data_transfer(), be paranoid and check again the transfer status and always call dmaengine_terminate_sync() before returning. Link: https://lore.kernel.org/r/20230415023542.77601-5-dlemoal@kernel.org Fixes: 8353813c88ef ("PCI: endpoint: Enable DMA tests for endpoints with DMA capabilities") Signed-off-by: Damien Le Moal Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/endpoint/functions/pci-epf-test.c | 36 +++++++++++++------ 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c index bf12354b18d5..2e8a4de2abab 100644 --- a/drivers/pci/endpoint/functions/pci-epf-test.c +++ b/drivers/pci/endpoint/functions/pci-epf-test.c @@ -54,6 +54,9 @@ struct pci_epf_test { struct delayed_work cmd_handler; struct dma_chan *dma_chan_tx; struct dma_chan *dma_chan_rx; + struct dma_chan *transfer_chan; + dma_cookie_t transfer_cookie; + enum dma_status transfer_status; struct completion transfer_complete; bool dma_supported; bool dma_private; @@ -85,8 +88,14 @@ static size_t bar_size[] = { 512, 512, 1024, 16384, 131072, 1048576 }; static void pci_epf_test_dma_callback(void *param) { struct pci_epf_test *epf_test = param; + struct dma_tx_state state; - complete(&epf_test->transfer_complete); + epf_test->transfer_status = + dmaengine_tx_status(epf_test->transfer_chan, + epf_test->transfer_cookie, &state); + if (epf_test->transfer_status == DMA_COMPLETE || + epf_test->transfer_status == DMA_ERROR) + complete(&epf_test->transfer_complete); } /** @@ -120,7 +129,6 @@ static int pci_epf_test_data_transfer(struct pci_epf_test *epf_test, struct dma_async_tx_descriptor *tx; struct dma_slave_config sconf = {}; struct device *dev = &epf->dev; - dma_cookie_t cookie; int ret; if (IS_ERR_OR_NULL(chan)) { @@ -152,25 +160,33 @@ static int pci_epf_test_data_transfer(struct pci_epf_test *epf_test, } reinit_completion(&epf_test->transfer_complete); + epf_test->transfer_chan = chan; tx->callback = pci_epf_test_dma_callback; tx->callback_param = epf_test; - cookie = tx->tx_submit(tx); + epf_test->transfer_cookie = tx->tx_submit(tx); - ret = dma_submit_error(cookie); + ret = dma_submit_error(epf_test->transfer_cookie); if (ret) { - dev_err(dev, "Failed to do DMA tx_submit %d\n", cookie); - return -EIO; + dev_err(dev, "Failed to do DMA tx_submit %d\n", ret); + goto terminate; } dma_async_issue_pending(chan); ret = wait_for_completion_interruptible(&epf_test->transfer_complete); if (ret < 0) { - dmaengine_terminate_sync(chan); - dev_err(dev, "DMA wait_for_completion_timeout\n"); - return -ETIMEDOUT; + dev_err(dev, "DMA wait_for_completion interrupted\n"); + goto terminate; } - return 0; + if (epf_test->transfer_status == DMA_ERROR) { + dev_err(dev, "DMA transfer failed\n"); + ret = -EIO; + } + +terminate: + dmaengine_terminate_sync(chan); + + return ret; } struct epf_dma_filter { From b2e2ffbfd341d449d3d0552eb3423d9eda94dacd Mon Sep 17 00:00:00 2001 From: Rick Wertenbroek Date: Tue, 18 Apr 2023 09:46:50 +0200 Subject: [PATCH 124/224] PCI: rockchip: Assert PCI Configuration Enable bit after probe commit f397fd4ac1fa3afcabd8cee030f953ccaed2a364 upstream. Assert PCI Configuration Enable bit after probe. When this bit is left to 0 in the endpoint mode, the RK3399 PCIe endpoint core will generate configuration request retry status (CRS) messages back to the root complex. Assert this bit after probe to allow the RK3399 PCIe endpoint core to reply to configuration requests from the root complex. This is documented in section 17.5.8.1.2 of the RK3399 TRM. Link: https://lore.kernel.org/r/20230418074700.1083505-4-rick.wertenbroek@gmail.com Fixes: cf590b078391 ("PCI: rockchip: Add EP driver for Rockchip PCIe controller") Tested-by: Damien Le Moal Signed-off-by: Rick Wertenbroek Signed-off-by: Lorenzo Pieralisi Reviewed-by: Damien Le Moal Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/pcie-rockchip-ep.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index d1a200b93b2b..19af5819c616 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -631,6 +631,9 @@ static int rockchip_pcie_ep_probe(struct platform_device *pdev) ep->irq_pci_addr = ROCKCHIP_PCIE_EP_DUMMY_IRQ_ADDR; + rockchip_pcie_write(rockchip, PCIE_CLIENT_CONF_ENABLE, + PCIE_CLIENT_CONFIG); + return 0; err_epc_mem_exit: pci_epc_mem_exit(epc); From 05f55f7530e2e44218cb9d5054fee70bc9b5ec1e Mon Sep 17 00:00:00 2001 From: Rick Wertenbroek Date: Tue, 18 Apr 2023 09:46:49 +0200 Subject: [PATCH 125/224] PCI: rockchip: Write PCI Device ID to correct register commit 1f1c42ece18de365c976a060f3c8eb481b038e3a upstream. Write PCI Device ID (DID) to the correct register. The Device ID was not updated through the correct register. Device ID was written to a read-only register and therefore did not work. The Device ID is now set through the correct register. This is documented in the RK3399 TRM section 17.6.6.1.1 Link: https://lore.kernel.org/r/20230418074700.1083505-3-rick.wertenbroek@gmail.com Fixes: cf590b078391 ("PCI: rockchip: Add EP driver for Rockchip PCIe controller") Tested-by: Damien Le Moal Signed-off-by: Rick Wertenbroek Signed-off-by: Lorenzo Pieralisi Reviewed-by: Damien Le Moal Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/pcie-rockchip-ep.c | 6 ++++-- drivers/pci/controller/pcie-rockchip.h | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index 19af5819c616..e4cbd593e19a 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -125,6 +125,7 @@ static void rockchip_pcie_prog_ep_ob_atu(struct rockchip_pcie *rockchip, u8 fn, static int rockchip_pcie_ep_write_header(struct pci_epc *epc, u8 fn, u8 vfn, struct pci_epf_header *hdr) { + u32 reg; struct rockchip_pcie_ep *ep = epc_get_drvdata(epc); struct rockchip_pcie *rockchip = &ep->rockchip; @@ -137,8 +138,9 @@ static int rockchip_pcie_ep_write_header(struct pci_epc *epc, u8 fn, u8 vfn, PCIE_CORE_CONFIG_VENDOR); } - rockchip_pcie_write(rockchip, hdr->deviceid << 16, - ROCKCHIP_PCIE_EP_FUNC_BASE(fn) + PCI_VENDOR_ID); + reg = rockchip_pcie_read(rockchip, PCIE_EP_CONFIG_DID_VID); + reg = (reg & 0xFFFF) | (hdr->deviceid << 16); + rockchip_pcie_write(rockchip, reg, PCIE_EP_CONFIG_DID_VID); rockchip_pcie_write(rockchip, hdr->revid | diff --git a/drivers/pci/controller/pcie-rockchip.h b/drivers/pci/controller/pcie-rockchip.h index 32c3a859c26b..51a123e5c0cf 100644 --- a/drivers/pci/controller/pcie-rockchip.h +++ b/drivers/pci/controller/pcie-rockchip.h @@ -133,6 +133,8 @@ #define PCIE_RC_RP_ATS_BASE 0x400000 #define PCIE_RC_CONFIG_NORMAL_BASE 0x800000 #define PCIE_RC_CONFIG_BASE 0xa00000 +#define PCIE_EP_CONFIG_BASE 0xa00000 +#define PCIE_EP_CONFIG_DID_VID (PCIE_EP_CONFIG_BASE + 0x00) #define PCIE_RC_CONFIG_RID_CCR (PCIE_RC_CONFIG_BASE + 0x08) #define PCIE_RC_CONFIG_DCR (PCIE_RC_CONFIG_BASE + 0xc4) #define PCIE_RC_CONFIG_DCR_CSPL_SHIFT 18 From dfd20ebcae84600113c42e50efc342fb78862fab Mon Sep 17 00:00:00 2001 From: Rick Wertenbroek Date: Tue, 18 Apr 2023 09:46:51 +0200 Subject: [PATCH 126/224] PCI: rockchip: Add poll and timeout to wait for PHY PLLs to be locked commit 9dd3c7c4c8c3f7f010d9cdb7c3f42506d93c9527 upstream. The RK3399 PCIe controller should wait until the PHY PLLs are locked. Add poll and timeout to wait for PHY PLLs to be locked. If they cannot be locked generate error message and jump to error handler. Accessing registers in the PHY clock domain when PLLs are not locked causes hang The PHY PLLs status is checked through a side channel register. This is documented in the TRM section 17.5.8.1 "PCIe Initialization Sequence". Link: https://lore.kernel.org/r/20230418074700.1083505-5-rick.wertenbroek@gmail.com Fixes: cf590b078391 ("PCI: rockchip: Add EP driver for Rockchip PCIe controller") Tested-by: Damien Le Moal Signed-off-by: Rick Wertenbroek Signed-off-by: Lorenzo Pieralisi Reviewed-by: Damien Le Moal Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/pcie-rockchip.c | 17 +++++++++++++++++ drivers/pci/controller/pcie-rockchip.h | 2 ++ 2 files changed, 19 insertions(+) diff --git a/drivers/pci/controller/pcie-rockchip.c b/drivers/pci/controller/pcie-rockchip.c index 990a00e08bc5..1aa84035a8bc 100644 --- a/drivers/pci/controller/pcie-rockchip.c +++ b/drivers/pci/controller/pcie-rockchip.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -153,6 +154,12 @@ int rockchip_pcie_parse_dt(struct rockchip_pcie *rockchip) } EXPORT_SYMBOL_GPL(rockchip_pcie_parse_dt); +#define rockchip_pcie_read_addr(addr) rockchip_pcie_read(rockchip, addr) +/* 100 ms max wait time for PHY PLLs to lock */ +#define RK_PHY_PLL_LOCK_TIMEOUT_US 100000 +/* Sleep should be less than 20ms */ +#define RK_PHY_PLL_LOCK_SLEEP_US 1000 + int rockchip_pcie_init_port(struct rockchip_pcie *rockchip) { struct device *dev = rockchip->dev; @@ -254,6 +261,16 @@ int rockchip_pcie_init_port(struct rockchip_pcie *rockchip) } } + err = readx_poll_timeout(rockchip_pcie_read_addr, + PCIE_CLIENT_SIDE_BAND_STATUS, + regs, !(regs & PCIE_CLIENT_PHY_ST), + RK_PHY_PLL_LOCK_SLEEP_US, + RK_PHY_PLL_LOCK_TIMEOUT_US); + if (err) { + dev_err(dev, "PHY PLLs could not lock, %d\n", err); + goto err_power_off_phy; + } + /* * Please don't reorder the deassert sequence of the following * four reset pins. diff --git a/drivers/pci/controller/pcie-rockchip.h b/drivers/pci/controller/pcie-rockchip.h index 51a123e5c0cf..f3a5ff1cf7f4 100644 --- a/drivers/pci/controller/pcie-rockchip.h +++ b/drivers/pci/controller/pcie-rockchip.h @@ -38,6 +38,8 @@ #define PCIE_CLIENT_MODE_EP HIWORD_UPDATE(0x0040, 0) #define PCIE_CLIENT_GEN_SEL_1 HIWORD_UPDATE(0x0080, 0) #define PCIE_CLIENT_GEN_SEL_2 HIWORD_UPDATE_BIT(0x0080) +#define PCIE_CLIENT_SIDE_BAND_STATUS (PCIE_CLIENT_BASE + 0x20) +#define PCIE_CLIENT_PHY_ST BIT(12) #define PCIE_CLIENT_DEBUG_OUT_0 (PCIE_CLIENT_BASE + 0x3c) #define PCIE_CLIENT_DEBUG_LTSSM_MASK GENMASK(5, 0) #define PCIE_CLIENT_DEBUG_LTSSM_L1 0x18 From 1a48294ade5ce8029c6eef539c884bfe8b5d0fb2 Mon Sep 17 00:00:00 2001 From: Rick Wertenbroek Date: Tue, 18 Apr 2023 09:46:54 +0200 Subject: [PATCH 127/224] PCI: rockchip: Fix legacy IRQ generation for RK3399 PCIe endpoint core commit 166e89d99dd85a856343cca51eee781b793801f2 upstream. Fix legacy IRQ generation for RK3399 PCIe endpoint core according to the technical reference manual (TRM). Assert and deassert legacy interrupt (INTx) through the legacy interrupt control register ("PCIE_CLIENT_LEGACY_INT_CTRL") instead of manually generating a PCIe message. The generation of the legacy interrupt was tested and validated with the PCIe endpoint test driver. Link: https://lore.kernel.org/r/20230418074700.1083505-8-rick.wertenbroek@gmail.com Fixes: cf590b078391 ("PCI: rockchip: Add EP driver for Rockchip PCIe controller") Tested-by: Damien Le Moal Signed-off-by: Rick Wertenbroek Signed-off-by: Lorenzo Pieralisi Reviewed-by: Damien Le Moal Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/pcie-rockchip-ep.c | 45 ++++++----------------- drivers/pci/controller/pcie-rockchip.h | 6 ++- 2 files changed, 16 insertions(+), 35 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index e4cbd593e19a..0186cc13d71e 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -347,48 +347,25 @@ static int rockchip_pcie_ep_get_msi(struct pci_epc *epc, u8 fn, u8 vfn) } static void rockchip_pcie_ep_assert_intx(struct rockchip_pcie_ep *ep, u8 fn, - u8 intx, bool is_asserted) + u8 intx, bool do_assert) { struct rockchip_pcie *rockchip = &ep->rockchip; - u32 r = ep->max_regions - 1; - u32 offset; - u32 status; - u8 msg_code; - - if (unlikely(ep->irq_pci_addr != ROCKCHIP_PCIE_EP_PCI_LEGACY_IRQ_ADDR || - ep->irq_pci_fn != fn)) { - rockchip_pcie_prog_ep_ob_atu(rockchip, fn, r, - AXI_WRAPPER_NOR_MSG, - ep->irq_phys_addr, 0, 0); - ep->irq_pci_addr = ROCKCHIP_PCIE_EP_PCI_LEGACY_IRQ_ADDR; - ep->irq_pci_fn = fn; - } intx &= 3; - if (is_asserted) { + + if (do_assert) { ep->irq_pending |= BIT(intx); - msg_code = ROCKCHIP_PCIE_MSG_CODE_ASSERT_INTA + intx; + rockchip_pcie_write(rockchip, + PCIE_CLIENT_INT_IN_ASSERT | + PCIE_CLIENT_INT_PEND_ST_PEND, + PCIE_CLIENT_LEGACY_INT_CTRL); } else { ep->irq_pending &= ~BIT(intx); - msg_code = ROCKCHIP_PCIE_MSG_CODE_DEASSERT_INTA + intx; + rockchip_pcie_write(rockchip, + PCIE_CLIENT_INT_IN_DEASSERT | + PCIE_CLIENT_INT_PEND_ST_NORMAL, + PCIE_CLIENT_LEGACY_INT_CTRL); } - - status = rockchip_pcie_read(rockchip, - ROCKCHIP_PCIE_EP_FUNC_BASE(fn) + - ROCKCHIP_PCIE_EP_CMD_STATUS); - status &= ROCKCHIP_PCIE_EP_CMD_STATUS_IS; - - if ((status != 0) ^ (ep->irq_pending != 0)) { - status ^= ROCKCHIP_PCIE_EP_CMD_STATUS_IS; - rockchip_pcie_write(rockchip, status, - ROCKCHIP_PCIE_EP_FUNC_BASE(fn) + - ROCKCHIP_PCIE_EP_CMD_STATUS); - } - - offset = - ROCKCHIP_PCIE_MSG_ROUTING(ROCKCHIP_PCIE_MSG_ROUTING_LOCAL_INTX) | - ROCKCHIP_PCIE_MSG_CODE(msg_code) | ROCKCHIP_PCIE_MSG_NO_DATA; - writel(0, ep->irq_cpu_addr + offset); } static int rockchip_pcie_ep_send_legacy_irq(struct rockchip_pcie_ep *ep, u8 fn, diff --git a/drivers/pci/controller/pcie-rockchip.h b/drivers/pci/controller/pcie-rockchip.h index f3a5ff1cf7f4..ffc68a3a5fee 100644 --- a/drivers/pci/controller/pcie-rockchip.h +++ b/drivers/pci/controller/pcie-rockchip.h @@ -38,6 +38,11 @@ #define PCIE_CLIENT_MODE_EP HIWORD_UPDATE(0x0040, 0) #define PCIE_CLIENT_GEN_SEL_1 HIWORD_UPDATE(0x0080, 0) #define PCIE_CLIENT_GEN_SEL_2 HIWORD_UPDATE_BIT(0x0080) +#define PCIE_CLIENT_LEGACY_INT_CTRL (PCIE_CLIENT_BASE + 0x0c) +#define PCIE_CLIENT_INT_IN_ASSERT HIWORD_UPDATE_BIT(0x0002) +#define PCIE_CLIENT_INT_IN_DEASSERT HIWORD_UPDATE(0x0002, 0) +#define PCIE_CLIENT_INT_PEND_ST_PEND HIWORD_UPDATE_BIT(0x0001) +#define PCIE_CLIENT_INT_PEND_ST_NORMAL HIWORD_UPDATE(0x0001, 0) #define PCIE_CLIENT_SIDE_BAND_STATUS (PCIE_CLIENT_BASE + 0x20) #define PCIE_CLIENT_PHY_ST BIT(12) #define PCIE_CLIENT_DEBUG_OUT_0 (PCIE_CLIENT_BASE + 0x3c) @@ -227,7 +232,6 @@ #define ROCKCHIP_PCIE_EP_MSI_CTRL_ME BIT(16) #define ROCKCHIP_PCIE_EP_MSI_CTRL_MASK_MSI_CAP BIT(24) #define ROCKCHIP_PCIE_EP_DUMMY_IRQ_ADDR 0x1 -#define ROCKCHIP_PCIE_EP_PCI_LEGACY_IRQ_ADDR 0x3 #define ROCKCHIP_PCIE_EP_FUNC_BASE(fn) (((fn) << 12) & GENMASK(19, 12)) #define ROCKCHIP_PCIE_AT_IB_EP_FUNC_BAR_ADDR0(fn, bar) \ (PCIE_RC_RP_ATS_BASE + 0x0840 + (fn) * 0x0040 + (bar) * 0x0008) From 5b15ebec56979d03e14c3b4e09b7a77f932fdd17 Mon Sep 17 00:00:00 2001 From: Rick Wertenbroek Date: Tue, 18 Apr 2023 09:46:56 +0200 Subject: [PATCH 128/224] PCI: rockchip: Use u32 variable to access 32-bit registers commit 8962b2cb39119cbda4fc69a1f83957824f102f81 upstream. Previously u16 variables were used to access 32-bit registers, this resulted in not all of the data being read from the registers. Also the left shift of more than 16-bits would result in moving data out of the variable. Use u32 variables to access 32-bit registers Link: https://lore.kernel.org/r/20230418074700.1083505-10-rick.wertenbroek@gmail.com Fixes: cf590b078391 ("PCI: rockchip: Add EP driver for Rockchip PCIe controller") Tested-by: Damien Le Moal Signed-off-by: Rick Wertenbroek Signed-off-by: Lorenzo Pieralisi Reviewed-by: Damien Le Moal Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/pcie-rockchip-ep.c | 10 +++++----- drivers/pci/controller/pcie-rockchip.h | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index 0186cc13d71e..f914075a8d3d 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -314,15 +314,15 @@ static int rockchip_pcie_ep_set_msi(struct pci_epc *epc, u8 fn, u8 vfn, { struct rockchip_pcie_ep *ep = epc_get_drvdata(epc); struct rockchip_pcie *rockchip = &ep->rockchip; - u16 flags; + u32 flags; flags = rockchip_pcie_read(rockchip, ROCKCHIP_PCIE_EP_FUNC_BASE(fn) + ROCKCHIP_PCIE_EP_MSI_CTRL_REG); flags &= ~ROCKCHIP_PCIE_EP_MSI_CTRL_MMC_MASK; flags |= - ((multi_msg_cap << 1) << ROCKCHIP_PCIE_EP_MSI_CTRL_MMC_OFFSET) | - PCI_MSI_FLAGS_64BIT; + (multi_msg_cap << ROCKCHIP_PCIE_EP_MSI_CTRL_MMC_OFFSET) | + (PCI_MSI_FLAGS_64BIT << ROCKCHIP_PCIE_EP_MSI_FLAGS_OFFSET); flags &= ~ROCKCHIP_PCIE_EP_MSI_CTRL_MASK_MSI_CAP; rockchip_pcie_write(rockchip, flags, ROCKCHIP_PCIE_EP_FUNC_BASE(fn) + @@ -334,7 +334,7 @@ static int rockchip_pcie_ep_get_msi(struct pci_epc *epc, u8 fn, u8 vfn) { struct rockchip_pcie_ep *ep = epc_get_drvdata(epc); struct rockchip_pcie *rockchip = &ep->rockchip; - u16 flags; + u32 flags; flags = rockchip_pcie_read(rockchip, ROCKCHIP_PCIE_EP_FUNC_BASE(fn) + @@ -395,7 +395,7 @@ static int rockchip_pcie_ep_send_msi_irq(struct rockchip_pcie_ep *ep, u8 fn, u8 interrupt_num) { struct rockchip_pcie *rockchip = &ep->rockchip; - u16 flags, mme, data, data_mask; + u32 flags, mme, data, data_mask; u8 msi_count; u64 pci_addr, pci_addr_mask = 0xff; diff --git a/drivers/pci/controller/pcie-rockchip.h b/drivers/pci/controller/pcie-rockchip.h index ffc68a3a5fee..8e92dc3339ec 100644 --- a/drivers/pci/controller/pcie-rockchip.h +++ b/drivers/pci/controller/pcie-rockchip.h @@ -225,6 +225,7 @@ #define ROCKCHIP_PCIE_EP_CMD_STATUS 0x4 #define ROCKCHIP_PCIE_EP_CMD_STATUS_IS BIT(19) #define ROCKCHIP_PCIE_EP_MSI_CTRL_REG 0x90 +#define ROCKCHIP_PCIE_EP_MSI_FLAGS_OFFSET 16 #define ROCKCHIP_PCIE_EP_MSI_CTRL_MMC_OFFSET 17 #define ROCKCHIP_PCIE_EP_MSI_CTRL_MMC_MASK GENMASK(19, 17) #define ROCKCHIP_PCIE_EP_MSI_CTRL_MME_OFFSET 20 From 0813bb2f2cb86d85a19849513f1dec3a744e85e7 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 18 Apr 2023 09:46:58 +0200 Subject: [PATCH 129/224] PCI: rockchip: Set address alignment for endpoint mode commit 7e6689b34a815bd379dfdbe9855d36f395ef056c upstream. The address translation unit of the rockchip EP controller does not use the lower 8 bits of a PCIe-space address to map local memory. Thus we must set the align feature field to 256 to let the user know about this constraint. Link: https://lore.kernel.org/r/20230418074700.1083505-12-rick.wertenbroek@gmail.com Fixes: cf590b078391 ("PCI: rockchip: Add EP driver for Rockchip PCIe controller") Signed-off-by: Damien Le Moal Signed-off-by: Rick Wertenbroek Signed-off-by: Lorenzo Pieralisi Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/pci/controller/pcie-rockchip-ep.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index f914075a8d3d..827d91e73efa 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -485,6 +485,7 @@ static const struct pci_epc_features rockchip_pcie_epc_features = { .linkup_notifier = false, .msi_capable = true, .msix_capable = false, + .align = 256, }; static const struct pci_epc_features* From c2dba13bc0c62b79a3cbe4bfe5faa32231bf9b55 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 15 Apr 2023 11:35:39 +0900 Subject: [PATCH 130/224] misc: pci_endpoint_test: Free IRQs before removing the device commit f61b7634a3249d12b9daa36ffbdb9965b6f24c6c upstream. In pci_endpoint_test_remove(), freeing the IRQs after removing the device creates a small race window for IRQs to be received with the test device memory already released, causing the IRQ handler to access invalid memory, resulting in an oops. Free the device IRQs before removing the device to avoid this issue. Link: https://lore.kernel.org/r/20230415023542.77601-15-dlemoal@kernel.org Fixes: e03327122e2c ("pci_endpoint_test: Add 2 ioctl commands") Signed-off-by: Damien Le Moal Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/pci_endpoint_test.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c index 11530b4ec389..e27d471cc847 100644 --- a/drivers/misc/pci_endpoint_test.c +++ b/drivers/misc/pci_endpoint_test.c @@ -937,6 +937,9 @@ static void pci_endpoint_test_remove(struct pci_dev *pdev) if (id < 0) return; + pci_endpoint_test_release_irq(test); + pci_endpoint_test_free_irq_vectors(test); + misc_deregister(&test->miscdev); kfree(misc_device->name); kfree(test->name); @@ -946,9 +949,6 @@ static void pci_endpoint_test_remove(struct pci_dev *pdev) pci_iounmap(pdev, test->bar[bar]); } - pci_endpoint_test_release_irq(test); - pci_endpoint_test_free_irq_vectors(test); - pci_release_regions(pdev); pci_disable_device(pdev); } From 7ef181f84ef3ffede5c2a6bffed2e1e2a3d5cf75 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 15 Apr 2023 11:35:40 +0900 Subject: [PATCH 131/224] misc: pci_endpoint_test: Re-init completion for every test commit fb620ae73b70c2f57b9d3e911fc24c024ba2324f upstream. The irq_raised completion used to detect the end of a test case is initialized when the test device is probed, but never reinitialized again before a test case. As a result, the irq_raised completion synchronization is effective only for the first ioctl test case executed. Any subsequent call to wait_for_completion() by another ioctl() call will immediately return, potentially too early, leading to false positive failures. Fix this by reinitializing the irq_raised completion before starting a new ioctl() test command. Link: https://lore.kernel.org/r/20230415023542.77601-16-dlemoal@kernel.org Fixes: 2c156ac71c6b ("misc: Add host side PCI driver for PCI test function device") Signed-off-by: Damien Le Moal Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Manivannan Sadhasivam Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/pci_endpoint_test.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c index e27d471cc847..d1e2f22537db 100644 --- a/drivers/misc/pci_endpoint_test.c +++ b/drivers/misc/pci_endpoint_test.c @@ -728,6 +728,10 @@ static long pci_endpoint_test_ioctl(struct file *file, unsigned int cmd, struct pci_dev *pdev = test->pdev; mutex_lock(&test->mutex); + + reinit_completion(&test->irq_raised); + test->last_irq = -ENODATA; + switch (cmd) { case PCITEST_BAR: bar = arg; From 31df8b9609f392493f62ce5d2e38ee744465aea4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 26 May 2023 11:16:45 +0200 Subject: [PATCH 132/224] mfd: pm8008: Fix module autoloading commit d420c9886f5369697047b880221789bf0054e438 upstream. Add the missing module device table alias to that the driver can be autoloaded when built as a module. Cc: stable@vger.kernel.org # 5.14 Fixes: 6b149f3310a4 ("mfd: pm8008: Add driver for QCOM PM8008 PMIC") Signed-off-by: Johan Hovold Reviewed-by: Konrad Dybcio Signed-off-by: Lee Jones Link: https://lore.kernel.org/r/20230526091646.17318-2-johan+linaro@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/qcom-pm8008.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mfd/qcom-pm8008.c b/drivers/mfd/qcom-pm8008.c index 9f3c4a01b4c1..4af1d368c321 100644 --- a/drivers/mfd/qcom-pm8008.c +++ b/drivers/mfd/qcom-pm8008.c @@ -233,6 +233,7 @@ static const struct of_device_id pm8008_match[] = { { .compatible = "qcom,pm8008", }, { }, }; +MODULE_DEVICE_TABLE(of, pm8008_match); static struct i2c_driver pm8008_mfd_driver = { .driver = { From e30128926a0f89c04646ecedb0ca3702b04555e1 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Fri, 23 Jun 2023 14:05:23 -0400 Subject: [PATCH 133/224] md/raid0: add discard support for the 'original' layout commit e836007089ba8fdf24e636ef2b007651fb4582e6 upstream. We've found that using raid0 with the 'original' layout and discard enabled with different disk sizes (such that at least two zones are created) can result in data corruption. This is due to the fact that the discard handling in 'raid0_handle_discard()' assumes the 'alternate' layout. We've seen this corruption using ext4 but other filesystems are likely susceptible as well. More specifically, while multiple zones are necessary to create the corruption, the corruption may not occur with multiple zones if they layout in such a way the layout matches what the 'alternate' layout would have produced. Thus, not all raid0 devices with the 'original' layout, different size disks and discard enabled will encounter this corruption. The 3.14 kernel inadvertently changed the raid0 disk layout for different size disks. Thus, running a pre-3.14 kernel and post-3.14 kernel on the same raid0 array could corrupt data. This lead to the creation of the 'original' layout (to match the pre-3.14 layout) and the 'alternate' layout (to match the post 3.14 layout) in the 5.4 kernel time frame and an option to tell the kernel which layout to use (since it couldn't be autodetected). However, when the 'original' layout was added back to 5.4 discard support for the 'original' layout was not added leading this issue. I've been able to reliably reproduce the corruption with the following test case: 1. create raid0 array with different size disks using original layout 2. mkfs 3. mount -o discard 4. create lots of files 5. remove 1/2 the files 6. fstrim -a (or just the mount point for the raid0 array) 7. umount 8. fsck -fn /dev/md0 (spews all sorts of corruptions) Let's fix this by adding proper discard support to the 'original' layout. The fix 'maps' the 'original' layout disks to the order in which they are read/written such that we can compare the disks in the same way that the current 'alternate' layout does. A 'disk_shift' field is added to 'struct strip_zone'. This could be computed on the fly in raid0_handle_discard() but by adding this field, we save some computation in the discard path. Note we could also potentially fix this by re-ordering the disks in the zones that follow the first one, and then always read/writing them using the 'alternate' layout. However, that is seen as a more substantial change, and we are attempting the least invasive fix at this time to remedy the corruption. I've verified the change using the reproducer mentioned above. Typically, the corruption is seen after less than 3 iterations, while the patch has run 500+ iterations. Cc: NeilBrown Cc: Song Liu Fixes: c84a1372df92 ("md/raid0: avoid RAID0 data corruption due to layout confusion.") Cc: stable@vger.kernel.org Signed-off-by: Jason Baron Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20230623180523.1901230-1-jbaron@akamai.com Signed-off-by: Greg Kroah-Hartman --- drivers/md/raid0.c | 62 ++++++++++++++++++++++++++++++++++++++++------ drivers/md/raid0.h | 1 + 2 files changed, 55 insertions(+), 8 deletions(-) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index b536befd8898..0f7c3b3c62b2 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -270,6 +270,18 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) goto abort; } + if (conf->layout == RAID0_ORIG_LAYOUT) { + for (i = 1; i < conf->nr_strip_zones; i++) { + sector_t first_sector = conf->strip_zone[i-1].zone_end; + + sector_div(first_sector, mddev->chunk_sectors); + zone = conf->strip_zone + i; + /* disk_shift is first disk index used in the zone */ + zone->disk_shift = sector_div(first_sector, + zone->nb_dev); + } + } + pr_debug("md/raid0:%s: done.\n", mdname(mddev)); *private_conf = conf; @@ -431,6 +443,20 @@ exit_acct_set: return ret; } +/* + * Convert disk_index to the disk order in which it is read/written. + * For example, if we have 4 disks, they are numbered 0,1,2,3. If we + * write the disks starting at disk 3, then the read/write order would + * be disk 3, then 0, then 1, and then disk 2 and we want map_disk_shift() + * to map the disks as follows 0,1,2,3 => 1,2,3,0. So disk 0 would map + * to 1, 1 to 2, 2 to 3, and 3 to 0. That way we can compare disks in + * that 'output' space to understand the read/write disk ordering. + */ +static int map_disk_shift(int disk_index, int num_disks, int disk_shift) +{ + return ((disk_index + num_disks - disk_shift) % num_disks); +} + static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) { struct r0conf *conf = mddev->private; @@ -444,7 +470,9 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) sector_t end_disk_offset; unsigned int end_disk_index; unsigned int disk; + sector_t orig_start, orig_end; + orig_start = start; zone = find_zone(conf, &start); if (bio_end_sector(bio) > zone->zone_end) { @@ -458,6 +486,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) } else end = bio_end_sector(bio); + orig_end = end; if (zone != conf->strip_zone) end = end - zone[-1].zone_end; @@ -469,13 +498,26 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) last_stripe_index = end; sector_div(last_stripe_index, stripe_size); - start_disk_index = (int)(start - first_stripe_index * stripe_size) / - mddev->chunk_sectors; + /* In the first zone the original and alternate layouts are the same */ + if ((conf->layout == RAID0_ORIG_LAYOUT) && (zone != conf->strip_zone)) { + sector_div(orig_start, mddev->chunk_sectors); + start_disk_index = sector_div(orig_start, zone->nb_dev); + start_disk_index = map_disk_shift(start_disk_index, + zone->nb_dev, + zone->disk_shift); + sector_div(orig_end, mddev->chunk_sectors); + end_disk_index = sector_div(orig_end, zone->nb_dev); + end_disk_index = map_disk_shift(end_disk_index, + zone->nb_dev, zone->disk_shift); + } else { + start_disk_index = (int)(start - first_stripe_index * stripe_size) / + mddev->chunk_sectors; + end_disk_index = (int)(end - last_stripe_index * stripe_size) / + mddev->chunk_sectors; + } start_disk_offset = ((int)(start - first_stripe_index * stripe_size) % mddev->chunk_sectors) + first_stripe_index * mddev->chunk_sectors; - end_disk_index = (int)(end - last_stripe_index * stripe_size) / - mddev->chunk_sectors; end_disk_offset = ((int)(end - last_stripe_index * stripe_size) % mddev->chunk_sectors) + last_stripe_index * mddev->chunk_sectors; @@ -483,18 +525,22 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) for (disk = 0; disk < zone->nb_dev; disk++) { sector_t dev_start, dev_end; struct md_rdev *rdev; + int compare_disk; - if (disk < start_disk_index) + compare_disk = map_disk_shift(disk, zone->nb_dev, + zone->disk_shift); + + if (compare_disk < start_disk_index) dev_start = (first_stripe_index + 1) * mddev->chunk_sectors; - else if (disk > start_disk_index) + else if (compare_disk > start_disk_index) dev_start = first_stripe_index * mddev->chunk_sectors; else dev_start = start_disk_offset; - if (disk < end_disk_index) + if (compare_disk < end_disk_index) dev_end = (last_stripe_index + 1) * mddev->chunk_sectors; - else if (disk > end_disk_index) + else if (compare_disk > end_disk_index) dev_end = last_stripe_index * mddev->chunk_sectors; else dev_end = end_disk_offset; diff --git a/drivers/md/raid0.h b/drivers/md/raid0.h index 3816e5477db1..8cc761ca7423 100644 --- a/drivers/md/raid0.h +++ b/drivers/md/raid0.h @@ -6,6 +6,7 @@ struct strip_zone { sector_t zone_end; /* Start of the next zone (in sectors) */ sector_t dev_start; /* Zone offset in real dev (in sectors) */ int nb_dev; /* # of devices attached to the zone */ + int disk_shift; /* start disk for the original layout */ }; /* Linux 3.14 (20d0189b101) made an unintended change to From be19cb67165143212c6e136de200d26dce5b5ee6 Mon Sep 17 00:00:00 2001 From: Peter Korsgaard Date: Wed, 16 Nov 2022 07:16:56 +0100 Subject: [PATCH 134/224] dm init: add dm-mod.waitfor to wait for asynchronously probed block devices commit 035641b01e72af4f6c6cf22a4bdb5d7dfc4e8e8e upstream. Just calling wait_for_device_probe() is not enough to ensure that asynchronously probed block devices are available (E.G. mmc, usb), so add a "dm-mod.waitfor=[,..,]" parameter to get dm-init to explicitly wait for specific block devices before initializing the tables with logic similar to the rootwait logic that was introduced with commit cc1ed7542c8c ("init: wait for asynchronously scanned block devices"). E.G. with dm-verity on mmc using: dm-mod.waitfor="PARTLABEL=hash-a,PARTLABEL=root-a" [ 0.671671] device-mapper: init: waiting for all devices to be available before creating mapped devices [ 0.671679] device-mapper: init: waiting for device PARTLABEL=hash-a ... [ 0.710695] mmc0: new HS200 MMC card at address 0001 [ 0.711158] mmcblk0: mmc0:0001 004GA0 3.69 GiB [ 0.715954] mmcblk0boot0: mmc0:0001 004GA0 partition 1 2.00 MiB [ 0.722085] mmcblk0boot1: mmc0:0001 004GA0 partition 2 2.00 MiB [ 0.728093] mmcblk0rpmb: mmc0:0001 004GA0 partition 3 512 KiB, chardev (249:0) [ 0.738274] mmcblk0: p1 p2 p3 p4 p5 p6 p7 [ 0.751282] device-mapper: init: waiting for device PARTLABEL=root-a ... [ 0.751306] device-mapper: init: all devices available [ 0.751683] device-mapper: verity: sha256 using implementation "sha256-generic" [ 0.759344] device-mapper: ioctl: dm-0 (vroot) is ready [ 0.766540] VFS: Mounted root (squashfs filesystem) readonly on device 254:0. Signed-off-by: Peter Korsgaard Signed-off-by: Mike Snitzer Cc: Mark-PK Tsai Signed-off-by: Greg Kroah-Hartman --- .../admin-guide/device-mapper/dm-init.rst | 8 +++++++ drivers/md/dm-init.c | 22 ++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/device-mapper/dm-init.rst b/Documentation/admin-guide/device-mapper/dm-init.rst index e5242ff17e9b..981d6a907699 100644 --- a/Documentation/admin-guide/device-mapper/dm-init.rst +++ b/Documentation/admin-guide/device-mapper/dm-init.rst @@ -123,3 +123,11 @@ Other examples (per target): 0 1638400 verity 1 8:1 8:2 4096 4096 204800 1 sha256 fb1a5a0f00deb908d8b53cb270858975e76cf64105d412ce764225d53b8f3cfd 51934789604d1b92399c52e7cb149d1b3a1b74bbbcb103b2a0aaacbed5c08584 + +For setups using device-mapper on top of asynchronously probed block +devices (MMC, USB, ..), it may be necessary to tell dm-init to +explicitly wait for them to become available before setting up the +device-mapper tables. This can be done with the "dm-mod.waitfor=" +module parameter, which takes a list of devices to wait for:: + + dm-mod.waitfor=[,..,] diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c index b0c45c6ebe0b..dc4381d68313 100644 --- a/drivers/md/dm-init.c +++ b/drivers/md/dm-init.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -18,12 +19,17 @@ #define DM_MAX_DEVICES 256 #define DM_MAX_TARGETS 256 #define DM_MAX_STR_SIZE 4096 +#define DM_MAX_WAITFOR 256 static char *create; +static char *waitfor[DM_MAX_WAITFOR]; + /* * Format: dm-mod.create=,,,,[,
+][;,,,,
[,
+]+] * Table format: + * Block devices to wait for to become available before setting up tables: + * dm-mod.waitfor=[,..,] * * See Documentation/admin-guide/device-mapper/dm-init.rst for dm-mod.create="..." format * details. @@ -266,7 +272,7 @@ static int __init dm_init_init(void) struct dm_device *dev; LIST_HEAD(devices); char *str; - int r; + int i, r; if (!create) return 0; @@ -286,6 +292,17 @@ static int __init dm_init_init(void) DMINFO("waiting for all devices to be available before creating mapped devices"); wait_for_device_probe(); + for (i = 0; i < ARRAY_SIZE(waitfor); i++) { + if (waitfor[i]) { + DMINFO("waiting for device %s ...", waitfor[i]); + while (!dm_get_dev_t(waitfor[i])) + msleep(5); + } + } + + if (waitfor[0]) + DMINFO("all devices available"); + list_for_each_entry(dev, &devices, list) { if (dm_early_create(&dev->dmi, dev->table, dev->target_args_array)) @@ -301,3 +318,6 @@ late_initcall(dm_init_init); module_param(create, charp, 0); MODULE_PARM_DESC(create, "Create a mapped device in early boot"); + +module_param_array(waitfor, charp, NULL, 0); +MODULE_PARM_DESC(waitfor, "Devices to wait for before setting up tables"); From 3346ffdee42b962aa2bb691f7de7e2e4a3db209f Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 19 May 2023 11:21:24 -0400 Subject: [PATCH 135/224] fs: dlm: return positive pid value for F_GETLK commit 92655fbda5c05950a411eaabc19e025e86e2a291 upstream. The GETLK pid values have all been negated since commit 9d5b86ac13c5 ("fs/locks: Remove fl_nspid and use fs-specific l_pid for remote locks"). Revert this for local pids, and leave in place negative pids for remote owners. Cc: stable@vger.kernel.org Fixes: 9d5b86ac13c5 ("fs/locks: Remove fl_nspid and use fs-specific l_pid for remote locks") Signed-off-by: Alexander Aring Signed-off-by: David Teigland Signed-off-by: Greg Kroah-Hartman --- fs/dlm/plock.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index 737f185aad8d..b2d3f52dab9b 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -359,7 +359,9 @@ int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, locks_init_lock(fl); fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK; fl->fl_flags = FL_POSIX; - fl->fl_pid = -op->info.pid; + fl->fl_pid = op->info.pid; + if (op->info.nodeid != dlm_our_nodeid()) + fl->fl_pid = -fl->fl_pid; fl->fl_start = op->info.start; fl->fl_end = op->info.end; rv = 0; From a1b6adf4b1802b8ef3b5ec99e2710294eb786a38 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 19 May 2023 11:21:25 -0400 Subject: [PATCH 136/224] fs: dlm: fix cleanup pending ops when interrupted commit c847f4e203046a2c93d8a1cf0348315c0b655a60 upstream. Immediately clean up a posix lock request if it is interrupted while waiting for a result from user space (dlm_controld.) This largely reverts the recent commit b92a4e3f86b1 ("fs: dlm: change posix lock sigint handling"). That previous commit attempted to defer lock cleanup to the point in time when a result from user space arrived. The deferred approach was not reliable because some dlm plock ops may not receive replies. Cc: stable@vger.kernel.org Fixes: b92a4e3f86b1 ("fs: dlm: change posix lock sigint handling") Signed-off-by: Alexander Aring Signed-off-by: David Teigland Signed-off-by: Greg Kroah-Hartman --- fs/dlm/plock.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index b2d3f52dab9b..d6196abe51a5 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -29,8 +29,6 @@ struct plock_async_data { struct plock_op { struct list_head list; int done; - /* if lock op got interrupted while waiting dlm_controld reply */ - bool sigint; struct dlm_plock_info info; /* if set indicates async handling */ struct plock_async_data *data; @@ -166,12 +164,14 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, spin_unlock(&ops_lock); goto do_lock_wait; } - - op->sigint = true; + list_del(&op->list); spin_unlock(&ops_lock); + log_debug(ls, "%s: wait interrupted %x %llx pid %d", __func__, ls->ls_global_id, (unsigned long long)number, op->info.pid); + do_unlock_close(&op->info); + dlm_release_plock_op(op); goto out; } @@ -433,19 +433,6 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count, if (iter->info.fsid == info.fsid && iter->info.number == info.number && iter->info.owner == info.owner) { - if (iter->sigint) { - list_del(&iter->list); - spin_unlock(&ops_lock); - - pr_debug("%s: sigint cleanup %x %llx pid %d", - __func__, iter->info.fsid, - (unsigned long long)iter->info.number, - iter->info.pid); - do_unlock_close(&iter->info); - memcpy(&iter->info, &info, sizeof(info)); - dlm_release_plock_op(iter); - return count; - } list_del_init(&iter->list); memcpy(&iter->info, &info, sizeof(info)); if (iter->data) @@ -464,8 +451,8 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count, else wake_up(&recv_wq); } else - log_print("%s: no op %x %llx", __func__, - info.fsid, (unsigned long long)info.number); + pr_debug("%s: no op %x %llx", __func__, + info.fsid, (unsigned long long)info.number); return count; } From 2a37d73395a51238a324a94b64bba734417ff2f8 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 19 May 2023 11:21:26 -0400 Subject: [PATCH 137/224] fs: dlm: interrupt posix locks only when process is killed commit 59e45c758ca1b9893ac923dd63536da946ac333b upstream. If a posix lock request is waiting for a result from user space (dlm_controld), do not let it be interrupted unless the process is killed. This reverts commit a6b1533e9a57 ("dlm: make posix locks interruptible"). The problem with the interruptible change is that all locks were cleared on any signal interrupt. If a signal was received that did not terminate the process, the process could continue running after all its dlm posix locks had been cleared. A future patch will add cancelation to allow proper interruption. Cc: stable@vger.kernel.org Fixes: a6b1533e9a57 ("dlm: make posix locks interruptible") Signed-off-by: Alexander Aring Signed-off-by: David Teigland Signed-off-by: Greg Kroah-Hartman --- fs/dlm/plock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index d6196abe51a5..7754d66ea9d8 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -154,7 +154,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, send_op(op); - rv = wait_event_interruptible(recv_wq, (op->done != 0)); + rv = wait_event_killable(recv_wq, (op->done != 0)); if (rv == -ERESTARTSYS) { spin_lock(&ops_lock); /* recheck under ops_lock if we got a done != 0, From adeaef5a00dcfe023b0ecae3272ef0fd556efbaf Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 19 May 2023 11:21:27 -0400 Subject: [PATCH 138/224] fs: dlm: make F_SETLK use unkillable wait_event commit 0f2b1cb89ccdbdcedf7143f4153a4da700a05f48 upstream. While a non-waiting posix lock request (F_SETLK) is waiting for user space processing (in dlm_controld), wait for that processing to complete with an unkillable wait_event(). This makes F_SETLK behave the same way for F_RDLCK, F_WRLCK and F_UNLCK. F_SETLKW continues to use wait_event_killable(). Cc: stable@vger.kernel.org Signed-off-by: Alexander Aring Signed-off-by: David Teigland Signed-off-by: Greg Kroah-Hartman --- fs/dlm/plock.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index 7754d66ea9d8..0df24702125e 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -154,25 +154,29 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, send_op(op); - rv = wait_event_killable(recv_wq, (op->done != 0)); - if (rv == -ERESTARTSYS) { - spin_lock(&ops_lock); - /* recheck under ops_lock if we got a done != 0, - * if so this interrupt case should be ignored - */ - if (op->done != 0) { + if (op->info.wait) { + rv = wait_event_killable(recv_wq, (op->done != 0)); + if (rv == -ERESTARTSYS) { + spin_lock(&ops_lock); + /* recheck under ops_lock if we got a done != 0, + * if so this interrupt case should be ignored + */ + if (op->done != 0) { + spin_unlock(&ops_lock); + goto do_lock_wait; + } + list_del(&op->list); spin_unlock(&ops_lock); - goto do_lock_wait; - } - list_del(&op->list); - spin_unlock(&ops_lock); - log_debug(ls, "%s: wait interrupted %x %llx pid %d", - __func__, ls->ls_global_id, - (unsigned long long)number, op->info.pid); - do_unlock_close(&op->info); - dlm_release_plock_op(op); - goto out; + log_debug(ls, "%s: wait interrupted %x %llx pid %d", + __func__, ls->ls_global_id, + (unsigned long long)number, op->info.pid); + do_unlock_close(&op->info); + dlm_release_plock_op(op); + goto out; + } + } else { + wait_event(recv_wq, (op->done != 0)); } do_lock_wait: From 7adcc32eb5232a9324d150dc786691b403a7d80b Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 24 May 2023 12:02:04 -0400 Subject: [PATCH 139/224] fs: dlm: fix mismatch of plock results from userspace commit 57e2c2f2d94cfd551af91cedfa1af6d972487197 upstream. When a waiting plock request (F_SETLKW) is sent to userspace for processing (dlm_controld), the result is returned at a later time. That result could be incorrectly matched to a different waiting request in cases where the owner field is the same (e.g. different threads in a process.) This is fixed by comparing all the properties in the request and reply. The results for non-waiting plock requests are now matched based on list order because the results are returned in the same order they were sent. Cc: stable@vger.kernel.org Signed-off-by: Alexander Aring Signed-off-by: David Teigland Signed-off-by: Greg Kroah-Hartman --- fs/dlm/plock.c | 58 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 13 deletions(-) diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index 0df24702125e..739e7d55c9e3 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -394,7 +394,7 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count, if (op->info.flags & DLM_PLOCK_FL_CLOSE) list_del(&op->list); else - list_move(&op->list, &recv_list); + list_move_tail(&op->list, &recv_list); memcpy(&info, &op->info, sizeof(info)); } spin_unlock(&ops_lock); @@ -432,20 +432,52 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count, if (check_version(&info)) return -EINVAL; + /* + * The results for waiting ops (SETLKW) can be returned in any + * order, so match all fields to find the op. The results for + * non-waiting ops are returned in the order that they were sent + * to userspace, so match the result with the first non-waiting op. + */ spin_lock(&ops_lock); - list_for_each_entry(iter, &recv_list, list) { - if (iter->info.fsid == info.fsid && - iter->info.number == info.number && - iter->info.owner == info.owner) { - list_del_init(&iter->list); - memcpy(&iter->info, &info, sizeof(info)); - if (iter->data) - do_callback = 1; - else - iter->done = 1; - op = iter; - break; + if (info.wait) { + list_for_each_entry(iter, &recv_list, list) { + if (iter->info.fsid == info.fsid && + iter->info.number == info.number && + iter->info.owner == info.owner && + iter->info.pid == info.pid && + iter->info.start == info.start && + iter->info.end == info.end && + iter->info.ex == info.ex && + iter->info.wait) { + op = iter; + break; + } } + } else { + list_for_each_entry(iter, &recv_list, list) { + if (!iter->info.wait) { + op = iter; + break; + } + } + } + + if (op) { + /* Sanity check that op and info match. */ + if (info.wait) + WARN_ON(op->info.optype != DLM_PLOCK_OP_LOCK); + else + WARN_ON(op->info.fsid != info.fsid || + op->info.number != info.number || + op->info.owner != info.owner || + op->info.optype != info.optype); + + list_del_init(&op->list); + memcpy(&op->info, &info, sizeof(info)); + if (op->data) + do_callback = 1; + else + op->done = 1; } spin_unlock(&ops_lock); From 6436ca035bccac03d64c13f232f833ee2763d1c5 Mon Sep 17 00:00:00 2001 From: Justin Tee Date: Mon, 17 Apr 2023 12:15:53 -0700 Subject: [PATCH 140/224] scsi: lpfc: Fix double free in lpfc_cmpl_els_logo_acc() caused by lpfc_nlp_not_used() commit 97f975823f8196d970bd795087b514271214677a upstream. Smatch detected a double free path because lpfc_nlp_not_used() releases an ndlp object before reaching lpfc_nlp_put() at the end of lpfc_cmpl_els_logo_acc(). Remove the outdated lpfc_nlp_not_used() routine. In lpfc_mbx_cmpl_ns_reg_login(), replace the call with lpfc_nlp_put(). In lpfc_cmpl_els_logo_acc(), replace the call with lpfc_unreg_rpi() and keep the lpfc_nlp_put() at the end of the routine. If ndlp's rpi was registered, then lpfc_unreg_rpi()'s completion routine performs the final ndlp clean up after lpfc_nlp_put() is called from lpfc_cmpl_els_logo_acc(). Otherwise if ndlp has no rpi registered, the lpfc_nlp_put() at the end of lpfc_cmpl_els_logo_acc() is the final ndlp clean up. Fixes: 4430f7fd09ec ("scsi: lpfc: Rework locations of ndlp reference taking") Cc: # v5.11+ Reported-by: Dan Carpenter Link: https://lore.kernel.org/all/Y3OefhyyJNKH%2Fiaf@kili/ Signed-off-by: Justin Tee Link: https://lore.kernel.org/r/20230417191558.83100-3-justintee8345@gmail.com Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc_crtn.h | 1 - drivers/scsi/lpfc/lpfc_els.c | 30 +++++++----------------------- drivers/scsi/lpfc/lpfc_hbadisc.c | 24 +++--------------------- 3 files changed, 10 insertions(+), 45 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index d2d207791056..3a5650e0e207 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -134,7 +134,6 @@ void lpfc_check_nlp_post_devloss(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp); void lpfc_ignore_els_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, struct lpfc_iocbq *rspiocb); -int lpfc_nlp_not_used(struct lpfc_nodelist *ndlp); struct lpfc_nodelist *lpfc_setup_disc_node(struct lpfc_vport *, uint32_t); void lpfc_disc_list_loopmap(struct lpfc_vport *); void lpfc_disc_start(struct lpfc_vport *); diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index e21c73a3803e..43ebb41ded59 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -5150,14 +5150,9 @@ lpfc_els_free_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *elsiocb) * * This routine is the completion callback function to the Logout (LOGO) * Accept (ACC) Response ELS command. This routine is invoked to indicate - * the completion of the LOGO process. It invokes the lpfc_nlp_not_used() to - * release the ndlp if it has the last reference remaining (reference count - * is 1). If succeeded (meaning ndlp released), it sets the iocb ndlp - * field to NULL to inform the following lpfc_els_free_iocb() routine no - * ndlp reference count needs to be decremented. Otherwise, the ndlp - * reference use-count shall be decremented by the lpfc_els_free_iocb() - * routine. Finally, the lpfc_els_free_iocb() is invoked to release the - * IOCB data structure. + * the completion of the LOGO process. If the node has transitioned to NPR, + * this routine unregisters the RPI if it is still registered. The + * lpfc_els_free_iocb() is invoked to release the IOCB data structure. **/ static void lpfc_cmpl_els_logo_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, @@ -5198,19 +5193,9 @@ lpfc_cmpl_els_logo_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, (ndlp->nlp_last_elscmd == ELS_CMD_PLOGI)) goto out; - /* NPort Recovery mode or node is just allocated */ - if (!lpfc_nlp_not_used(ndlp)) { - /* A LOGO is completing and the node is in NPR state. - * Just unregister the RPI because the node is still - * required. - */ + if (ndlp->nlp_flag & NLP_RPI_REGISTERED) lpfc_unreg_rpi(vport, ndlp); - } else { - /* Indicate the node has already released, should - * not reference to it from within lpfc_els_free_iocb. - */ - cmdiocb->ndlp = NULL; - } + } out: /* @@ -5230,9 +5215,8 @@ lpfc_cmpl_els_logo_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, * RPI (Remote Port Index) mailbox command to the @phba. It simply releases * the associated lpfc Direct Memory Access (DMA) buffer back to the pool and * decrements the ndlp reference count held for this completion callback - * function. After that, it invokes the lpfc_nlp_not_used() to check - * whether there is only one reference left on the ndlp. If so, it will - * perform one more decrement and trigger the release of the ndlp. + * function. After that, it invokes the lpfc_drop_node to check + * whether it is appropriate to release the node. **/ void lpfc_mbx_cmpl_dflt_rpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index d38ebd7281b9..549fa7d6c0f6 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -4332,13 +4332,14 @@ out: /* If the node is not registered with the scsi or nvme * transport, remove the fabric node. The failed reg_login - * is terminal. + * is terminal and forces the removal of the last node + * reference. */ if (!(ndlp->fc4_xpt_flags & (SCSI_XPT_REGD | NVME_XPT_REGD))) { spin_lock_irq(&ndlp->lock); ndlp->nlp_flag &= ~NLP_NPR_2B_DISC; spin_unlock_irq(&ndlp->lock); - lpfc_nlp_not_used(ndlp); + lpfc_nlp_put(ndlp); } if (phba->fc_topology == LPFC_TOPOLOGY_LOOP) { @@ -6703,25 +6704,6 @@ lpfc_nlp_put(struct lpfc_nodelist *ndlp) return ndlp ? kref_put(&ndlp->kref, lpfc_nlp_release) : 0; } -/* This routine free's the specified nodelist if it is not in use - * by any other discovery thread. This routine returns 1 if the - * ndlp has been freed. A return value of 0 indicates the ndlp is - * not yet been released. - */ -int -lpfc_nlp_not_used(struct lpfc_nodelist *ndlp) -{ - lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_NODE, - "node not used: did:x%x flg:x%x refcnt:x%x", - ndlp->nlp_DID, ndlp->nlp_flag, - kref_read(&ndlp->kref)); - - if (kref_read(&ndlp->kref) == 1) - if (lpfc_nlp_put(ndlp)) - return 1; - return 0; -} - /** * lpfc_fcf_inuse - Check if FCF can be unregistered. * @phba: Pointer to hba context object. From db0a9a29912cdc1834214fe35101a4917f8b9907 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 9 Jan 2023 17:18:16 -0800 Subject: [PATCH 141/224] drm/atomic: Allow vblank-enabled + self-refresh "disable" commit 9d0e3cac3517942a6e00eeecfe583a98715edb16 upstream. The self-refresh helper framework overloads "disable" to sometimes mean "go into self-refresh mode," and this mode activates automatically (e.g., after some period of unchanging display output). In such cases, the display pipe is still considered "on", and user-space is not aware that we went into self-refresh mode. Thus, users may expect that vblank-related features (such as DRM_IOCTL_WAIT_VBLANK) still work properly. However, we trigger the WARN_ONCE() here if a CRTC driver tries to leave vblank enabled. Add a different expectation: that CRTCs *should* leave vblank enabled when going into self-refresh. This patch is preparation for another patch -- "drm/rockchip: vop: Leave vblank enabled in self-refresh" -- which resolves conflicts between the above self-refresh behavior and the API tests in IGT's kms_vblank test module. == Some alternatives discussed: == It's likely that on many display controllers, vblank interrupts will turn off when the CRTC is disabled, and so in some cases, self-refresh may not support vblank. To support such cases, we might consider additions to the generic helpers such that we fire vblank events based on a timer. However, there is currently only one driver using the common self-refresh helpers (i.e., rockchip), and at least as of commit bed030a49f3e ("drm/rockchip: Don't fully disable vop on self refresh"), the CRTC hardware is powered enough to continue to generate vblank interrupts. So we chose the simpler option of leaving vblank interrupts enabled. We can reevaluate this decision and perhaps augment the helpers if/when we gain a second driver that has different requirements. v3: * include discussion summary v2: * add 'ret != 0' warning case for self-refresh * describe failing test case and relation to drm/rockchip patch better Cc: # dependency for "drm/rockchip: vop: Leave # vblank enabled in self-refresh" Signed-off-by: Brian Norris Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20230109171809.v3.1.I3904f697863649eb1be540ecca147a66e42bfad7@changeid Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_atomic_helper.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 02b4a7dc92f5..202a9990f451 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -1225,7 +1225,16 @@ disable_outputs(struct drm_device *dev, struct drm_atomic_state *old_state) continue; ret = drm_crtc_vblank_get(crtc); - WARN_ONCE(ret != -EINVAL, "driver forgot to call drm_crtc_vblank_off()\n"); + /* + * Self-refresh is not a true "disable"; ensure vblank remains + * enabled. + */ + if (new_crtc_state->self_refresh_active) + WARN_ONCE(ret != 0, + "driver disabled vblank in self-refresh\n"); + else + WARN_ONCE(ret != -EINVAL, + "driver forgot to call drm_crtc_vblank_off()\n"); if (ret == 0) drm_crtc_vblank_put(crtc); } From c41963e50a55a7ff5a96cec7ef0cc2cea9200fd1 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 9 Jan 2023 17:18:17 -0800 Subject: [PATCH 142/224] drm/rockchip: vop: Leave vblank enabled in self-refresh commit 2bdba9d4a3baa758c2ca7f5b37b35c7b3391dc42 upstream. If we disable vblank when entering self-refresh, vblank APIs (like DRM_IOCTL_WAIT_VBLANK) no longer work. But user space is not aware when we enter self-refresh, so this appears to be an API violation -- that DRM_IOCTL_WAIT_VBLANK fails with EINVAL whenever the display is idle and enters self-refresh. The downstream driver used by many of these systems never used to disable vblank for PSR, and in fact, even upstream, we didn't do that until radically redesigning the state machine in commit 6c836d965bad ("drm/rockchip: Use the helpers for PSR"). Thus, it seems like a reasonable API fix to simply restore that behavior, and leave vblank enabled. Note that this appears to potentially unbalance the drm_crtc_vblank_{off,on}() calls in some cases, but: (a) drm_crtc_vblank_on() documents this as OK and (b) if I do the naive balancing, I find state machine issues such that we're not in sync properly; so it's easier to take advantage of (a). This issue was exposed by IGT's kms_vblank tests, and reported by KernelCI. The bug has been around a while (longer than KernelCI noticed), but was only exposed once self-refresh was bugfixed more recently, and so KernelCI could properly test it. Some other notes in: https://lore.kernel.org/dri-devel/Y6OCg9BPnJvimQLT@google.com/ Re: renesas/master bisection: igt-kms-rockchip.kms_vblank.pipe-A-wait-forked on rk3399-gru-kevin == Backporting notes: == Marking as 'Fixes' commit 6c836d965bad ("drm/rockchip: Use the helpers for PSR"), but it probably depends on commit bed030a49f3e ("drm/rockchip: Don't fully disable vop on self refresh") as well. We also need the previous patch ("drm/atomic: Allow vblank-enabled + self-refresh "disable""), of course. v3: * no update v2: * skip unnecessary lock/unlock Fixes: 6c836d965bad ("drm/rockchip: Use the helpers for PSR") Cc: Reported-by: "kernelci.org bot" Link: https://lore.kernel.org/dri-devel/Y5itf0+yNIQa6fU4@sirena.org.uk/ Signed-off-by: Brian Norris Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20230109171809.v3.2.Ic07cba4ab9a7bd3618a9e4258b8f92ea7d10ae5a@changeid Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index fa1f4ee6d195..9fea03121247 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -717,13 +717,13 @@ static void vop_crtc_atomic_disable(struct drm_crtc *crtc, if (crtc->state->self_refresh_active) rockchip_drm_set_win_enabled(crtc, false); + if (crtc->state->self_refresh_active) + goto out; + mutex_lock(&vop->vop_lock); drm_crtc_vblank_off(crtc); - if (crtc->state->self_refresh_active) - goto out; - /* * Vop standby will take effect at end of current frame, * if dsp hold valid irq happen, it means standby complete. @@ -757,9 +757,9 @@ static void vop_crtc_atomic_disable(struct drm_crtc *crtc, vop_core_clks_disable(vop); pm_runtime_put(vop->dev); -out: mutex_unlock(&vop->vop_lock); +out: if (crtc->state->event && !crtc->state->active) { spin_lock_irq(&crtc->dev->event_lock); drm_crtc_send_vblank_event(crtc, crtc->state->event); From 31fb25ecbba6ebe11dc497952310b986e05dd3a0 Mon Sep 17 00:00:00 2001 From: Dmytro Laktyushkin Date: Tue, 18 Apr 2023 10:11:56 -0400 Subject: [PATCH 143/224] drm/amd/display: fix seamless odm transitions commit 75c2b7ed080d7421157c03064be82275364136e7 upstream. Add missing programming and function pointers Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Acked-by: Stylon Wang Signed-off-by: Dmytro Laktyushkin Reviewed-by: Charlene Liu Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c | 11 +++++++++++ drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c | 2 +- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.h | 1 + 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 2d49e99a152c..622efa556e7a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1678,6 +1678,17 @@ static void dcn20_program_pipe( if (hws->funcs.setup_vupdate_interrupt) hws->funcs.setup_vupdate_interrupt(dc, pipe_ctx); + + if (hws->funcs.calculate_dccg_k1_k2_values && dc->res_pool->dccg->funcs->set_pixel_rate_div) { + unsigned int k1_div, k2_div; + + hws->funcs.calculate_dccg_k1_k2_values(pipe_ctx, &k1_div, &k2_div); + + dc->res_pool->dccg->funcs->set_pixel_rate_div( + dc->res_pool->dccg, + pipe_ctx->stream_res.tg->inst, + k1_div, k2_div); + } } if (pipe_ctx->update_flags.bits.odm) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c index 2b33eeb213e2..fe941b103de8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c @@ -98,7 +98,7 @@ static void optc32_set_odm_combine(struct timing_generator *optc, int *opp_id, i optc1->opp_count = opp_cnt; } -static void optc32_set_h_timing_div_manual_mode(struct timing_generator *optc, bool manual_mode) +void optc32_set_h_timing_div_manual_mode(struct timing_generator *optc, bool manual_mode) { struct optc *optc1 = DCN10TG_FROM_TG(optc); diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.h index 5e57c39235fa..e5c5343e5640 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.h @@ -250,5 +250,6 @@ SF(OTG0_OTG_DRR_CONTROL, OTG_V_TOTAL_LAST_USED_BY_DRR, mask_sh) void dcn32_timing_generator_init(struct optc *optc1); +void optc32_set_h_timing_div_manual_mode(struct timing_generator *optc, bool manual_mode); #endif /* __DC_OPTC_DCN32_H__ */ From c4629c757528f87ee22ea8fe16c645e471b9cbf6 Mon Sep 17 00:00:00 2001 From: Hersen Wu Date: Thu, 25 May 2023 08:37:40 -0400 Subject: [PATCH 144/224] drm/amd/display: edp do not add non-edid timings commit 7a0e005c7957931689a327b2a4e7333a19f13f95 upstream. [Why] most edp support only timings from edid. applying non-edid timings, especially those timings out of edp bandwidth, may damage edp. [How] do not add non-edid timings for edp. Cc: Mario Limonciello Cc: Alex Deucher Cc: stable@vger.kernel.org Acked-by: Stylon Wang Signed-off-by: Hersen Wu Reviewed-by: Roman Li Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 9be3769d68a8..b854eec2787e 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6972,7 +6972,13 @@ static int amdgpu_dm_connector_get_modes(struct drm_connector *connector) drm_add_modes_noedid(connector, 640, 480); } else { amdgpu_dm_connector_ddc_get_modes(connector, edid); - amdgpu_dm_connector_add_common_modes(encoder, connector); + /* most eDP supports only timings from its edid, + * usually only detailed timings are available + * from eDP edid. timings which are not from edid + * may damage eDP + */ + if (connector->connector_type != DRM_MODE_CONNECTOR_eDP) + amdgpu_dm_connector_add_common_modes(encoder, connector); amdgpu_dm_connector_add_freesync_modes(connector, edid); } amdgpu_dm_fbc_init(connector); From a2ef3163c3604788abdc060cab74c95ed44fec1a Mon Sep 17 00:00:00 2001 From: Austin Zheng Date: Thu, 15 Jun 2023 16:41:08 -0400 Subject: [PATCH 145/224] drm/amd/display: Remove Phantom Pipe Check When Calculating K1 and K2 commit 1966bbfdfe476d271b338336254854c5edd5a907 upstream. [Why] K1 and K2 not being setting properly when subVP is active. [How] Have phantom pipes use the same programing as the main pipes without checking the paired stream Cc: stable@vger.kernel.org Tested-by: Daniel Wheeler Reviewed-by: Alvin Lee Acked-by: Rodrigo Siqueira Signed-off-by: Austin Zheng Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c index 2f4afe40f3e6..f5fa7abd97fc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c @@ -1165,10 +1165,6 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsign unsigned int odm_combine_factor = 0; bool two_pix_per_container = false; - // For phantom pipes, use the same programming as the main pipes - if (pipe_ctx->stream->mall_stream_config.type == SUBVP_PHANTOM) { - stream = pipe_ctx->stream->mall_stream_config.paired_stream; - } two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing); odm_combine_factor = get_odm_config(pipe_ctx, NULL); From 3546f76c7ad87d0ade575a5c4acad5e4704d927c Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Thu, 8 Jun 2023 16:37:38 -0400 Subject: [PATCH 146/224] drm/amd/display: disable seamless boot if force_odm_combine is enabled commit 26518b39181876064850209ecdab48c0ee5924b1 upstream. [Why & How] Having seamless boot on while forcing debug option ODM combine 2 to 1 will cause some corruptions because of some missing programmings. Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Nicholas Kazlauskas Acked-by: Hamza Mahfooz Signed-off-by: Leo Chen Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/core/dc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index b405f2e86927..cca014344416 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1539,6 +1539,9 @@ bool dc_validate_boot_timing(const struct dc *dc, return false; } + if (dc->debug.force_odm_combine) + return false; + /* Check for enabled DIG to identify enabled display */ if (!link->link_enc->funcs->is_dig_enabled(link->link_enc)) return false; From 91e69e67d401eb67178ce5992ddc9b1046b39ee7 Mon Sep 17 00:00:00 2001 From: Samuel Pitoiset Date: Fri, 16 Jun 2023 15:14:07 +0200 Subject: [PATCH 147/224] drm/amdgpu: fix clearing mappings for BOs that are always valid in VM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ea2c3c08554601b051d91403a241266e1cf490a5 upstream. Per VM BOs must be marked as moved or otherwise their ranges are not updated on use which might be necessary when the replace operation splits mappings. This fixes random GPU hangs when replacing sparse mappings from the userspace, while OP_MAP/OP_UNMAP works fine because always valid BOs are correctly handled there. Cc: stable@vger.kernel.org Signed-off-by: Samuel Pitoiset Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 58fe7279599f..ec938a1a5062 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1668,18 +1668,30 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, /* Insert partial mapping before the range */ if (!list_empty(&before->list)) { + struct amdgpu_bo *bo = before->bo_va->base.bo; + amdgpu_vm_it_insert(before, &vm->va); if (before->flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev); + + if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv && + !before->bo_va->base.moved) + amdgpu_vm_bo_moved(&before->bo_va->base); } else { kfree(before); } /* Insert partial mapping after the range */ if (!list_empty(&after->list)) { + struct amdgpu_bo *bo = after->bo_va->base.bo; + amdgpu_vm_it_insert(after, &vm->va); if (after->flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev); + + if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv && + !after->bo_va->base.moved) + amdgpu_vm_bo_moved(&after->bo_va->base); } else { kfree(after); } From 3092beeb25dc0944a3223acd6763720962063af9 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Mon, 19 Jun 2023 15:04:24 -0500 Subject: [PATCH 148/224] drm/amd: Disable PSR-SU on Parade 0803 TCON commit 072030b1783056b5de8b0fac5303a5e9dbc6cfde upstream. A number of users have reported that there are random hangs occurring caused by PSR-SU specifically on panels that contain the parade 0803 TCON. Users have been able to work around the issue by disabling PSR entirely. To avoid these hangs, disable PSR-SU when this TCON is found. Cc: stable@vger.kernel.org Cc: Sean Wang Cc: Marc Rossi Cc: Hamza Mahfooz Suggested-by: Tsung-hua (Ryan) Lin Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2443 Signed-off-by: Mario Limonciello Reviewed-by: Hamza Mahfooz Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/modules/power/power_helpers.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 9edd39322c82..67287ad07226 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -816,6 +816,8 @@ bool is_psr_su_specific_panel(struct dc_link *link) ((dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x08) || (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x07))) isPSRSUSupported = false; + else if (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x03) + isPSRSUSupported = false; else if (dpcd_caps->psr_info.force_psrsu_cap == 0x1) isPSRSUSupported = true; } From ad85fc99d6389bde08dc1dec55a2443514feba6e Mon Sep 17 00:00:00 2001 From: Sung-huai Wang Date: Tue, 6 Jun 2023 14:28:38 +0800 Subject: [PATCH 149/224] drm/amd/display: add a NULL pointer check commit 0f48a4b83610cb0e4e0bc487800ab69f51b4aca6 upstream. [Why & How] We have to check if stream is properly initialized before calling find_matching_pll(), otherwise we might end up trying to deferecence a NULL pointer. Cc: stable@vger.kernel.org # 6.1+ Reviewed-by: Nicholas Kazlauskas Acked-by: Hamza Mahfooz Signed-off-by: Sung-huai Wang Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- .../gpu/drm/amd/display/dc/dce112/dce112_resource.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c index e179e80667d1..19d7cfa53211 100644 --- a/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce112/dce112_resource.c @@ -970,10 +970,12 @@ enum dc_status resource_map_phy_clock_resources( || dc_is_virtual_signal(pipe_ctx->stream->signal)) pipe_ctx->clock_source = dc->res_pool->dp_clock_source; - else - pipe_ctx->clock_source = find_matching_pll( - &context->res_ctx, dc->res_pool, - stream); + else { + if (stream && stream->link && stream->link->link_enc) + pipe_ctx->clock_source = find_matching_pll( + &context->res_ctx, dc->res_pool, + stream); + } if (pipe_ctx->clock_source == NULL) return DC_NO_CLOCK_SOURCE_RESOURCE; From 7ad40467fdfb57bdd8540a4a08cbb448f323c275 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 23 Jun 2023 10:05:19 -0500 Subject: [PATCH 150/224] drm/amd/display: Correct `DMUB_FW_VERSION` macro commit 274d205cb59f43815542e04b42a9e6d0b9b95eff upstream. The `DMUB_FW_VERSION` macro has a mistake in that the revision field is off by one byte. The last byte is typically used for other purposes and not a revision. Cc: stable@vger.kernel.org Cc: Sean Wang Cc: Marc Rossi Cc: Hamza Mahfooz Cc: Tsung-hua (Ryan) Lin Reviewed-by: Leo Li Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dmub/dmub_srv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index eb5b7eb292ef..b53468aca4a9 100644 --- a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -471,7 +471,7 @@ struct dmub_notification { * of a firmware to know if feature or functionality is supported or present. */ #define DMUB_FW_VERSION(major, minor, revision) \ - ((((major) & 0xFF) << 24) | (((minor) & 0xFF) << 16) | ((revision) & 0xFFFF)) + ((((major) & 0xFF) << 24) | (((minor) & 0xFF) << 16) | (((revision) & 0xFF) << 8)) /** * dmub_srv_create() - creates the DMUB service. From 8404d0e274ac1f780e29fa6380ad4e2f9c4bd3da Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Mon, 12 Jun 2023 12:44:00 -0400 Subject: [PATCH 151/224] drm/amd/display: Add monitor specific edid quirk commit 613a7956deb3b1ffa2810c6d4c90ee9c3d743dbb upstream. Disable FAMS on a Samsung Odyssey G9 monitor. Experiments show that this monitor does not work well under some use cases, and is likely implementation specific bug on the monitor's firmware. Cc: stable@vger.kernel.org Reviewed-by: Rodrigo Siqueira Signed-off-by: Aurabindo Pillai Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 0329e548134b..db7744beed5f 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -42,6 +42,30 @@ #include "dm_helpers.h" #include "ddc_service_types.h" +static u32 edid_extract_panel_id(struct edid *edid) +{ + return (u32)edid->mfg_id[0] << 24 | + (u32)edid->mfg_id[1] << 16 | + (u32)EDID_PRODUCT_ID(edid); +} + +static void apply_edid_quirks(struct edid *edid, struct dc_edid_caps *edid_caps) +{ + uint32_t panel_id = edid_extract_panel_id(edid); + + switch (panel_id) { + /* Workaround for some monitors which does not work well with FAMS */ + case drm_edid_encode_panel_id('S', 'A', 'M', 0x0E5E): + case drm_edid_encode_panel_id('S', 'A', 'M', 0x7053): + case drm_edid_encode_panel_id('S', 'A', 'M', 0x71AC): + DRM_DEBUG_DRIVER("Disabling FAMS on monitor with panel id %X\n", panel_id); + edid_caps->panel_patch.disable_fams = true; + break; + default: + return; + } +} + /* dm_helpers_parse_edid_caps * * Parse edid caps @@ -113,6 +137,8 @@ enum dc_edid_status dm_helpers_parse_edid_caps( else edid_caps->speaker_flags = DEFAULT_SPEAKER_LOCATION; + apply_edid_quirks(edid_buf, edid_caps); + kfree(sads); kfree(sadb); From fe26d0fa9408896e821d1c8dd2ab52171da03ed9 Mon Sep 17 00:00:00 2001 From: gaba Date: Thu, 2 Mar 2023 19:03:56 -0500 Subject: [PATCH 152/224] drm/amdgpu: avoid restore process run into dead loop. commit 8a774fe912ff09e39c2d3a3589c729330113f388 upstream. In restore process worker, pinned BO cause update PTE fail, then the function re-schedule the restore_work. This will generate dead loop. Signed-off-by: gaba Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index da01c1424b4a..260e6a3316db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2737,6 +2737,9 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) if (!attachment->is_mapped) continue; + if (attachment->bo_va->base.bo->tbo.pin_count) + continue; + kfd_mem_dmaunmap_attachment(mem, attachment); ret = update_gpuvm_pte(mem, attachment, &sync_obj); if (ret) { From f037f6038736bd038ddb9c72de979a08cc1ee3b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Mon, 26 Jun 2023 11:14:50 +0200 Subject: [PATCH 153/224] drm/ttm: Don't leak a resource on swapout move error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a590f03d8de7c4cb7ce4916dc7f2fd10711faabe upstream. If moving the bo to system for swapout failed, we were leaking a resource. Fix. Fixes: bfa3357ef9ab ("drm/ttm: allocate resource object instead of embedding it v2") Cc: Christian König Cc: "Christian König" Cc: dri-devel@lists.freedesktop.org Cc: # v5.14+ Signed-off-by: Thomas Hellström Reviewed-by: Nirmoy Das Reviewed-by: Andi Shyti Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20230626091450.14757-5-thomas.hellstrom@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/ttm/ttm_bo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 7c8e8be774f1..f2c4e9037d6e 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1165,6 +1165,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, ret = ttm_bo_handle_move_mem(bo, evict_mem, true, &ctx, &hop); if (unlikely(ret != 0)) { WARN(ret == -EMULTIHOP, "Unexpected multihop in swaput - likely driver bug.\n"); + ttm_resource_free(bo, &evict_mem); goto out; } } From 9fd9e1d0987863c37558400be0ebe80867ae7866 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 19 Jun 2023 12:45:17 +0300 Subject: [PATCH 154/224] serial: atmel: don't enable IRQs prematurely commit 27a826837ec9a3e94cc44bd9328b8289b0fcecd7 upstream. The atmel_complete_tx_dma() function disables IRQs at the start of the function by calling spin_lock_irqsave(&port->lock, flags); There is no need to disable them a second time using the spin_lock_irq() function and, in fact, doing so is a bug because it will enable IRQs prematurely when we call spin_unlock_irq(). Just use spin_lock/unlock() instead without disabling or enabling IRQs. Fixes: 08f738be88bb ("serial: at91: add tx dma support") Signed-off-by: Dan Carpenter Reviewed-by: Jiri Slaby Acked-by: Richard Genoud Link: https://lore.kernel.org/r/cb7c39a9-c004-4673-92e1-be4e34b85368@moroto.mountain Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index cff64e5edee2..fbce8ef205ce 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -880,11 +880,11 @@ static void atmel_complete_tx_dma(void *arg) port->icount.tx += atmel_port->tx_len; - spin_lock_irq(&atmel_port->lock_tx); + spin_lock(&atmel_port->lock_tx); async_tx_ack(atmel_port->desc_tx); atmel_port->cookie_tx = -EINVAL; atmel_port->desc_tx = NULL; - spin_unlock_irq(&atmel_port->lock_tx); + spin_unlock(&atmel_port->lock_tx); if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(port); From 34f5b826dd509b76644f83094b4af7e7668a6a38 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 10 Jun 2023 17:59:25 +0200 Subject: [PATCH 155/224] tty: serial: samsung_tty: Fix a memory leak in s3c24xx_serial_getclk() in case of error commit a9c09546e903f1068acfa38e1ee18bded7114b37 upstream. If clk_get_rate() fails, the clk that has just been allocated needs to be freed. Cc: # v3.3+ Reviewed-by: Krzysztof Kozlowski Reviewed-by: Andi Shyti Fixes: 5f5a7a5578c5 ("serial: samsung: switch to clkdev based clock lookup") Signed-off-by: Christophe JAILLET Reviewed-by: Jiri Slaby Message-ID: Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/samsung_tty.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/samsung_tty.c b/drivers/tty/serial/samsung_tty.c index 77d1363029f5..c8365a367513 100644 --- a/drivers/tty/serial/samsung_tty.c +++ b/drivers/tty/serial/samsung_tty.c @@ -1467,8 +1467,12 @@ static unsigned int s3c24xx_serial_getclk(struct s3c24xx_uart_port *ourport, continue; rate = clk_get_rate(clk); - if (!rate) + if (!rate) { + dev_err(ourport->port.dev, + "Failed to get clock rate for %s.\n", clkname); + clk_put(clk); continue; + } if (ourport->info->has_divslot) { unsigned long div = rate / req_baud; From f0bf102ef9b05d7294bd8d506755465f6867d944 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 10 Jun 2023 17:59:26 +0200 Subject: [PATCH 156/224] tty: serial: samsung_tty: Fix a memory leak in s3c24xx_serial_getclk() when iterating clk commit 832e231cff476102e8204a9e7bddfe5c6154a375 upstream. When the best clk is searched, we iterate over all possible clk. If we find a better match, the previous one, if any, needs to be freed. If a better match has already been found, we still need to free the new one, otherwise it leaks. Cc: # v3.3+ Reviewed-by: Krzysztof Kozlowski Reviewed-by: Andi Shyti Fixes: 5f5a7a5578c5 ("serial: samsung: switch to clkdev based clock lookup") Signed-off-by: Christophe JAILLET Reviewed-by: Jiri Slaby Message-ID: Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/samsung_tty.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/tty/serial/samsung_tty.c b/drivers/tty/serial/samsung_tty.c index c8365a367513..aa2c51b84116 100644 --- a/drivers/tty/serial/samsung_tty.c +++ b/drivers/tty/serial/samsung_tty.c @@ -1498,10 +1498,18 @@ static unsigned int s3c24xx_serial_getclk(struct s3c24xx_uart_port *ourport, calc_deviation = -calc_deviation; if (calc_deviation < deviation) { + /* + * If we find a better clk, release the previous one, if + * any. + */ + if (!IS_ERR(*best_clk)) + clk_put(*best_clk); *best_clk = clk; best_quot = quot; *clk_num = cnt; deviation = calc_deviation; + } else { + clk_put(clk); } } From 5553d587a371326644d5d59bb4ff88f9d6ac76ed Mon Sep 17 00:00:00 2001 From: Martin Fuzzey Date: Fri, 16 Jun 2023 12:47:23 +0200 Subject: [PATCH 157/224] tty: serial: imx: fix rs485 rx after tx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 639949a7031e04c59ec91614eceb9543e9120f43 upstream. Since commit 79d0224f6bf2 ("tty: serial: imx: Handle RS485 DE signal active high") RS485 reception no longer works after a transmission. The following scenario shows the problem: 1) Open a port in RS485 mode 2) Receive data from remote (OK) 3) Transmit data to remote (OK) 4) Receive data from remote (Nothing received) In RS485 mode, imx_uart_start_tx() calls imx_uart_stop_rx() and, when the transmission is complete, imx_uart_stop_tx() calls imx_uart_start_rx(). Since the above commit imx_uart_stop_rx() now sets the loopback bit but imx_uart_start_rx() does not clear it causing the hardware to remain in loopback mode and not receive external data. Fix this by moving the existing loopback disable code to a helper function and calling it from imx_uart_start_rx() too. Fixes: 79d0224f6bf2 ("tty: serial: imx: Handle RS485 DE signal active high") Cc: stable@vger.kernel.org Signed-off-by: Martin Fuzzey Reviewed-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20230616104838.2729694-1-martin.fuzzey@flowbird.group Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/imx.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index f07c4f9ff13c..d2137f6eff32 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -397,6 +397,16 @@ static void start_hrtimer_ms(struct hrtimer *hrt, unsigned long msec) hrtimer_start(hrt, ms_to_ktime(msec), HRTIMER_MODE_REL); } +static void imx_uart_disable_loopback_rs485(struct imx_port *sport) +{ + unsigned int uts; + + /* See SER_RS485_ENABLED/UTS_LOOP comment in imx_uart_probe() */ + uts = imx_uart_readl(sport, imx_uart_uts_reg(sport)); + uts &= ~UTS_LOOP; + imx_uart_writel(sport, uts, imx_uart_uts_reg(sport)); +} + /* called with port.lock taken and irqs off */ static void imx_uart_start_rx(struct uart_port *port) { @@ -418,6 +428,7 @@ static void imx_uart_start_rx(struct uart_port *port) /* Write UCR2 first as it includes RXEN */ imx_uart_writel(sport, ucr2, UCR2); imx_uart_writel(sport, ucr1, UCR1); + imx_uart_disable_loopback_rs485(sport); } /* called with port.lock taken and irqs off */ @@ -1404,7 +1415,7 @@ static int imx_uart_startup(struct uart_port *port) int retval, i; unsigned long flags; int dma_is_inited = 0; - u32 ucr1, ucr2, ucr3, ucr4, uts; + u32 ucr1, ucr2, ucr3, ucr4; retval = clk_prepare_enable(sport->clk_per); if (retval) @@ -1509,10 +1520,7 @@ static int imx_uart_startup(struct uart_port *port) imx_uart_writel(sport, ucr2, UCR2); } - /* See SER_RS485_ENABLED/UTS_LOOP comment in imx_uart_probe() */ - uts = imx_uart_readl(sport, imx_uart_uts_reg(sport)); - uts &= ~UTS_LOOP; - imx_uart_writel(sport, uts, imx_uart_uts_reg(sport)); + imx_uart_disable_loopback_rs485(sport); spin_unlock_irqrestore(&sport->port.lock, flags); From cb8a31a56df8492fb0d900959238e1a3ff8b8981 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 13 Jun 2023 16:15:21 -0500 Subject: [PATCH 158/224] firmware: stratix10-svc: Fix a potential resource leak in svc_create_memory_pool() commit 1995f15590ca222f91193ed11461862b450abfd6 upstream. svc_create_memory_pool() is only called from stratix10_svc_drv_probe(). Most of resources in the probe are managed, but not this memremap() call. There is also no memunmap() call in the file. So switch to devm_memremap() to avoid a resource leak. Cc: stable@vger.kernel.org Fixes: 7ca5ce896524 ("firmware: add Intel Stratix10 service layer driver") Link: https://lore.kernel.org/all/783e9dfbba34e28505c9efa8bba41f97fd0fa1dc.1686109400.git.christophe.jaillet@wanadoo.fr/ Signed-off-by: Christophe JAILLET Signed-off-by: Dinh Nguyen Message-ID: <20230613211521.16366-1-dinguyen@kernel.org> Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/stratix10-svc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/stratix10-svc.c b/drivers/firmware/stratix10-svc.c index 80f4e2d14e04..2d674126160f 100644 --- a/drivers/firmware/stratix10-svc.c +++ b/drivers/firmware/stratix10-svc.c @@ -755,7 +755,7 @@ svc_create_memory_pool(struct platform_device *pdev, end = rounddown(sh_memory->addr + sh_memory->size, PAGE_SIZE); paddr = begin; size = end - begin; - va = memremap(paddr, size, MEMREMAP_WC); + va = devm_memremap(dev, paddr, size, MEMREMAP_WC); if (!va) { dev_err(dev, "fail to remap shared memory\n"); return ERR_PTR(-EINVAL); From 183c0ae4fafcdcb95c06f40c0c35a39d89c1aa2d Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 10 Jul 2023 20:39:29 +0200 Subject: [PATCH 159/224] libceph: harden msgr2.1 frame segment length checks commit a282a2f10539dce2aa619e71e1817570d557fc97 upstream. ceph_frame_desc::fd_lens is an int array. decode_preamble() thus effectively casts u32 -> int but the checks for segment lengths are written as if on unsigned values. While reading in HELLO or one of the AUTH frames (before authentication is completed), arithmetic in head_onwire_len() can get duped by negative ctrl_len and produce head_len which is less than CEPH_PREAMBLE_LEN but still positive. This would lead to a buffer overrun in prepare_read_control() as the preamble gets copied to the newly allocated buffer of size head_len. Cc: stable@vger.kernel.org Fixes: cd1a677cad99 ("libceph, ceph: implement msgr2.1 protocol (crc and secure modes)") Reported-by: Thelford Williams Signed-off-by: Ilya Dryomov Reviewed-by: Xiubo Li Signed-off-by: Greg Kroah-Hartman --- net/ceph/messenger_v2.c | 47 +++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index 3009028c4fa2..489baf2e6176 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -392,6 +392,8 @@ static int head_onwire_len(int ctrl_len, bool secure) int head_len; int rem_len; + BUG_ON(ctrl_len < 0 || ctrl_len > CEPH_MSG_MAX_CONTROL_LEN); + if (secure) { head_len = CEPH_PREAMBLE_SECURE_LEN; if (ctrl_len > CEPH_PREAMBLE_INLINE_LEN) { @@ -410,6 +412,10 @@ static int head_onwire_len(int ctrl_len, bool secure) static int __tail_onwire_len(int front_len, int middle_len, int data_len, bool secure) { + BUG_ON(front_len < 0 || front_len > CEPH_MSG_MAX_FRONT_LEN || + middle_len < 0 || middle_len > CEPH_MSG_MAX_MIDDLE_LEN || + data_len < 0 || data_len > CEPH_MSG_MAX_DATA_LEN); + if (!front_len && !middle_len && !data_len) return 0; @@ -522,29 +528,34 @@ static int decode_preamble(void *p, struct ceph_frame_desc *desc) desc->fd_aligns[i] = ceph_decode_16(&p); } + if (desc->fd_lens[0] < 0 || + desc->fd_lens[0] > CEPH_MSG_MAX_CONTROL_LEN) { + pr_err("bad control segment length %d\n", desc->fd_lens[0]); + return -EINVAL; + } + if (desc->fd_lens[1] < 0 || + desc->fd_lens[1] > CEPH_MSG_MAX_FRONT_LEN) { + pr_err("bad front segment length %d\n", desc->fd_lens[1]); + return -EINVAL; + } + if (desc->fd_lens[2] < 0 || + desc->fd_lens[2] > CEPH_MSG_MAX_MIDDLE_LEN) { + pr_err("bad middle segment length %d\n", desc->fd_lens[2]); + return -EINVAL; + } + if (desc->fd_lens[3] < 0 || + desc->fd_lens[3] > CEPH_MSG_MAX_DATA_LEN) { + pr_err("bad data segment length %d\n", desc->fd_lens[3]); + return -EINVAL; + } + /* * This would fire for FRAME_TAG_WAIT (it has one empty * segment), but we should never get it as client. */ if (!desc->fd_lens[desc->fd_seg_cnt - 1]) { - pr_err("last segment empty\n"); - return -EINVAL; - } - - if (desc->fd_lens[0] > CEPH_MSG_MAX_CONTROL_LEN) { - pr_err("control segment too big %d\n", desc->fd_lens[0]); - return -EINVAL; - } - if (desc->fd_lens[1] > CEPH_MSG_MAX_FRONT_LEN) { - pr_err("front segment too big %d\n", desc->fd_lens[1]); - return -EINVAL; - } - if (desc->fd_lens[2] > CEPH_MSG_MAX_MIDDLE_LEN) { - pr_err("middle segment too big %d\n", desc->fd_lens[2]); - return -EINVAL; - } - if (desc->fd_lens[3] > CEPH_MSG_MAX_DATA_LEN) { - pr_err("data segment too big %d\n", desc->fd_lens[3]); + pr_err("last segment empty, segment count %d\n", + desc->fd_seg_cnt); return -EINVAL; } From d545ff97cf43996d2f5abdec9cb153e02fabcaef Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 10 May 2023 19:55:46 +0800 Subject: [PATCH 160/224] ceph: add a dedicated private data for netfs rreq commit 23ee27dce30e7d3091d6c3143b79f48dab6f9a3e upstream. We need to save the 'f_ra.ra_pages' to expand the readahead window later. Cc: stable@vger.kernel.org Fixes: 49870056005c ("ceph: convert ceph_readpages to ceph_readahead") Link: https://lore.kernel.org/ceph-devel/20230504082510.247-1-sehuww@mail.scut.edu.cn Link: https://www.spinics.net/lists/ceph-users/msg76183.html Signed-off-by: Xiubo Li Reviewed-and-tested-by: Hu Weiwen Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/addr.c | 45 ++++++++++++++++++++++++++++++++++----------- fs/ceph/super.h | 13 +++++++++++++ 2 files changed, 47 insertions(+), 11 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 478c03bfba66..95e29c902e44 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -362,18 +362,28 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file) { struct inode *inode = rreq->inode; int got = 0, want = CEPH_CAP_FILE_CACHE; + struct ceph_netfs_request_data *priv; int ret = 0; if (rreq->origin != NETFS_READAHEAD) return 0; + priv = kzalloc(sizeof(*priv), GFP_NOFS); + if (!priv) + return -ENOMEM; + if (file) { struct ceph_rw_context *rw_ctx; struct ceph_file_info *fi = file->private_data; + priv->file_ra_pages = file->f_ra.ra_pages; + priv->file_ra_disabled = file->f_mode & FMODE_RANDOM; + rw_ctx = ceph_find_rw_context(fi); - if (rw_ctx) + if (rw_ctx) { + rreq->netfs_priv = priv; return 0; + } } /* @@ -383,27 +393,40 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file) ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want, true, &got); if (ret < 0) { dout("start_read %p, error getting cap\n", inode); - return ret; + goto out; } if (!(got & want)) { dout("start_read %p, no cache cap\n", inode); - return -EACCES; + ret = -EACCES; + goto out; + } + if (ret == 0) { + ret = -EACCES; + goto out; } - if (ret == 0) - return -EACCES; - rreq->netfs_priv = (void *)(uintptr_t)got; - return 0; + priv->caps = got; + rreq->netfs_priv = priv; + +out: + if (ret < 0) + kfree(priv); + + return ret; } static void ceph_netfs_free_request(struct netfs_io_request *rreq) { - struct ceph_inode_info *ci = ceph_inode(rreq->inode); - int got = (uintptr_t)rreq->netfs_priv; + struct ceph_netfs_request_data *priv = rreq->netfs_priv; - if (got) - ceph_put_cap_refs(ci, got); + if (!priv) + return; + + if (priv->caps) + ceph_put_cap_refs(ceph_inode(rreq->inode), priv->caps); + kfree(priv); + rreq->netfs_priv = NULL; } const struct netfs_request_ops ceph_netfs_ops = { diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 478b741b1107..562f42f4a77d 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -451,6 +451,19 @@ struct ceph_inode_info { unsigned long i_work_mask; }; +struct ceph_netfs_request_data { + int caps; + + /* + * Maximum size of a file readahead request. + * The fadvise could update the bdi's default ra_pages. + */ + unsigned int file_ra_pages; + + /* Set it if fadvise disables file readahead entirely */ + bool file_ra_disabled; +}; + static inline struct ceph_inode_info * ceph_inode(const struct inode *inode) { From 0471d907d8c1e107e34cc75cb32e7dbc0008da4f Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 4 May 2023 19:00:42 +0800 Subject: [PATCH 161/224] ceph: fix blindly expanding the readahead windows commit dc94bb8f271c079f69583d0f12a489aaf5202751 upstream. Blindly expanding the readahead windows will cause unneccessary pagecache thrashing and also will introduce the network workload. We should disable expanding the windows if the readahead is disabled and also shouldn't expand the windows too much. Expanding forward firstly instead of expanding backward for possible sequential reads. Bound `rreq->len` to the actual file size to restore the previous page cache usage. The posix_fadvise may change the maximum size of a file readahead. Cc: stable@vger.kernel.org Fixes: 49870056005c ("ceph: convert ceph_readpages to ceph_readahead") Link: https://lore.kernel.org/ceph-devel/20230504082510.247-1-sehuww@mail.scut.edu.cn Link: https://www.spinics.net/lists/ceph-users/msg76183.html Signed-off-by: Xiubo Li Reviewed-and-tested-by: Hu Weiwen Reviewed-by: Milind Changire Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/addr.c | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 95e29c902e44..dd2ce7fabbae 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -187,16 +187,42 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq) struct inode *inode = rreq->inode; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_layout *lo = &ci->i_layout; + unsigned long max_pages = inode->i_sb->s_bdi->ra_pages; + loff_t end = rreq->start + rreq->len, new_end; + struct ceph_netfs_request_data *priv = rreq->netfs_priv; + unsigned long max_len; u32 blockoff; - u64 blockno; - /* Expand the start downward */ - blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff); - rreq->start = blockno * lo->stripe_unit; - rreq->len += blockoff; + if (priv) { + /* Readahead is disabled by posix_fadvise POSIX_FADV_RANDOM */ + if (priv->file_ra_disabled) + max_pages = 0; + else + max_pages = priv->file_ra_pages; - /* Now, round up the length to the next block */ - rreq->len = roundup(rreq->len, lo->stripe_unit); + } + + /* Readahead is disabled */ + if (!max_pages) + return; + + max_len = max_pages << PAGE_SHIFT; + + /* + * Try to expand the length forward by rounding up it to the next + * block, but do not exceed the file size, unless the original + * request already exceeds it. + */ + new_end = min(round_up(end, lo->stripe_unit), rreq->i_size); + if (new_end > end && new_end <= rreq->start + max_len) + rreq->len = new_end - rreq->start; + + /* Try to expand the start downward */ + div_u64_rem(rreq->start, lo->stripe_unit, &blockoff); + if (rreq->len + blockoff <= max_len) { + rreq->start -= blockoff; + rreq->len += blockoff; + } } static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq) From 829361479860051b334ec210257cacfcf0605524 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 28 Jun 2023 07:57:09 +0800 Subject: [PATCH 162/224] ceph: don't let check_caps skip sending responses for revoke msgs commit 257e6172ab36ebbe295a6c9ee9a9dd0fe54c1dc2 upstream. If a client sends out a cap update dropping caps with the prior 'seq' just before an incoming cap revoke request, then the client may drop the revoke because it believes it's already released the requested capabilities. This causes the MDS to wait indefinitely for the client to respond to the revoke. It's therefore always a good idea to ack the cap revoke request with the bumped up 'seq'. Cc: stable@vger.kernel.org Link: https://tracker.ceph.com/issues/61782 Signed-off-by: Xiubo Li Reviewed-by: Milind Changire Reviewed-by: Patrick Donnelly Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/caps.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index cdb26aadae12..4a9ad5ff726d 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -3561,6 +3561,15 @@ static void handle_cap_grant(struct inode *inode, } BUG_ON(cap->issued & ~cap->implemented); + /* don't let check_caps skip sending a response to MDS for revoke msgs */ + if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { + cap->mds_wanted = 0; + if (cap == ci->i_auth_cap) + check_caps = 1; /* check auth cap only */ + else + check_caps = 2; /* check all caps */ + } + if (extra_info->inline_version > 0 && extra_info->inline_version >= ci->i_inline_version) { ci->i_inline_version = extra_info->inline_version; From 8e273a2190b5e98fbc251d8df3398cc901eb80dc Mon Sep 17 00:00:00 2001 From: Weitao Wang Date: Fri, 2 Jun 2023 17:40:06 +0300 Subject: [PATCH 163/224] xhci: Fix resume issue of some ZHAOXIN hosts commit f927728186f0de1167262d6a632f9f7e96433d1a upstream. On ZHAOXIN ZX-100 project, xHCI can't work normally after resume from system Sx state. To fix this issue, when resume from system Sx state, reinitialize xHCI instead of restore. So, Add XHCI_RESET_ON_RESUME quirk for ZX-100 to fix issue of resuming from system Sx state. Cc: stable@vger.kernel.org Signed-off-by: Weitao Wang Signed-off-by: Mathias Nyman Message-ID: <20230602144009.1225632-9-mathias.nyman@linux.intel.com> Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 6e4dac71c409..36735ceef1c0 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -335,6 +335,11 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_4)) xhci->quirks |= XHCI_NO_SOFT_RETRY; + if (pdev->vendor == PCI_VENDOR_ID_ZHAOXIN) { + if (pdev->device == 0x9202) + xhci->quirks |= XHCI_RESET_ON_RESUME; + } + /* xHC spec requires PCI devices to support D3hot and D3cold */ if (xhci->hci_version >= 0x120) xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; From 892ef759300042bd247364987dad072beabc5082 Mon Sep 17 00:00:00 2001 From: Weitao Wang Date: Fri, 2 Jun 2023 17:40:07 +0300 Subject: [PATCH 164/224] xhci: Fix TRB prefetch issue of ZHAOXIN hosts commit 2a865a652299f5666f3b785cbe758c5f57453036 upstream. On some ZHAOXIN hosts, xHCI will prefetch TRB for performance improvement. However this TRB prefetch mechanism may cross page boundary, which may access memory not allocated by xHCI driver. In order to fix this issue, two pages was allocated for a segment and only the first page will be used. And add a quirk XHCI_ZHAOXIN_TRB_FETCH for this issue. Cc: stable@vger.kernel.org Signed-off-by: Weitao Wang Signed-off-by: Mathias Nyman Message-ID: <20230602144009.1225632-10-mathias.nyman@linux.intel.com> Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 8 ++++++-- drivers/usb/host/xhci-pci.c | 7 ++++++- drivers/usb/host/xhci.h | 1 + 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 81ca2bc1f0be..587540fadbdc 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -2439,8 +2439,12 @@ int xhci_mem_init(struct xhci_hcd *xhci, gfp_t flags) * and our use of dma addresses in the trb_address_map radix tree needs * TRB_SEGMENT_SIZE alignment, so we pick the greater alignment need. */ - xhci->segment_pool = dma_pool_create("xHCI ring segments", dev, - TRB_SEGMENT_SIZE, TRB_SEGMENT_SIZE, xhci->page_size); + if (xhci->quirks & XHCI_ZHAOXIN_TRB_FETCH) + xhci->segment_pool = dma_pool_create("xHCI ring segments", dev, + TRB_SEGMENT_SIZE * 2, TRB_SEGMENT_SIZE * 2, xhci->page_size * 2); + else + xhci->segment_pool = dma_pool_create("xHCI ring segments", dev, + TRB_SEGMENT_SIZE, TRB_SEGMENT_SIZE, xhci->page_size); /* See Table 46 and Note on Figure 55 */ xhci->device_pool = dma_pool_create("xHCI input/output contexts", dev, diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 36735ceef1c0..3617e183f0d3 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -336,8 +336,13 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_NO_SOFT_RETRY; if (pdev->vendor == PCI_VENDOR_ID_ZHAOXIN) { - if (pdev->device == 0x9202) + if (pdev->device == 0x9202) { xhci->quirks |= XHCI_RESET_ON_RESUME; + xhci->quirks |= XHCI_ZHAOXIN_TRB_FETCH; + } + + if (pdev->device == 0x9203) + xhci->quirks |= XHCI_ZHAOXIN_TRB_FETCH; } /* xHC spec requires PCI devices to support D3hot and D3cold */ diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 6348cacdc65e..c06136f7fb44 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1899,6 +1899,7 @@ struct xhci_hcd { #define XHCI_EP_CTX_BROKEN_DCS BIT_ULL(42) #define XHCI_SUSPEND_RESUME_CLKS BIT_ULL(43) #define XHCI_RESET_TO_DEFAULT BIT_ULL(44) +#define XHCI_ZHAOXIN_TRB_FETCH BIT_ULL(45) unsigned int num_active_eps; unsigned int limit_active_eps; From 610ddd79fc6a4b69562b8cdddc49627a174090c1 Mon Sep 17 00:00:00 2001 From: Weitao Wang Date: Fri, 2 Jun 2023 17:40:08 +0300 Subject: [PATCH 165/224] xhci: Show ZHAOXIN xHCI root hub speed correctly commit d9b0328d0b8b8298dfdc97cd8e0e2371d4bcc97b upstream. Some ZHAOXIN xHCI controllers follow usb3.1 spec, but only support gen1 speed 5Gbps. While in Linux kernel, if xHCI suspport usb3.1, root hub speed will show on 10Gbps. To fix this issue of ZHAOXIN xHCI platforms, read usb speed ID supported by xHCI to determine root hub speed. And add a quirk XHCI_ZHAOXIN_HOST for this issue. [fix warning about uninitialized symbol -Mathias] Suggested-by: Mathias Nyman Cc: stable@vger.kernel.org Signed-off-by: Weitao Wang Signed-off-by: Mathias Nyman Message-ID: <20230602144009.1225632-11-mathias.nyman@linux.intel.com> Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 31 ++++++++++++++++++++++++------- drivers/usb/host/xhci-pci.c | 2 ++ drivers/usb/host/xhci.h | 1 + 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index 587540fadbdc..019dcbe55dbd 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -2116,7 +2116,7 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, { u32 temp, port_offset, port_count; int i; - u8 major_revision, minor_revision; + u8 major_revision, minor_revision, tmp_minor_revision; struct xhci_hub *rhub; struct device *dev = xhci_to_hcd(xhci)->self.sysdev; struct xhci_port_cap *port_cap; @@ -2136,6 +2136,15 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, */ if (minor_revision > 0x00 && minor_revision < 0x10) minor_revision <<= 4; + /* + * Some zhaoxin's xHCI controller that follow usb3.1 spec + * but only support Gen1. + */ + if (xhci->quirks & XHCI_ZHAOXIN_HOST) { + tmp_minor_revision = minor_revision; + minor_revision = 0; + } + } else if (major_revision <= 0x02) { rhub = &xhci->usb2_rhub; } else { @@ -2145,10 +2154,6 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, /* Ignoring port protocol we can't understand. FIXME */ return; } - rhub->maj_rev = XHCI_EXT_PORT_MAJOR(temp); - - if (rhub->min_rev < minor_revision) - rhub->min_rev = minor_revision; /* Port offset and count in the third dword, see section 7.2 */ temp = readl(addr + 2); @@ -2167,8 +2172,6 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, if (xhci->num_port_caps > max_caps) return; - port_cap->maj_rev = major_revision; - port_cap->min_rev = minor_revision; port_cap->psi_count = XHCI_EXT_PORT_PSIC(temp); if (port_cap->psi_count) { @@ -2189,6 +2192,11 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, XHCI_EXT_PORT_PSIV(port_cap->psi[i - 1]))) port_cap->psi_uid_count++; + if (xhci->quirks & XHCI_ZHAOXIN_HOST && + major_revision == 0x03 && + XHCI_EXT_PORT_PSIV(port_cap->psi[i]) >= 5) + minor_revision = tmp_minor_revision; + xhci_dbg(xhci, "PSIV:%d PSIE:%d PLT:%d PFD:%d LP:%d PSIM:%d\n", XHCI_EXT_PORT_PSIV(port_cap->psi[i]), XHCI_EXT_PORT_PSIE(port_cap->psi[i]), @@ -2198,6 +2206,15 @@ static void xhci_add_in_port(struct xhci_hcd *xhci, unsigned int num_ports, XHCI_EXT_PORT_PSIM(port_cap->psi[i])); } } + + rhub->maj_rev = major_revision; + + if (rhub->min_rev < minor_revision) + rhub->min_rev = minor_revision; + + port_cap->maj_rev = major_revision; + port_cap->min_rev = minor_revision; + /* cache usb2 port capabilities */ if (major_revision < 0x03 && xhci->num_ext_caps < max_caps) xhci->ext_caps[xhci->num_ext_caps++] = temp; diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 3617e183f0d3..5e72d02042ce 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -336,6 +336,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_NO_SOFT_RETRY; if (pdev->vendor == PCI_VENDOR_ID_ZHAOXIN) { + xhci->quirks |= XHCI_ZHAOXIN_HOST; + if (pdev->device == 0x9202) { xhci->quirks |= XHCI_RESET_ON_RESUME; xhci->quirks |= XHCI_ZHAOXIN_TRB_FETCH; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index c06136f7fb44..1354310cb37b 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1900,6 +1900,7 @@ struct xhci_hcd { #define XHCI_SUSPEND_RESUME_CLKS BIT_ULL(43) #define XHCI_RESET_TO_DEFAULT BIT_ULL(44) #define XHCI_ZHAOXIN_TRB_FETCH BIT_ULL(45) +#define XHCI_ZHAOXIN_HOST BIT_ULL(46) unsigned int num_active_eps; unsigned int limit_active_eps; From be06ffa8f4ac58bdbcd677b30cf319ab2884c5d0 Mon Sep 17 00:00:00 2001 From: George Stark Date: Tue, 6 Jun 2023 19:53:57 +0300 Subject: [PATCH 166/224] meson saradc: fix clock divider mask length commit c57fa0037024c92c2ca34243e79e857da5d2c0a9 upstream. According to the datasheets of supported meson SoCs length of ADC_CLK_DIV field is 6-bit. Although all supported SoCs have the register with that field documented later SoCs use external clock rather than ADC internal clock so this patch affects only meson8 family (S8* SoCs). Fixes: 3adbf3427330 ("iio: adc: add a driver for the SAR ADC found in Amlogic Meson SoCs") Signed-off-by: George Stark Reviewed-by: Andy Shevchenko Reviewed-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20230606165357.42417-1-gnstark@sberdevices.ru Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/meson_saradc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/meson_saradc.c b/drivers/iio/adc/meson_saradc.c index 1a68b099d323..eb965974ed85 100644 --- a/drivers/iio/adc/meson_saradc.c +++ b/drivers/iio/adc/meson_saradc.c @@ -71,7 +71,7 @@ #define MESON_SAR_ADC_REG3_PANEL_DETECT_COUNT_MASK GENMASK(20, 18) #define MESON_SAR_ADC_REG3_PANEL_DETECT_FILTER_TB_MASK GENMASK(17, 16) #define MESON_SAR_ADC_REG3_ADC_CLK_DIV_SHIFT 10 - #define MESON_SAR_ADC_REG3_ADC_CLK_DIV_WIDTH 5 + #define MESON_SAR_ADC_REG3_ADC_CLK_DIV_WIDTH 6 #define MESON_SAR_ADC_REG3_BLOCK_DLY_SEL_MASK GENMASK(9, 8) #define MESON_SAR_ADC_REG3_BLOCK_DLY_MASK GENMASK(7, 0) From 76ab057de777723ec924654502d1a260ba7d7d54 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Tue, 30 May 2023 17:54:46 +0200 Subject: [PATCH 167/224] opp: Fix use-after-free in lazy_opp_tables after probe deferral commit b2a2ab039bd58f51355e33d7d3fc64605d7f870d upstream. When dev_pm_opp_of_find_icc_paths() in _allocate_opp_table() returns -EPROBE_DEFER, the opp_table is freed again, to wait until all the interconnect paths are available. However, if the OPP table is using required-opps then it may already have been added to the global lazy_opp_tables list. The error path does not remove the opp_table from the list again. This can cause crashes later when the provider of the required-opps is added, since we will iterate over OPP tables that have already been freed. E.g.: Unable to handle kernel NULL pointer dereference when read CPU: 0 PID: 7 Comm: kworker/0:0 Not tainted 6.4.0-rc3 PC is at _of_add_opp_table_v2 (include/linux/of.h:949 drivers/opp/of.c:98 drivers/opp/of.c:344 drivers/opp/of.c:404 drivers/opp/of.c:1032) -> lazy_link_required_opp_table() Fix this by calling _of_clear_opp_table() to remove the opp_table from the list and clear other allocated resources. While at it, also add the missing mutex_destroy() calls in the error path. Cc: stable@vger.kernel.org Suggested-by: Viresh Kumar Fixes: 7eba0c7641b0 ("opp: Allow lazy-linking of required-opps") Signed-off-by: Stephan Gerhold Signed-off-by: Viresh Kumar Signed-off-by: Greg Kroah-Hartman --- drivers/opp/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index e87567dbe99f..d707214069ca 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1348,7 +1348,10 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index) return opp_table; remove_opp_dev: + _of_clear_opp_table(opp_table); _remove_opp_dev(opp_dev, opp_table); + mutex_destroy(&opp_table->genpd_virt_dev_lock); + mutex_destroy(&opp_table->lock); err: kfree(opp_table); return ERR_PTR(ret); From 801daff0078087b5df9145c9f5e643c28129734b Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 1 Jun 2023 12:25:25 +0200 Subject: [PATCH 168/224] soundwire: qcom: fix storing port config out-of-bounds commit 490937d479abe5f6584e69b96df066bc87be92e9 upstream. The 'qcom_swrm_ctrl->pconfig' has size of QCOM_SDW_MAX_PORTS (14), however we index it starting from 1, not 0, to match real port numbers. This can lead to writing port config past 'pconfig' bounds and overwriting next member of 'qcom_swrm_ctrl' struct. Reported also by smatch: drivers/soundwire/qcom.c:1269 qcom_swrm_get_port_config() error: buffer overflow 'ctrl->pconfig' 14 <= 14 Fixes: 9916c02ccd74 ("soundwire: qcom: cleanup internal port config indexing") Cc: Reported-by: kernel test robot Reported-by: Dan Carpenter Link: https://lore.kernel.org/r/202305201301.sCJ8UDKV-lkp@intel.com/ Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20230601102525.609627-1-krzysztof.kozlowski@linaro.org Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/soundwire/qcom.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c index b2eb3090f4b4..08934d27f709 100644 --- a/drivers/soundwire/qcom.c +++ b/drivers/soundwire/qcom.c @@ -167,7 +167,8 @@ struct qcom_swrm_ctrl { u32 intr_mask; u8 rcmd_id; u8 wcmd_id; - struct qcom_swrm_port_config pconfig[QCOM_SDW_MAX_PORTS]; + /* Port numbers are 1 - 14 */ + struct qcom_swrm_port_config pconfig[QCOM_SDW_MAX_PORTS + 1]; struct sdw_stream_runtime *sruntime[SWRM_MAX_DAIS]; enum sdw_slave_status status[SDW_MAX_DEVICES + 1]; int (*reg_read)(struct qcom_swrm_ctrl *ctrl, int reg, u32 *val); From 599c0ebdb5ccd9afecc326c09c7a95aa8cb9dbcb Mon Sep 17 00:00:00 2001 From: Jiaqing Zhao Date: Mon, 19 Jun 2023 15:57:44 +0000 Subject: [PATCH 169/224] Revert "8250: add support for ASIX devices with a FIFO bug" commit a82d62f708545d22859584e0e0620da8e3759bbc upstream. This reverts commit eb26dfe8aa7eeb5a5aa0b7574550125f8aa4c3b3. Commit eb26dfe8aa7e ("8250: add support for ASIX devices with a FIFO bug") merged on Jul 13, 2012 adds a quirk for PCI_VENDOR_ID_ASIX (0x9710). But that ID is the same as PCI_VENDOR_ID_NETMOS defined in 1f8b061050c7 ("[PATCH] Netmos parallel/serial/combo support") merged on Mar 28, 2005. In pci_serial_quirks array, the NetMos entry always takes precedence over the ASIX entry even since it was initially merged, code in that commit is always unreachable. In my tests, adding the FIFO workaround to pci_netmos_init() makes no difference, and the vendor driver also does not have such workaround. Given that the code was never used for over a decade, it's safe to revert it. Also, the real PCI_VENDOR_ID_ASIX should be 0x125b, which is used on their newer AX99100 PCIe serial controllers released on 2016. The FIFO workaround should not be intended for these newer controllers, and it was never implemented in vendor driver. Fixes: eb26dfe8aa7e ("8250: add support for ASIX devices with a FIFO bug") Cc: stable Signed-off-by: Jiaqing Zhao Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20230619155743.827859-1-jiaqing.zhao@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250.h | 1 - drivers/tty/serial/8250/8250_pci.c | 19 ------------------- drivers/tty/serial/8250/8250_port.c | 11 +++-------- include/linux/serial_8250.h | 1 - 4 files changed, 3 insertions(+), 29 deletions(-) diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index 1e8fe44a7099..eeb7b43ebe53 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h @@ -91,7 +91,6 @@ struct serial8250_config { #define UART_BUG_TXEN BIT(1) /* UART has buggy TX IIR status */ #define UART_BUG_NOMSR BIT(2) /* UART has buggy MSR status bits (Au1x00) */ #define UART_BUG_THRE BIT(3) /* UART has buggy THRE reassertion */ -#define UART_BUG_PARITY BIT(4) /* UART mishandles parity if FIFO enabled */ #define UART_BUG_TXRACE BIT(5) /* UART Tx fails to set remote DR */ diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index cd27821f54ec..0ea89df6702f 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -1252,14 +1252,6 @@ static int pci_oxsemi_tornado_setup(struct serial_private *priv, return pci_default_setup(priv, board, up, idx); } -static int pci_asix_setup(struct serial_private *priv, - const struct pciserial_board *board, - struct uart_8250_port *port, int idx) -{ - port->bugs |= UART_BUG_PARITY; - return pci_default_setup(priv, board, port, idx); -} - #define QPCR_TEST_FOR1 0x3F #define QPCR_TEST_GET1 0x00 #define QPCR_TEST_FOR2 0x40 @@ -1975,7 +1967,6 @@ pci_moxa_setup(struct serial_private *priv, #define PCI_DEVICE_ID_WCH_CH355_4S 0x7173 #define PCI_VENDOR_ID_AGESTAR 0x5372 #define PCI_DEVICE_ID_AGESTAR_9375 0x6872 -#define PCI_VENDOR_ID_ASIX 0x9710 #define PCI_DEVICE_ID_BROADCOM_TRUMANAGE 0x160a #define PCI_DEVICE_ID_AMCC_ADDIDATA_APCI7800 0x818e @@ -2620,16 +2611,6 @@ static struct pci_serial_quirk pci_serial_quirks[] = { .exit = pci_wch_ch38x_exit, .setup = pci_wch_ch38x_setup, }, - /* - * ASIX devices with FIFO bug - */ - { - .vendor = PCI_VENDOR_ID_ASIX, - .device = PCI_ANY_ID, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .setup = pci_asix_setup, - }, /* * Broadcom TruManage (NetXtreme) */ diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index b8e8a96c3eb6..acf578aa9930 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -2629,11 +2629,8 @@ static unsigned char serial8250_compute_lcr(struct uart_8250_port *up, if (c_cflag & CSTOPB) cval |= UART_LCR_STOP; - if (c_cflag & PARENB) { + if (c_cflag & PARENB) cval |= UART_LCR_PARITY; - if (up->bugs & UART_BUG_PARITY) - up->fifo_bug = true; - } if (!(c_cflag & PARODD)) cval |= UART_LCR_EPAR; if (c_cflag & CMSPAR) @@ -2794,8 +2791,7 @@ serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, up->lcr = cval; /* Save computed LCR */ if (up->capabilities & UART_CAP_FIFO && port->fifosize > 1) { - /* NOTE: If fifo_bug is not set, a user can set RX_trigger. */ - if ((baud < 2400 && !up->dma) || up->fifo_bug) { + if (baud < 2400 && !up->dma) { up->fcr &= ~UART_FCR_TRIGGER_MASK; up->fcr |= UART_FCR_TRIGGER_1; } @@ -3131,8 +3127,7 @@ static int do_set_rxtrig(struct tty_port *port, unsigned char bytes) struct uart_8250_port *up = up_to_u8250p(uport); int rxtrig; - if (!(up->capabilities & UART_CAP_FIFO) || uport->fifosize <= 1 || - up->fifo_bug) + if (!(up->capabilities & UART_CAP_FIFO) || uport->fifosize <= 1) return -EINVAL; rxtrig = bytes_to_fcr_rxtrig(up, bytes); diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 19376bee9667..79b328861c5f 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -98,7 +98,6 @@ struct uart_8250_port { struct list_head list; /* ports on this IRQ */ u32 capabilities; /* port capabilities */ unsigned short bugs; /* port bugs */ - bool fifo_bug; /* min RX trigger if enabled */ unsigned int tx_loadsz; /* transmit fifo load size */ unsigned char acr; unsigned char fcr; From 2ebf4ddcc6570229ab54fc6a755dab1aab154540 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sat, 24 Jun 2023 14:21:39 +0200 Subject: [PATCH 170/224] bus: ixp4xx: fix IXP4XX_EXP_T1_MASK commit 6722e46513e0af8e2fff4698f7cb78bc50a9f13f upstream. The IXP4XX_EXP_T1_MASK was shifted one bit to the right, overlapping IXP4XX_EXP_T2_MASK and leaving bit 29 unused. The offset being wrong is also confirmed at least by the datasheet of IXP45X/46X [1]. Fix this by aligning it to IXP4XX_EXP_T1_SHIFT. [1] https://www.intel.com/content/dam/www/public/us/en/documents/manuals/ixp45x-ixp46x-developers-manual.pdf Cc: stable@vger.kernel.org Fixes: 1c953bda90ca ("bus: ixp4xx: Add a driver for IXP4xx expansion bus") Signed-off-by: Jonas Gorski Link: https://lore.kernel.org/r/20230624112958.27727-1-jonas.gorski@gmail.com Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20230624122139.3229642-1-linus.walleij@linaro.org Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/bus/intel-ixp4xx-eb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bus/intel-ixp4xx-eb.c b/drivers/bus/intel-ixp4xx-eb.c index 91db001eb69a..972603ed06a6 100644 --- a/drivers/bus/intel-ixp4xx-eb.c +++ b/drivers/bus/intel-ixp4xx-eb.c @@ -33,7 +33,7 @@ #define IXP4XX_EXP_TIMING_STRIDE 0x04 #define IXP4XX_EXP_CS_EN BIT(31) #define IXP456_EXP_PAR_EN BIT(30) /* Only on IXP45x and IXP46x */ -#define IXP4XX_EXP_T1_MASK GENMASK(28, 27) +#define IXP4XX_EXP_T1_MASK GENMASK(29, 28) #define IXP4XX_EXP_T1_SHIFT 28 #define IXP4XX_EXP_T2_MASK GENMASK(27, 26) #define IXP4XX_EXP_T2_SHIFT 26 From 08aaeda414f7bdea360f1ab7d3baa391d788ad43 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 22 Jun 2023 14:55:08 +0200 Subject: [PATCH 171/224] s390/decompressor: fix misaligned symbol build error commit 938f0c35d7d93a822ab9c9728e3205e8e57409d0 upstream. Nathan Chancellor reported a kernel build error on Fedora 39: $ clang --version | head -1 clang version 16.0.5 (Fedora 16.0.5-1.fc39) $ s390x-linux-gnu-ld --version | head -1 GNU ld version 2.40-1.fc39 $ make -skj"$(nproc)" ARCH=s390 CC=clang CROSS_COMPILE=s390x-linux-gnu- olddefconfig all s390x-linux-gnu-ld: arch/s390/boot/startup.o(.text+0x5b4): misaligned symbol `_decompressor_end' (0x35b0f) for relocation R_390_PC32DBL make[3]: *** [.../arch/s390/boot/Makefile:78: arch/s390/boot/vmlinux] Error 1 It turned out that the problem with misaligned symbols on s390 was fixed with commit 80ddf5ce1c92 ("s390: always build relocatable kernel") for the kernel image, but did not take into account that the decompressor uses its own set of CFLAGS, which come without -fPIE. Add the -fPIE flag also to the decompresser CFLAGS to fix this. Reported-by: Nathan Chancellor Tested-by: Nathan Chancellor Reported-by: CKI Suggested-by: Ulrich Weigand Link: https://github.com/ClangBuiltLinux/linux/issues/1747 Link: https://lore.kernel.org/32935.123062114500601371@us-mta-9.us.mimecast.lan/ Link: https://lore.kernel.org/r/20230622125508.1068457-1-hca@linux.ibm.com Cc: Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev Signed-off-by: Greg Kroah-Hartman --- arch/s390/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/Makefile b/arch/s390/Makefile index ed646c583e4f..5ed242897b0d 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -27,6 +27,7 @@ KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float -mbac KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables KBUILD_CFLAGS_DECOMPRESSOR += -ffreestanding KBUILD_CFLAGS_DECOMPRESSOR += -fno-stack-protector +KBUILD_CFLAGS_DECOMPRESSOR += -fPIE KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-member) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g) KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,)) From 0a1dc6377afce97143c78e5d1ddccfe93767972f Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Tue, 27 Jun 2023 20:28:01 +0000 Subject: [PATCH 172/224] dm: verity-loadpin: Add NULL pointer check for 'bdev' parameter commit 47f04616f2c9b2f4f0c9127e30ca515a078db591 upstream. Add a NULL check for the 'bdev' parameter of dm_verity_loadpin_is_bdev_trusted(). The function is called by loadpin_check(), which passes the block device that corresponds to the super block of the file system from which a file is being loaded. Generally a super_block structure has an associated block device, however that is not always the case (e.g. tmpfs). Cc: stable@vger.kernel.org # v6.0+ Fixes: b6c1c5745ccc ("dm: Add verity helpers for LoadPin") Signed-off-by: Matthias Kaehlcke Link: https://lore.kernel.org/r/20230627202800.1.Id63f7f59536d20f1ab83e1abdc1fda1471c7d031@changeid Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-verity-loadpin.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/md/dm-verity-loadpin.c b/drivers/md/dm-verity-loadpin.c index 4f78cc55c251..0666699b6858 100644 --- a/drivers/md/dm-verity-loadpin.c +++ b/drivers/md/dm-verity-loadpin.c @@ -58,6 +58,9 @@ bool dm_verity_loadpin_is_bdev_trusted(struct block_device *bdev) int srcu_idx; bool trusted = false; + if (bdev == NULL) + return false; + if (list_empty(&dm_verity_loadpin_trusted_root_digests)) return false; From 5fd32eb6fa0ac795aa5a64bc004ab68d7b44196a Mon Sep 17 00:00:00 2001 From: Mohamed Khalfella Date: Wed, 12 Jul 2023 22:30:21 +0000 Subject: [PATCH 173/224] tracing/histograms: Add histograms to hist_vars if they have referenced variables commit 6018b585e8c6fa7d85d4b38d9ce49a5b67be7078 upstream. Hist triggers can have referenced variables without having direct variables fields. This can be the case if referenced variables are added for trigger actions. In this case the newly added references will not have field variables. Not taking such referenced variables into consideration can result in a bug where it would be possible to remove hist trigger with variables being refenced. This will result in a bug that is easily reproducable like so $ cd /sys/kernel/tracing $ echo 'synthetic_sys_enter char[] comm; long id' >> synthetic_events $ echo 'hist:keys=common_pid.execname,id.syscall:vals=hitcount:comm=common_pid.execname' >> events/raw_syscalls/sys_enter/trigger $ echo 'hist:keys=common_pid.execname,id.syscall:onmatch(raw_syscalls.sys_enter).synthetic_sys_enter($comm, id)' >> events/raw_syscalls/sys_enter/trigger $ echo '!hist:keys=common_pid.execname,id.syscall:vals=hitcount:comm=common_pid.execname' >> events/raw_syscalls/sys_enter/trigger [ 100.263533] ================================================================== [ 100.264634] BUG: KASAN: slab-use-after-free in resolve_var_refs+0xc7/0x180 [ 100.265520] Read of size 8 at addr ffff88810375d0f0 by task bash/439 [ 100.266320] [ 100.266533] CPU: 2 PID: 439 Comm: bash Not tainted 6.5.0-rc1 #4 [ 100.267277] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-20220807_005459-localhost 04/01/2014 [ 100.268561] Call Trace: [ 100.268902] [ 100.269189] dump_stack_lvl+0x4c/0x70 [ 100.269680] print_report+0xc5/0x600 [ 100.270165] ? resolve_var_refs+0xc7/0x180 [ 100.270697] ? kasan_complete_mode_report_info+0x80/0x1f0 [ 100.271389] ? resolve_var_refs+0xc7/0x180 [ 100.271913] kasan_report+0xbd/0x100 [ 100.272380] ? resolve_var_refs+0xc7/0x180 [ 100.272920] __asan_load8+0x71/0xa0 [ 100.273377] resolve_var_refs+0xc7/0x180 [ 100.273888] event_hist_trigger+0x749/0x860 [ 100.274505] ? kasan_save_stack+0x2a/0x50 [ 100.275024] ? kasan_set_track+0x29/0x40 [ 100.275536] ? __pfx_event_hist_trigger+0x10/0x10 [ 100.276138] ? ksys_write+0xd1/0x170 [ 100.276607] ? do_syscall_64+0x3c/0x90 [ 100.277099] ? entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [ 100.277771] ? destroy_hist_data+0x446/0x470 [ 100.278324] ? event_hist_trigger_parse+0xa6c/0x3860 [ 100.278962] ? __pfx_event_hist_trigger_parse+0x10/0x10 [ 100.279627] ? __kasan_check_write+0x18/0x20 [ 100.280177] ? mutex_unlock+0x85/0xd0 [ 100.280660] ? __pfx_mutex_unlock+0x10/0x10 [ 100.281200] ? kfree+0x7b/0x120 [ 100.281619] ? ____kasan_slab_free+0x15d/0x1d0 [ 100.282197] ? event_trigger_write+0xac/0x100 [ 100.282764] ? __kasan_slab_free+0x16/0x20 [ 100.283293] ? __kmem_cache_free+0x153/0x2f0 [ 100.283844] ? sched_mm_cid_remote_clear+0xb1/0x250 [ 100.284550] ? __pfx_sched_mm_cid_remote_clear+0x10/0x10 [ 100.285221] ? event_trigger_write+0xbc/0x100 [ 100.285781] ? __kasan_check_read+0x15/0x20 [ 100.286321] ? __bitmap_weight+0x66/0xa0 [ 100.286833] ? _find_next_bit+0x46/0xe0 [ 100.287334] ? task_mm_cid_work+0x37f/0x450 [ 100.287872] event_triggers_call+0x84/0x150 [ 100.288408] trace_event_buffer_commit+0x339/0x430 [ 100.289073] ? ring_buffer_event_data+0x3f/0x60 [ 100.292189] trace_event_raw_event_sys_enter+0x8b/0xe0 [ 100.295434] syscall_trace_enter.constprop.0+0x18f/0x1b0 [ 100.298653] syscall_enter_from_user_mode+0x32/0x40 [ 100.301808] do_syscall_64+0x1a/0x90 [ 100.304748] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 [ 100.307775] RIP: 0033:0x7f686c75c1cb [ 100.310617] Code: 73 01 c3 48 8b 0d 65 3c 10 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa b8 21 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 35 3c 10 00 f7 d8 64 89 01 48 [ 100.317847] RSP: 002b:00007ffc60137a38 EFLAGS: 00000246 ORIG_RAX: 0000000000000021 [ 100.321200] RAX: ffffffffffffffda RBX: 000055f566469ea0 RCX: 00007f686c75c1cb [ 100.324631] RDX: 0000000000000001 RSI: 0000000000000001 RDI: 000000000000000a [ 100.328104] RBP: 00007ffc60137ac0 R08: 00007f686c818460 R09: 000000000000000a [ 100.331509] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000009 [ 100.334992] R13: 0000000000000007 R14: 000000000000000a R15: 0000000000000007 [ 100.338381] We hit the bug because when second hist trigger has was created has_hist_vars() returned false because hist trigger did not have variables. As a result of that save_hist_vars() was not called to add the trigger to trace_array->hist_vars. Later on when we attempted to remove the first histogram find_any_var_ref() failed to detect it is being used because it did not find the second trigger in hist_vars list. With this change we wait until trigger actions are created so we can take into consideration if hist trigger has variable references. Also, now we check the return value of save_hist_vars() and fail trigger creation if save_hist_vars() fails. Link: https://lore.kernel.org/linux-trace-kernel/20230712223021.636335-1-mkhalfella@purestorage.com Cc: stable@vger.kernel.org Fixes: 067fe038e70f6 ("tracing: Add variable reference handling to hist triggers") Signed-off-by: Mohamed Khalfella Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_hist.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 75244d9e2bf9..105253b9bc31 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -6555,13 +6555,15 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops, if (get_named_trigger_data(trigger_data)) goto enable; - if (has_hist_vars(hist_data)) - save_hist_vars(hist_data); - ret = create_actions(hist_data); if (ret) goto out_unreg; + if (has_hist_vars(hist_data) || hist_data->n_var_refs) { + if (save_hist_vars(hist_data)) + goto out_unreg; + } + ret = tracing_map_init(hist_data->map); if (ret) goto out_unreg; From be970e22c53d5572b2795b79da9716ada937023b Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Thu, 13 Jul 2023 22:14:35 +0800 Subject: [PATCH 174/224] tracing: Fix memory leak of iter->temp when reading trace_pipe commit d5a821896360cc8b93a15bd888fabc858c038dc0 upstream. kmemleak reports: unreferenced object 0xffff88814d14e200 (size 256): comm "cat", pid 336, jiffies 4294871818 (age 779.490s) hex dump (first 32 bytes): 04 00 01 03 00 00 00 00 08 00 00 00 00 00 00 00 ................ 0c d8 c8 9b ff ff ff ff 04 5a ca 9b ff ff ff ff .........Z...... backtrace: [] __kmalloc+0x4f/0x140 [] trace_find_next_entry+0xbb/0x1d0 [] trace_print_lat_context+0xaf/0x4e0 [] print_trace_line+0x3e0/0x950 [] tracing_read_pipe+0x2d9/0x5a0 [] vfs_read+0x143/0x520 [] ksys_read+0xbd/0x160 [] do_syscall_64+0x3f/0x90 [] entry_SYSCALL_64_after_hwframe+0x6e/0xd8 when reading file 'trace_pipe', 'iter->temp' is allocated or relocated in trace_find_next_entry() but not freed before 'trace_pipe' is closed. To fix it, free 'iter->temp' in tracing_release_pipe(). Link: https://lore.kernel.org/linux-trace-kernel/20230713141435.1133021-1-zhengyejian1@huawei.com Cc: stable@vger.kernel.org Fixes: ff895103a84ab ("tracing: Save off entry when peeking at next entry") Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5c1087df2f1c..2849faac4cf2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6679,6 +6679,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) free_cpumask_var(iter->started); kfree(iter->fmt); + kfree(iter->temp); mutex_destroy(&iter->mutex); kfree(iter); From 9d6a260bbfbc606bfd5089afc080b0e068be6ac0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Jul 2023 15:30:42 +0200 Subject: [PATCH 175/224] nvme: don't reject probe due to duplicate IDs for single-ported PCIe devices commit ac522fc6c3165fd0daa2f8da7e07d5f800586daa upstream. While duplicate IDs are still very harmful, including the potential to easily see changing devices in /dev/disk/by-id, it turn out they are extremely common for cheap end user NVMe devices. Relax our check for them for so that it doesn't reject the probe on single-ported PCIe devices, but prints a big warning instead. In doubt we'd still like to see quirk entries to disable the potential for changing supposed stable device identifier links, but this will at least allow users how have two (or more) of these devices to use them without having to manually add a new PCI ID entry with the quirk through sysfs or by patching the kernel. Fixes: 2079f41ec6ff ("nvme: check that EUI/GUID/UUID are globally unique") Cc: stable@vger.kernel.org # 6.0+ Co-developed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/core.c | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 560ce2f05a96..fd73db8a535d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4175,10 +4175,40 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info) ret = nvme_global_check_duplicate_ids(ctrl->subsys, &info->ids); if (ret) { - dev_err(ctrl->device, - "globally duplicate IDs for nsid %d\n", info->nsid); + /* + * We've found two different namespaces on two different + * subsystems that report the same ID. This is pretty nasty + * for anything that actually requires unique device + * identification. In the kernel we need this for multipathing, + * and in user space the /dev/disk/by-id/ links rely on it. + * + * If the device also claims to be multi-path capable back off + * here now and refuse the probe the second device as this is a + * recipe for data corruption. If not this is probably a + * cheap consumer device if on the PCIe bus, so let the user + * proceed and use the shiny toy, but warn that with changing + * probing order (which due to our async probing could just be + * device taking longer to startup) the other device could show + * up at any time. + */ nvme_print_device_info(ctrl); - return ret; + if ((ns->ctrl->ops->flags & NVME_F_FABRICS) || /* !PCIe */ + ((ns->ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) && + info->is_shared)) { + dev_err(ctrl->device, + "ignoring nsid %d because of duplicate IDs\n", + info->nsid); + return ret; + } + + dev_err(ctrl->device, + "clearing duplicate IDs for nsid %d\n", info->nsid); + dev_err(ctrl->device, + "use of /dev/disk/by-id/ may cause data corruption\n"); + memset(&info->ids.nguid, 0, sizeof(info->ids.nguid)); + memset(&info->ids.uuid, 0, sizeof(info->ids.uuid)); + memset(&info->ids.eui64, 0, sizeof(info->ids.eui64)); + ctrl->quirks |= NVME_QUIRK_BOGUS_NID; } mutex_lock(&ctrl->subsys->lock); From e2c7a05a48e589af6153718b046dd9cd1d38854b Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Thu, 27 Apr 2023 16:06:59 +0200 Subject: [PATCH 176/224] samples: ftrace: Save required argument registers in sample trampolines commit 8564c315876ab86fcaf8e7f558d6a84cb2ce5590 upstream. The ftrace-direct-too sample traces the handle_mm_fault function whose signature changed since the introduction of the sample. Since: commit bce617edecad ("mm: do page fault accounting in handle_mm_fault") handle_mm_fault now has 4 arguments. Therefore, the sample trampoline should save 4 argument registers. s390 saves all argument registers already so it does not need a change but x86_64 needs an extra push and pop. This also evolves the signature of the tracing function to make it mirror the signature of the traced function. Link: https://lkml.kernel.org/r/20230427140700.625241-2-revest@chromium.org Cc: stable@vger.kernel.org Fixes: bce617edecad ("mm: do page fault accounting in handle_mm_fault") Reviewed-by: Steven Rostedt (Google) Reviewed-by: Mark Rutland Acked-by: Catalin Marinas Signed-off-by: Florent Revest Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- samples/ftrace/ftrace-direct-too.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c index 6690468c5cc2..14c6859e1ac4 100644 --- a/samples/ftrace/ftrace-direct-too.c +++ b/samples/ftrace/ftrace-direct-too.c @@ -5,14 +5,14 @@ #include #include -extern void my_direct_func(struct vm_area_struct *vma, - unsigned long address, unsigned int flags); +extern void my_direct_func(struct vm_area_struct *vma, unsigned long address, + unsigned int flags, struct pt_regs *regs); -void my_direct_func(struct vm_area_struct *vma, - unsigned long address, unsigned int flags) +void my_direct_func(struct vm_area_struct *vma, unsigned long address, + unsigned int flags, struct pt_regs *regs) { - trace_printk("handle mm fault vma=%p address=%lx flags=%x\n", - vma, address, flags); + trace_printk("handle mm fault vma=%p address=%lx flags=%x regs=%p\n", + vma, address, flags, regs); } extern void my_tramp(void *); @@ -32,7 +32,9 @@ asm ( " pushq %rdi\n" " pushq %rsi\n" " pushq %rdx\n" +" pushq %rcx\n" " call my_direct_func\n" +" popq %rcx\n" " popq %rdx\n" " popq %rsi\n" " popq %rdi\n" From aeb62beaf9cbd0a72e7f97c9af6d3e7f76ce2946 Mon Sep 17 00:00:00 2001 From: Eric Lin Date: Mon, 10 Jul 2023 15:43:28 +0000 Subject: [PATCH 177/224] perf: RISC-V: Remove PERF_HES_STOPPED flag checking in riscv_pmu_start() commit 66843b14fb71825fdd73ab12f6594f2243b402be upstream. Since commit 096b52fd2bb4 ("perf: RISC-V: throttle perf events") the perf_sample_event_took() function was added to report time spent in overflow interrupts. If the interrupt takes too long, the perf framework will lower the sysctl_perf_event_sample_rate and max_samples_per_tick. When hwc->interrupts is larger than max_samples_per_tick, the hwc->interrupts will be set to MAX_INTERRUPTS, and events will be throttled within the __perf_event_account_interrupt() function. However, the RISC-V PMU driver doesn't call riscv_pmu_stop() to update the PERF_HES_STOPPED flag after perf_event_overflow() in pmu_sbi_ovf_handler() function to avoid throttling. When the perf framework unthrottled the event in the timer interrupt handler, it triggers riscv_pmu_start() function and causes a WARN_ON_ONCE() warning, as shown below: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 240 at drivers/perf/riscv_pmu.c:184 riscv_pmu_start+0x7c/0x8e Modules linked in: CPU: 0 PID: 240 Comm: ls Not tainted 6.4-rc4-g19d0788e9ef2 #1 Hardware name: SiFive (DT) epc : riscv_pmu_start+0x7c/0x8e ra : riscv_pmu_start+0x28/0x8e epc : ffffffff80aef864 ra : ffffffff80aef810 sp : ffff8f80004db6f0 gp : ffffffff81c83750 tp : ffffaf80069f9bc0 t0 : ffff8f80004db6c0 t1 : 0000000000000000 t2 : 000000000000001f s0 : ffff8f80004db720 s1 : ffffaf8008ca1068 a0 : 0000ffffffffffff a1 : 0000000000000000 a2 : 0000000000000001 a3 : 0000000000000870 a4 : 0000000000000000 a5 : 0000000000000000 a6 : 0000000000000840 a7 : 0000000000000030 s2 : 0000000000000000 s3 : ffffaf8005165800 s4 : ffffaf800424da00 s5 : ffffffffffffffff s6 : ffffffff81cc7590 s7 : 0000000000000000 s8 : 0000000000000006 s9 : 0000000000000001 s10: ffffaf807efbc340 s11: ffffaf807efbbf00 t3 : ffffaf8006a16028 t4 : 00000000dbfbb796 t5 : 0000000700000000 t6 : ffffaf8005269870 status: 0000000200000100 badaddr: 0000000000000000 cause: 0000000000000003 [] riscv_pmu_start+0x7c/0x8e [] perf_adjust_freq_unthr_context+0x15e/0x174 [] perf_event_task_tick+0x88/0x9c [] scheduler_tick+0xfe/0x27c [] update_process_times+0x9a/0xba [] tick_sched_handle+0x32/0x66 [] tick_sched_timer+0x64/0xb0 [] __hrtimer_run_queues+0x156/0x2f4 [] hrtimer_interrupt+0xe2/0x1fe [] riscv_timer_interrupt+0x38/0x42 [] handle_percpu_devid_irq+0x90/0x1d2 [] generic_handle_domain_irq+0x28/0x36 After referring other PMU drivers like Arm, Loongarch, Csky, and Mips, they don't call *_pmu_stop() to update with PERF_HES_STOPPED flag after perf_event_overflow() function nor do they add PERF_HES_STOPPED flag checking in *_pmu_start() which don't cause this warning. Thus, it's recommended to remove this unnecessary check in riscv_pmu_start() function to prevent this warning. Signed-off-by: Eric Lin Link: https://lore.kernel.org/r/20230710154328.19574-1-eric.lin@sifive.com Fixes: 096b52fd2bb4 ("perf: RISC-V: throttle perf events") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- drivers/perf/riscv_pmu.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c index ebca5eab9c9b..56897d4d4fd3 100644 --- a/drivers/perf/riscv_pmu.c +++ b/drivers/perf/riscv_pmu.c @@ -181,9 +181,6 @@ void riscv_pmu_start(struct perf_event *event, int flags) uint64_t max_period = riscv_pmu_ctr_get_width_mask(event); u64 init_val; - if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) - return; - if (flags & PERF_EF_RELOAD) WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); From b1a726ad33e585e3d9fa70712df31ae105e4532c Mon Sep 17 00:00:00 2001 From: "Isaac J. Manjarres" Date: Tue, 11 Jul 2023 12:30:58 -0700 Subject: [PATCH 178/224] regmap-irq: Fix out-of-bounds access when allocating config buffers commit 963b54df82b6d6206d7def273390bf3f7af558e1 upstream. When allocating the 2D array for handling IRQ type registers in regmap_add_irq_chip_fwnode(), the intent is to allocate a matrix with num_config_bases rows and num_config_regs columns. This is currently handled by allocating a buffer to hold a pointer for each row (i.e. num_config_bases). After that, the logic attempts to allocate the memory required to hold the register configuration for each row. However, instead of doing this allocation for each row (i.e. num_config_bases allocations), the logic erroneously does this allocation num_config_regs number of times. This scenario can lead to out-of-bounds accesses when num_config_regs is greater than num_config_bases. Fix this by updating the terminating condition of the loop that allocates the memory for holding the register configuration to allocate memory only for each row in the matrix. Amit Pundir reported a crash that was occurring on his db845c device due to memory corruption (see "Closes" tag for Amit's report). The KASAN report below helped narrow it down to this issue: [ 14.033877][ T1] ================================================================== [ 14.042507][ T1] BUG: KASAN: invalid-access in regmap_add_irq_chip_fwnode+0x594/0x1364 [ 14.050796][ T1] Write of size 8 at addr 06ffff8081021850 by task init/1 [ 14.242004][ T1] The buggy address belongs to the object at ffffff8081021850 [ 14.242004][ T1] which belongs to the cache kmalloc-8 of size 8 [ 14.255669][ T1] The buggy address is located 0 bytes inside of [ 14.255669][ T1] 8-byte region [ffffff8081021850, ffffff8081021858) Fixes: faa87ce9196d ("regmap-irq: Introduce config registers for irq types") Reported-by: Amit Pundir Closes: https://lore.kernel.org/all/CAMi1Hd04mu6JojT3y6wyN2YeVkPR5R3qnkKJ8iR8if_YByCn4w@mail.gmail.com/ Tested-by: John Stultz Tested-by: Amit Pundir # tested on Dragonboard 845c Cc: stable@vger.kernel.org # v6.0+ Cc: Aidan MacDonald Cc: Saravana Kannan Cc: Catalin Marinas Signed-off-by: "Isaac J. Manjarres" Link: https://lore.kernel.org/r/20230711193059.2480971-1-isaacmanjarres@google.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/base/regmap/regmap-irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index 3de89795f584..18fc1c436081 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -843,7 +843,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, if (!d->config_buf) goto err_alloc; - for (i = 0; i < chip->num_config_regs; i++) { + for (i = 0; i < chip->num_config_bases; i++) { d->config_buf[i] = kcalloc(chip->num_config_regs, sizeof(**d->config_buf), GFP_KERNEL); From 90947ebf8794e3c229fb2e16e37f1bfea6877f14 Mon Sep 17 00:00:00 2001 From: Krister Johansen Date: Mon, 10 Jul 2023 18:36:21 -0700 Subject: [PATCH 179/224] net: ena: fix shift-out-of-bounds in exponential backoff commit 1e9cb763e9bacf0c932aa948f50dcfca6f519a26 upstream. The ENA adapters on our instances occasionally reset. Once recently logged a UBSAN failure to console in the process: UBSAN: shift-out-of-bounds in build/linux/drivers/net/ethernet/amazon/ena/ena_com.c:540:13 shift exponent 32 is too large for 32-bit type 'unsigned int' CPU: 28 PID: 70012 Comm: kworker/u72:2 Kdump: loaded not tainted 5.15.117 Hardware name: Amazon EC2 c5d.9xlarge/, BIOS 1.0 10/16/2017 Workqueue: ena ena_fw_reset_device [ena] Call Trace: dump_stack_lvl+0x4a/0x63 dump_stack+0x10/0x16 ubsan_epilogue+0x9/0x36 __ubsan_handle_shift_out_of_bounds.cold+0x61/0x10e ? __const_udelay+0x43/0x50 ena_delay_exponential_backoff_us.cold+0x16/0x1e [ena] wait_for_reset_state+0x54/0xa0 [ena] ena_com_dev_reset+0xc8/0x110 [ena] ena_down+0x3fe/0x480 [ena] ena_destroy_device+0xeb/0xf0 [ena] ena_fw_reset_device+0x30/0x50 [ena] process_one_work+0x22b/0x3d0 worker_thread+0x4d/0x3f0 ? process_one_work+0x3d0/0x3d0 kthread+0x12a/0x150 ? set_kthread_struct+0x50/0x50 ret_from_fork+0x22/0x30 Apparently, the reset delays are getting so large they can trigger a UBSAN panic. Looking at the code, the current timeout is capped at 5000us. Using a base value of 100us, the current code will overflow after (1<<29). Even at values before 32, this function wraps around, perhaps unintentionally. Cap the value of the exponent used for this backoff at (1<<16) which is larger than currently necessary, but large enough to support bigger values in the future. Cc: stable@vger.kernel.org Fixes: 4bb7f4cf60e3 ("net: ena: reduce driver load time") Signed-off-by: Krister Johansen Reviewed-by: Leon Romanovsky Reviewed-by: Shay Agroskin Link: https://lore.kernel.org/r/20230711013621.GE1926@templeofstupid.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/amazon/ena/ena_com.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 451c3a1b6255..633b321d7fdd 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -35,6 +35,8 @@ #define ENA_REGS_ADMIN_INTR_MASK 1 +#define ENA_MAX_BACKOFF_DELAY_EXP 16U + #define ENA_MIN_ADMIN_POLL_US 100 #define ENA_MAX_ADMIN_POLL_US 5000 @@ -536,6 +538,7 @@ static int ena_com_comp_status_to_errno(struct ena_com_admin_queue *admin_queue, static void ena_delay_exponential_backoff_us(u32 exp, u32 delay_us) { + exp = min_t(u32, exp, ENA_MAX_BACKOFF_DELAY_EXP); delay_us = max_t(u32, ENA_MIN_ADMIN_POLL_US, delay_us); delay_us = min_t(u32, delay_us * (1U << exp), ENA_MAX_ADMIN_POLL_US); usleep_range(delay_us, 2 * delay_us); From 8b0b63fdac6b70a45614e7d4b30e5bbb93deb007 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Sun, 9 Jul 2023 06:51:44 +0800 Subject: [PATCH 180/224] ring-buffer: Fix deadloop issue on reading trace_pipe commit 7e42907f3a7b4ce3a2d1757f6d78336984daf8f5 upstream. Soft lockup occurs when reading file 'trace_pipe': watchdog: BUG: soft lockup - CPU#6 stuck for 22s! [cat:4488] [...] RIP: 0010:ring_buffer_empty_cpu+0xed/0x170 RSP: 0018:ffff88810dd6fc48 EFLAGS: 00000246 RAX: 0000000000000000 RBX: 0000000000000246 RCX: ffffffff93d1aaeb RDX: ffff88810a280040 RSI: 0000000000000008 RDI: ffff88811164b218 RBP: ffff88811164b218 R08: 0000000000000000 R09: ffff88815156600f R10: ffffed102a2acc01 R11: 0000000000000001 R12: 0000000051651901 R13: 0000000000000000 R14: ffff888115e49500 R15: 0000000000000000 [...] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f8d853c2000 CR3: 000000010dcd8000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __find_next_entry+0x1a8/0x4b0 ? peek_next_entry+0x250/0x250 ? down_write+0xa5/0x120 ? down_write_killable+0x130/0x130 trace_find_next_entry_inc+0x3b/0x1d0 tracing_read_pipe+0x423/0xae0 ? tracing_splice_read_pipe+0xcb0/0xcb0 vfs_read+0x16b/0x490 ksys_read+0x105/0x210 ? __ia32_sys_pwrite64+0x200/0x200 ? switch_fpu_return+0x108/0x220 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x61/0xc6 Through the vmcore, I found it's because in tracing_read_pipe(), ring_buffer_empty_cpu() found some buffer is not empty but then it cannot read anything due to "rb_num_of_entries() == 0" always true, Then it infinitely loop the procedure due to user buffer not been filled, see following code path: tracing_read_pipe() { ... ... waitagain: tracing_wait_pipe() // 1. find non-empty buffer here trace_find_next_entry_inc() // 2. loop here try to find an entry __find_next_entry() ring_buffer_empty_cpu(); // 3. find non-empty buffer peek_next_entry() // 4. but peek always return NULL ring_buffer_peek() rb_buffer_peek() rb_get_reader_page() // 5. because rb_num_of_entries() == 0 always true here // then return NULL // 6. user buffer not been filled so goto 'waitgain' // and eventually leads to an deadloop in kernel!!! } By some analyzing, I found that when resetting ringbuffer, the 'entries' of its pages are not all cleared (see rb_reset_cpu()). Then when reducing the ringbuffer, and if some reduced pages exist dirty 'entries' data, they will be added into 'cpu_buffer->overrun' (see rb_remove_pages()), which cause wrong 'overrun' count and eventually cause the deadloop issue. To fix it, we need to clear every pages in rb_reset_cpu(). Link: https://lore.kernel.org/linux-trace-kernel/20230708225144.3785600-1-zhengyejian1@huawei.com Cc: stable@vger.kernel.org Fixes: a5fb833172eca ("ring-buffer: Fix uninitialized read_stamp") Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 4acc27cb856f..c264421c4ecd 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -5238,28 +5238,34 @@ unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu) } EXPORT_SYMBOL_GPL(ring_buffer_size); +static void rb_clear_buffer_page(struct buffer_page *page) +{ + local_set(&page->write, 0); + local_set(&page->entries, 0); + rb_init_page(page->page); + page->read = 0; +} + static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) { + struct buffer_page *page; + rb_head_page_deactivate(cpu_buffer); cpu_buffer->head_page = list_entry(cpu_buffer->pages, struct buffer_page, list); - local_set(&cpu_buffer->head_page->write, 0); - local_set(&cpu_buffer->head_page->entries, 0); - local_set(&cpu_buffer->head_page->page->commit, 0); - - cpu_buffer->head_page->read = 0; + rb_clear_buffer_page(cpu_buffer->head_page); + list_for_each_entry(page, cpu_buffer->pages, list) { + rb_clear_buffer_page(page); + } cpu_buffer->tail_page = cpu_buffer->head_page; cpu_buffer->commit_page = cpu_buffer->head_page; INIT_LIST_HEAD(&cpu_buffer->reader_page->list); INIT_LIST_HEAD(&cpu_buffer->new_pages); - local_set(&cpu_buffer->reader_page->write, 0); - local_set(&cpu_buffer->reader_page->entries, 0); - local_set(&cpu_buffer->reader_page->page->commit, 0); - cpu_buffer->reader_page->read = 0; + rb_clear_buffer_page(cpu_buffer->reader_page); local_set(&cpu_buffer->entries_bytes, 0); local_set(&cpu_buffer->overrun, 0); From 99fe81d219dfd603e6bd676364dbc707823c745d Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Wed, 12 Jul 2023 14:04:52 +0800 Subject: [PATCH 181/224] ftrace: Fix possible warning on checking all pages used in ftrace_process_locs() commit 26efd79c4624294e553aeaa3439c646729bad084 upstream. As comments in ftrace_process_locs(), there may be NULL pointers in mcount_loc section: > Some architecture linkers will pad between > the different mcount_loc sections of different > object files to satisfy alignments. > Skip any NULL pointers. After commit 20e5227e9f55 ("ftrace: allow NULL pointers in mcount_loc"), NULL pointers will be accounted when allocating ftrace pages but skipped before adding into ftrace pages, this may result in some pages not being used. Then after commit 706c81f87f84 ("ftrace: Remove extra helper functions"), warning may occur at: WARN_ON(pg->next); To fix it, only warn for case that no pointers skipped but pages not used up, then free those unused pages after releasing ftrace_lock. Link: https://lore.kernel.org/linux-trace-kernel/20230712060452.3175675-1-zhengyejian1@huawei.com Cc: stable@vger.kernel.org Fixes: 706c81f87f84 ("ftrace: Remove extra helper functions") Suggested-by: Steven Rostedt Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ftrace.c | 45 +++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 57db50c2dce8..552956ccb91c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3213,6 +3213,22 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count) return cnt; } +static void ftrace_free_pages(struct ftrace_page *pages) +{ + struct ftrace_page *pg = pages; + + while (pg) { + if (pg->records) { + free_pages((unsigned long)pg->records, pg->order); + ftrace_number_of_pages -= 1 << pg->order; + } + pages = pg->next; + kfree(pg); + pg = pages; + ftrace_number_of_groups--; + } +} + static struct ftrace_page * ftrace_allocate_pages(unsigned long num_to_init) { @@ -3251,17 +3267,7 @@ ftrace_allocate_pages(unsigned long num_to_init) return start_pg; free_pages: - pg = start_pg; - while (pg) { - if (pg->records) { - free_pages((unsigned long)pg->records, pg->order); - ftrace_number_of_pages -= 1 << pg->order; - } - start_pg = pg->next; - kfree(pg); - pg = start_pg; - ftrace_number_of_groups--; - } + ftrace_free_pages(start_pg); pr_info("ftrace: FAILED to allocate memory for functions\n"); return NULL; } @@ -6666,9 +6672,11 @@ static int ftrace_process_locs(struct module *mod, unsigned long *start, unsigned long *end) { + struct ftrace_page *pg_unuse = NULL; struct ftrace_page *start_pg; struct ftrace_page *pg; struct dyn_ftrace *rec; + unsigned long skipped = 0; unsigned long count; unsigned long *p; unsigned long addr; @@ -6731,8 +6739,10 @@ static int ftrace_process_locs(struct module *mod, * object files to satisfy alignments. * Skip any NULL pointers. */ - if (!addr) + if (!addr) { + skipped++; continue; + } end_offset = (pg->index+1) * sizeof(pg->records[0]); if (end_offset > PAGE_SIZE << pg->order) { @@ -6746,8 +6756,10 @@ static int ftrace_process_locs(struct module *mod, rec->ip = addr; } - /* We should have used all pages */ - WARN_ON(pg->next); + if (pg->next) { + pg_unuse = pg->next; + pg->next = NULL; + } /* Assign the last page to ftrace_pages */ ftrace_pages = pg; @@ -6769,6 +6781,11 @@ static int ftrace_process_locs(struct module *mod, out: mutex_unlock(&ftrace_lock); + /* We should have used all pages unless we skipped some */ + if (pg_unuse) { + WARN_ON(!skipped); + ftrace_free_pages(pg_unuse); + } return ret; } From 11dc77a645b78b2fa8e730232f530778af4f710d Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 7 Jul 2023 14:31:34 -0500 Subject: [PATCH 182/224] drm/amd/pm: share the code around SMU13 pcie parameters update commit dcb489bae65d92cfd26da22c7a0d6665b06ecc63 upstream. So that SMU13.0.0 and SMU13.0.7 do not need to have one copy each. Signed-off-by: Evan Quan Signed-off-by: Mario Limonciello Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 4 +++ .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 31 +++++++++++++++++ .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 33 +------------------ .../drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 33 +------------------ 4 files changed, 37 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index bffa6247c3cd..d6479a808855 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -297,5 +297,9 @@ int smu_v13_0_get_pptable_from_firmware(struct smu_context *smu, uint32_t *size, uint32_t pptable_id); +int smu_v13_0_update_pcie_parameters(struct smu_context *smu, + uint32_t pcie_gen_cap, + uint32_t pcie_width_cap); + #endif #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index cd627ee8bcea..78bbbf7da0a8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -2489,3 +2489,34 @@ int smu_v13_0_mode1_reset(struct smu_context *smu) return ret; } + +int smu_v13_0_update_pcie_parameters(struct smu_context *smu, + uint32_t pcie_gen_cap, + uint32_t pcie_width_cap) +{ + struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; + struct smu_13_0_pcie_table *pcie_table = + &dpm_context->dpm_tables.pcie_table; + uint32_t smu_pcie_arg; + int ret, i; + + for (i = 0; i < pcie_table->num_of_link_levels; i++) { + if (pcie_table->pcie_gen[i] > pcie_gen_cap) + pcie_table->pcie_gen[i] = pcie_gen_cap; + if (pcie_table->pcie_lane[i] > pcie_width_cap) + pcie_table->pcie_lane[i] = pcie_width_cap; + + smu_pcie_arg = i << 16; + smu_pcie_arg |= pcie_table->pcie_gen[i] << 8; + smu_pcie_arg |= pcie_table->pcie_lane[i]; + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_OverridePcieParameters, + smu_pcie_arg, + NULL); + if (ret) + return ret; + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index e5e03455622a..f7ac488a3da2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -1216,37 +1216,6 @@ static int smu_v13_0_0_force_clk_levels(struct smu_context *smu, return ret; } -static int smu_v13_0_0_update_pcie_parameters(struct smu_context *smu, - uint32_t pcie_gen_cap, - uint32_t pcie_width_cap) -{ - struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; - struct smu_13_0_pcie_table *pcie_table = - &dpm_context->dpm_tables.pcie_table; - uint32_t smu_pcie_arg; - int ret, i; - - for (i = 0; i < pcie_table->num_of_link_levels; i++) { - if (pcie_table->pcie_gen[i] > pcie_gen_cap) - pcie_table->pcie_gen[i] = pcie_gen_cap; - if (pcie_table->pcie_lane[i] > pcie_width_cap) - pcie_table->pcie_lane[i] = pcie_width_cap; - - smu_pcie_arg = i << 16; - smu_pcie_arg |= pcie_table->pcie_gen[i] << 8; - smu_pcie_arg |= pcie_table->pcie_lane[i]; - - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_OverridePcieParameters, - smu_pcie_arg, - NULL); - if (ret) - return ret; - } - - return 0; -} - static const struct smu_temperature_range smu13_thermal_policy[] = { {-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000}, { 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000}, @@ -2033,7 +2002,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .feature_is_enabled = smu_cmn_feature_is_enabled, .print_clk_levels = smu_v13_0_0_print_clk_levels, .force_clk_levels = smu_v13_0_0_force_clk_levels, - .update_pcie_parameters = smu_v13_0_0_update_pcie_parameters, + .update_pcie_parameters = smu_v13_0_update_pcie_parameters, .get_thermal_temperature_range = smu_v13_0_0_get_thermal_temperature_range, .register_irq_handler = smu_v13_0_register_irq_handler, .enable_thermal_alert = smu_v13_0_enable_thermal_alert, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 084597d0ae4e..d980eff2b616 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -1225,37 +1225,6 @@ static int smu_v13_0_7_force_clk_levels(struct smu_context *smu, return ret; } -static int smu_v13_0_7_update_pcie_parameters(struct smu_context *smu, - uint32_t pcie_gen_cap, - uint32_t pcie_width_cap) -{ - struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; - struct smu_13_0_pcie_table *pcie_table = - &dpm_context->dpm_tables.pcie_table; - uint32_t smu_pcie_arg; - int ret, i; - - for (i = 0; i < pcie_table->num_of_link_levels; i++) { - if (pcie_table->pcie_gen[i] > pcie_gen_cap) - pcie_table->pcie_gen[i] = pcie_gen_cap; - if (pcie_table->pcie_lane[i] > pcie_width_cap) - pcie_table->pcie_lane[i] = pcie_width_cap; - - smu_pcie_arg = i << 16; - smu_pcie_arg |= pcie_table->pcie_gen[i] << 8; - smu_pcie_arg |= pcie_table->pcie_lane[i]; - - ret = smu_cmn_send_smc_msg_with_param(smu, - SMU_MSG_OverridePcieParameters, - smu_pcie_arg, - NULL); - if (ret) - return ret; - } - - return 0; -} - static const struct smu_temperature_range smu13_thermal_policy[] = { {-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000}, @@ -1750,7 +1719,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .feature_is_enabled = smu_cmn_feature_is_enabled, .print_clk_levels = smu_v13_0_7_print_clk_levels, .force_clk_levels = smu_v13_0_7_force_clk_levels, - .update_pcie_parameters = smu_v13_0_7_update_pcie_parameters, + .update_pcie_parameters = smu_v13_0_update_pcie_parameters, .get_thermal_temperature_range = smu_v13_0_7_get_thermal_temperature_range, .register_irq_handler = smu_v13_0_register_irq_handler, .enable_thermal_alert = smu_v13_0_enable_thermal_alert, From bd8cd38d3ac6b6410ac4e7401ef3dca057a9b285 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 7 Jul 2023 14:31:35 -0500 Subject: [PATCH 183/224] drm/amd/pm: conditionally disable pcie lane/speed switching for SMU13 commit 31c7a3b378a136adc63296a2ff17645896fcf303 upstream. Intel platforms such as Sapphire Rapids and Raptor Lake don't support dynamic pcie lane or speed switching. This limitation seems to carry over from one generation to another. To be safer, disable dynamic pcie lane width and speed switching when running on an Intel platform. Link: https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/ Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2663 Co-developed-by: Evan Quan Signed-off-by: Evan Quan Signed-off-by: Mario Limonciello Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.1.x Signed-off-by: Greg Kroah-Hartman --- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 46 +++++++++++++++++-- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 78bbbf7da0a8..2456f2a72def 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -2490,6 +2490,25 @@ int smu_v13_0_mode1_reset(struct smu_context *smu) return ret; } +/* + * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic + * speed switching. Until we have confirmation from Intel that a specific host + * supports it, it's safer that we keep it disabled for all. + * + * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/ + * https://gitlab.freedesktop.org/drm/amd/-/issues/2663 + */ +static bool smu_v13_0_is_pcie_dynamic_switching_supported(void) +{ +#if IS_ENABLED(CONFIG_X86) + struct cpuinfo_x86 *c = &cpu_data(0); + + if (c->x86_vendor == X86_VENDOR_INTEL) + return false; +#endif + return true; +} + int smu_v13_0_update_pcie_parameters(struct smu_context *smu, uint32_t pcie_gen_cap, uint32_t pcie_width_cap) @@ -2497,15 +2516,32 @@ int smu_v13_0_update_pcie_parameters(struct smu_context *smu, struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context; struct smu_13_0_pcie_table *pcie_table = &dpm_context->dpm_tables.pcie_table; + int num_of_levels = pcie_table->num_of_link_levels; uint32_t smu_pcie_arg; int ret, i; - for (i = 0; i < pcie_table->num_of_link_levels; i++) { - if (pcie_table->pcie_gen[i] > pcie_gen_cap) - pcie_table->pcie_gen[i] = pcie_gen_cap; - if (pcie_table->pcie_lane[i] > pcie_width_cap) - pcie_table->pcie_lane[i] = pcie_width_cap; + if (!smu_v13_0_is_pcie_dynamic_switching_supported()) { + if (pcie_table->pcie_gen[num_of_levels - 1] < pcie_gen_cap) + pcie_gen_cap = pcie_table->pcie_gen[num_of_levels - 1]; + if (pcie_table->pcie_lane[num_of_levels - 1] < pcie_width_cap) + pcie_width_cap = pcie_table->pcie_lane[num_of_levels - 1]; + + /* Force all levels to use the same settings */ + for (i = 0; i < num_of_levels; i++) { + pcie_table->pcie_gen[i] = pcie_gen_cap; + pcie_table->pcie_lane[i] = pcie_width_cap; + } + } else { + for (i = 0; i < num_of_levels; i++) { + if (pcie_table->pcie_gen[i] > pcie_gen_cap) + pcie_table->pcie_gen[i] = pcie_gen_cap; + if (pcie_table->pcie_lane[i] > pcie_width_cap) + pcie_table->pcie_lane[i] = pcie_width_cap; + } + } + + for (i = 0; i < num_of_levels; i++) { smu_pcie_arg = i << 16; smu_pcie_arg |= pcie_table->pcie_gen[i] << 8; smu_pcie_arg |= pcie_table->pcie_lane[i]; From 6a05de6da58a67abec06167b32c291d7a9b6770b Mon Sep 17 00:00:00 2001 From: Bharath SM Date: Fri, 7 Jul 2023 15:29:01 +0000 Subject: [PATCH 184/224] cifs: if deferred close is disabled then close files immediately commit df9d70c18616760c6504b97fec66b6379c172dbb upstream. If defer close timeout value is set to 0, then there is no need to include files in the deferred close list and utilize the delayed worker for closing. Instead, we can close them immediately. Signed-off-by: Bharath SM Reviewed-by: Shyam Prasad N Cc: stable@vger.kernel.org Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index 9a367d4c74e4..27c6d14e369f 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -954,8 +954,8 @@ int cifs_close(struct inode *inode, struct file *file) cfile = file->private_data; file->private_data = NULL; dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL); - if ((cinode->oplock == CIFS_CACHE_RHW_FLG) && - cinode->lease_granted && + if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG) + && cinode->lease_granted && !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) && dclose) { if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) { From 22fc9fd7230727d00de456e567e82bc56c2c5ca3 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 3 Jul 2023 11:01:42 -0700 Subject: [PATCH 185/224] xtensa: ISS: fix call to split_if_spec commit bc8d5916541fa19ca5bc598eb51a5f78eb891a36 upstream. split_if_spec expects a NULL-pointer as an end marker for the argument list, but tuntap_probe never supplied that terminating NULL. As a result incorrectly formatted interface specification string may cause a crash because of the random memory access. Fix that by adding NULL terminator to the split_if_spec argument list. Cc: stable@vger.kernel.org Fixes: 7282bee78798 ("[PATCH] xtensa: Architecture support for Tensilica Xtensa Part 8") Signed-off-by: Max Filippov Signed-off-by: Greg Kroah-Hartman --- arch/xtensa/platforms/iss/network.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c index 9ac46ab3a296..119345eeb04c 100644 --- a/arch/xtensa/platforms/iss/network.c +++ b/arch/xtensa/platforms/iss/network.c @@ -237,7 +237,7 @@ static int tuntap_probe(struct iss_net_private *lp, int index, char *init) init += sizeof(TRANSPORT_TUNTAP_NAME) - 1; if (*init == ',') { - rem = split_if_spec(init + 1, &mac_str, &dev_name); + rem = split_if_spec(init + 1, &mac_str, &dev_name, NULL); if (rem != NULL) { pr_err("%s: extra garbage on specification : '%s'\n", dev->name, rem); From 15ec83da431184c74b73ce2874357961e587f8ca Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 4 Jul 2023 11:15:15 -0700 Subject: [PATCH 186/224] perf/x86: Fix lockdep warning in for_each_sibling_event() on SPR commit 27c68c216ee1f1b086e789a64486e6511e380b8a upstream. On SPR, the load latency event needs an auxiliary event in the same group to work properly. There's a check in intel_pmu_hw_config() for this to iterate sibling events and find a mem-loads-aux event. The for_each_sibling_event() has a lockdep assert to make sure if it disabled hardirq or hold leader->ctx->mutex. This works well if the given event has a separate leader event since perf_try_init_event() grabs the leader->ctx->mutex to protect the sibling list. But it can cause a problem when the event itself is a leader since the event is not initialized yet and there's no ctx for the event. Actually I got a lockdep warning when I run the below command on SPR, but I guess it could be a NULL pointer dereference. $ perf record -d -e cpu/mem-loads/uP true The code path to the warning is: sys_perf_event_open() perf_event_alloc() perf_init_event() perf_try_init_event() x86_pmu_event_init() hsw_hw_config() intel_pmu_hw_config() for_each_sibling_event() lockdep_assert_event_ctx() We don't need for_each_sibling_event() when it's a standalone event. Let's return the error code directly. Fixes: f3c0eba28704 ("perf: Add a few assertions") Reported-by: Greg Thelen Signed-off-by: Namhyung Kim Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20230704181516.3293665-1-namhyung@kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 5a1d0ea402e4..2fb5e1541efc 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3975,6 +3975,13 @@ static int intel_pmu_hw_config(struct perf_event *event) struct perf_event *leader = event->group_leader; struct perf_event *sibling = NULL; + /* + * When this memload event is also the first event (no group + * exists yet), then there is no aux event before it. + */ + if (leader == event) + return -ENODATA; + if (!is_mem_loads_aux_event(leader)) { for_each_sibling_event(sibling, leader) { if (is_mem_loads_aux_event(sibling)) From 9a2c57fd328486b6f36ba99435a1ec4907617a32 Mon Sep 17 00:00:00 2001 From: Chungkai Yang Date: Wed, 5 Jul 2023 16:59:07 +0800 Subject: [PATCH 187/224] PM: QoS: Restore support for default value on frequency QoS commit 3a8395b565b5b4f019b3dc182be4c4541eb35ac8 upstream. Commit 8d36694245f2 ("PM: QoS: Add check to make sure CPU freq is non-negative") makes sure CPU freq is non-negative to avoid negative value converting to unsigned data type. However, when the value is PM_QOS_DEFAULT_VALUE, pm_qos_update_target specifically uses c->default_value which is set to FREQ_QOS_MIN/MAX_DEFAULT_VALUE when cpufreq_policy_alloc is executed, for this case handling. Adding check for PM_QOS_DEFAULT_VALUE to let default setting work will fix this problem. Fixes: 8d36694245f2 ("PM: QoS: Add check to make sure CPU freq is non-negative") Link: https://lore.kernel.org/lkml/20230626035144.19717-1-Chung-kai.Yang@mediatek.com/ Link: https://lore.kernel.org/lkml/20230627071727.16646-1-Chung-kai.Yang@mediatek.com/ Link: https://lore.kernel.org/lkml/CAJZ5v0gxNOWhC58PHeUhW_tgf6d1fGJVZ1x91zkDdht11yUv-A@mail.gmail.com/ Signed-off-by: Chungkai Yang Cc: 6.0+ # 6.0+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- kernel/power/qos.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/power/qos.c b/kernel/power/qos.c index af51ed6d45ef..782d3b41c1f3 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -426,6 +426,11 @@ late_initcall(cpu_latency_qos_init); /* Definitions related to the frequency QoS below. */ +static inline bool freq_qos_value_invalid(s32 value) +{ + return value < 0 && value != PM_QOS_DEFAULT_VALUE; +} + /** * freq_constraints_init - Initialize frequency QoS constraints. * @qos: Frequency QoS constraints to initialize. @@ -531,7 +536,7 @@ int freq_qos_add_request(struct freq_constraints *qos, { int ret; - if (IS_ERR_OR_NULL(qos) || !req || value < 0) + if (IS_ERR_OR_NULL(qos) || !req || freq_qos_value_invalid(value)) return -EINVAL; if (WARN(freq_qos_request_active(req), @@ -563,7 +568,7 @@ EXPORT_SYMBOL_GPL(freq_qos_add_request); */ int freq_qos_update_request(struct freq_qos_request *req, s32 new_value) { - if (!req || new_value < 0) + if (!req || freq_qos_value_invalid(new_value)) return -EINVAL; if (WARN(!freq_qos_request_active(req), From ba1ede19e601b579fd73eaa0ffc55d40a15318a6 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 24 May 2023 21:47:43 +0200 Subject: [PATCH 188/224] pwm: meson: modify and simplify calculation in meson_pwm_get_state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6b9352f3f8a1a35faf0efc1ad1807ee303467796 upstream. I don't see a reason why we should treat the case lo < hi differently and return 0 as period and duty_cycle. The current logic was added with c375bcbaabdb ("pwm: meson: Read the full hardware state in meson_pwm_get_state()"), Martin as original author doesn't remember why it was implemented this way back then. So let's handle it as normal use case and also remove the optimization for lo == 0. I think the improved readability is worth it. Fixes: c375bcbaabdb ("pwm: meson: Read the full hardware state in meson_pwm_get_state()") Reviewed-by: Uwe Kleine-König Reviewed-by: Dmitry Rokosov Acked-by: Martin Blumenstingl Cc: stable@vger.kernel.org Signed-off-by: Heiner Kallweit Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- drivers/pwm/pwm-meson.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c index 5732300eb004..3865538dd2d6 100644 --- a/drivers/pwm/pwm-meson.c +++ b/drivers/pwm/pwm-meson.c @@ -351,18 +351,8 @@ static int meson_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm, channel->lo = FIELD_GET(PWM_LOW_MASK, value); channel->hi = FIELD_GET(PWM_HIGH_MASK, value); - if (channel->lo == 0) { - state->period = meson_pwm_cnt_to_ns(chip, pwm, channel->hi); - state->duty_cycle = state->period; - } else if (channel->lo >= channel->hi) { - state->period = meson_pwm_cnt_to_ns(chip, pwm, - channel->lo + channel->hi); - state->duty_cycle = meson_pwm_cnt_to_ns(chip, pwm, - channel->hi); - } else { - state->period = 0; - state->duty_cycle = 0; - } + state->period = meson_pwm_cnt_to_ns(chip, pwm, channel->lo + channel->hi); + state->duty_cycle = meson_pwm_cnt_to_ns(chip, pwm, channel->hi); state->polarity = PWM_POLARITY_NORMAL; From 2e9a46e46786bc8822dd8575cdd9292e2796d6bc Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 24 May 2023 21:48:36 +0200 Subject: [PATCH 189/224] pwm: meson: fix handling of period/duty if greater than UINT_MAX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 87a2cbf02d7701255f9fcca7e5bd864a7bb397cf upstream. state->period/duty are of type u64, and if their value is greater than UINT_MAX, then the cast to uint will cause problems. Fix this by changing the type of the respective local variables to u64. Fixes: b79c3670e120 ("pwm: meson: Don't duplicate the polarity internally") Cc: stable@vger.kernel.org Suggested-by: Uwe Kleine-König Reviewed-by: Martin Blumenstingl Signed-off-by: Heiner Kallweit Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- drivers/pwm/pwm-meson.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c index 3865538dd2d6..33107204a951 100644 --- a/drivers/pwm/pwm-meson.c +++ b/drivers/pwm/pwm-meson.c @@ -156,8 +156,9 @@ static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm, const struct pwm_state *state) { struct meson_pwm_channel *channel = &meson->channels[pwm->hwpwm]; - unsigned int duty, period, pre_div, cnt, duty_cnt; + unsigned int pre_div, cnt, duty_cnt; unsigned long fin_freq; + u64 duty, period; duty = state->duty_cycle; period = state->period; @@ -179,19 +180,19 @@ static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm, dev_dbg(meson->chip.dev, "fin_freq: %lu Hz\n", fin_freq); - pre_div = div64_u64(fin_freq * (u64)period, NSEC_PER_SEC * 0xffffLL); + pre_div = div64_u64(fin_freq * period, NSEC_PER_SEC * 0xffffLL); if (pre_div > MISC_CLK_DIV_MASK) { dev_err(meson->chip.dev, "unable to get period pre_div\n"); return -EINVAL; } - cnt = div64_u64(fin_freq * (u64)period, NSEC_PER_SEC * (pre_div + 1)); + cnt = div64_u64(fin_freq * period, NSEC_PER_SEC * (pre_div + 1)); if (cnt > 0xffff) { dev_err(meson->chip.dev, "unable to get period cnt\n"); return -EINVAL; } - dev_dbg(meson->chip.dev, "period=%u pre_div=%u cnt=%u\n", period, + dev_dbg(meson->chip.dev, "period=%llu pre_div=%u cnt=%u\n", period, pre_div, cnt); if (duty == period) { @@ -204,14 +205,13 @@ static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm, channel->lo = cnt; } else { /* Then check is we can have the duty with the same pre_div */ - duty_cnt = div64_u64(fin_freq * (u64)duty, - NSEC_PER_SEC * (pre_div + 1)); + duty_cnt = div64_u64(fin_freq * duty, NSEC_PER_SEC * (pre_div + 1)); if (duty_cnt > 0xffff) { dev_err(meson->chip.dev, "unable to get duty cycle\n"); return -EINVAL; } - dev_dbg(meson->chip.dev, "duty=%u pre_div=%u duty_cnt=%u\n", + dev_dbg(meson->chip.dev, "duty=%llu pre_div=%u duty_cnt=%u\n", duty, pre_div, duty_cnt); channel->pre_div = pre_div; From ce3ec57faff559ccae1e0150c1f077eb2df648a4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 15 Jun 2023 13:52:36 +0200 Subject: [PATCH 190/224] fprobe: Release rethook after the ftrace_ops is unregistered commit 5f81018753dfd4989e33ece1f0cb6b8aae498b82 upstream. While running bpf selftests it's possible to get following fault: general protection fault, probably for non-canonical address \ 0x6b6b6b6b6b6b6b6b: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC NOPTI ... Call Trace: fprobe_handler+0xc1/0x270 ? __pfx_bpf_testmod_init+0x10/0x10 ? __pfx_bpf_testmod_init+0x10/0x10 ? bpf_fentry_test1+0x5/0x10 ? bpf_fentry_test1+0x5/0x10 ? bpf_testmod_init+0x22/0x80 ? do_one_initcall+0x63/0x2e0 ? rcu_is_watching+0xd/0x40 ? kmalloc_trace+0xaf/0xc0 ? do_init_module+0x60/0x250 ? __do_sys_finit_module+0xac/0x120 ? do_syscall_64+0x37/0x90 ? entry_SYSCALL_64_after_hwframe+0x72/0xdc In unregister_fprobe function we can't release fp->rethook while it's possible there are some of its users still running on another cpu. Moving rethook_free call after fp->ops is unregistered with unregister_ftrace_function call. Link: https://lore.kernel.org/all/20230615115236.3476617-1-jolsa@kernel.org/ Fixes: 5b0ab78998e3 ("fprobe: Add exit_handler support") Cc: stable@vger.kernel.org Reviewed-by: Steven Rostedt (Google) Signed-off-by: Jiri Olsa Acked-by: Masami Hiramatsu (Google) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/fprobe.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index e8143e368074..ab8cd71f410d 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -307,19 +307,13 @@ int unregister_fprobe(struct fprobe *fp) fp->ops.saved_func != fprobe_kprobe_handler)) return -EINVAL; - /* - * rethook_free() starts disabling the rethook, but the rethook handlers - * may be running on other processors at this point. To make sure that all - * current running handlers are finished, call unregister_ftrace_function() - * after this. - */ - if (fp->rethook) - rethook_free(fp->rethook); - ret = unregister_ftrace_function(&fp->ops); if (ret < 0) return ret; + if (fp->rethook) + rethook_free(fp->rethook); + ftrace_free_filter(&fp->ops); return ret; From fbcd0c2b569f7b8e25c3f2c51fd59be2cb327530 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Fri, 7 Jul 2023 23:03:19 +0900 Subject: [PATCH 191/224] fprobe: Ensure running fprobe_exit_handler() finished before calling rethook_free() commit 195b9cb5b288fec1c871ef89f78cc9a7461aad3a upstream. Ensure running fprobe_exit_handler() has finished before calling rethook_free() in the unregister_fprobe() so that caller can free the fprobe right after unregister_fprobe(). unregister_fprobe() ensured that all running fprobe_entry/exit_handler() have finished by calling unregister_ftrace_function() which synchronizes RCU. But commit 5f81018753df ("fprobe: Release rethook after the ftrace_ops is unregistered") changed to call rethook_free() after unregister_ftrace_function(). So call rethook_stop() to make rethook disabled before unregister_ftrace_function() and ensure it again. Here is the possible code flow that can call the exit handler after unregister_fprobe(). ------ CPU1 CPU2 call unregister_fprobe(fp) ... __fprobe_handler() rethook_hook() on probed function unregister_ftrace_function() return from probed function rethook hooks find rh->handler == fprobe_exit_handler call fprobe_exit_handler() rethook_free(): set rh->handler = NULL; return from unreigster_fprobe; call fp->exit_handler() <- (*) ------ (*) At this point, the exit handler is called after returning from unregister_fprobe(). This fixes it as following; ------ CPU1 CPU2 call unregister_fprobe() ... rethook_stop(): set rh->handler = NULL; __fprobe_handler() rethook_hook() on probed function unregister_ftrace_function() return from probed function rethook hooks find rh->handler == NULL return from rethook rethook_free() return from unreigster_fprobe; ------ Link: https://lore.kernel.org/all/168873859949.156157.13039240432299335849.stgit@devnote2/ Fixes: 5f81018753df ("fprobe: Release rethook after the ftrace_ops is unregistered") Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- include/linux/rethook.h | 1 + kernel/trace/fprobe.c | 3 +++ kernel/trace/rethook.c | 13 +++++++++++++ 3 files changed, 17 insertions(+) diff --git a/include/linux/rethook.h b/include/linux/rethook.h index c8ac1e5afcd1..bdbe6717f45a 100644 --- a/include/linux/rethook.h +++ b/include/linux/rethook.h @@ -59,6 +59,7 @@ struct rethook_node { }; struct rethook *rethook_alloc(void *data, rethook_handler_t handler); +void rethook_stop(struct rethook *rh); void rethook_free(struct rethook *rh); void rethook_add_node(struct rethook *rh, struct rethook_node *node); struct rethook_node *rethook_try_get(struct rethook *rh); diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index ab8cd71f410d..1322247ce648 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -307,6 +307,9 @@ int unregister_fprobe(struct fprobe *fp) fp->ops.saved_func != fprobe_kprobe_handler)) return -EINVAL; + if (fp->rethook) + rethook_stop(fp->rethook); + ret = unregister_ftrace_function(&fp->ops); if (ret < 0) return ret; diff --git a/kernel/trace/rethook.c b/kernel/trace/rethook.c index 60f6cb2b486b..468006cce7ca 100644 --- a/kernel/trace/rethook.c +++ b/kernel/trace/rethook.c @@ -53,6 +53,19 @@ static void rethook_free_rcu(struct rcu_head *head) kfree(rh); } +/** + * rethook_stop() - Stop using a rethook. + * @rh: the struct rethook to stop. + * + * Stop using a rethook to prepare for freeing it. If you want to wait for + * all running rethook handler before calling rethook_free(), you need to + * call this first and wait RCU, and call rethook_free(). + */ +void rethook_stop(struct rethook *rh) +{ + WRITE_ONCE(rh->handler, NULL); +} + /** * rethook_free() - Free struct rethook. * @rh: the struct rethook to be freed. From 938d5b7a75e18264887387ddf9169db6d8aeef98 Mon Sep 17 00:00:00 2001 From: Mateusz Stachyra Date: Tue, 4 Jul 2023 12:27:06 +0200 Subject: [PATCH 192/224] tracing: Fix null pointer dereference in tracing_err_log_open() commit 02b0095e2fbbc060560c1065f86a211d91e27b26 upstream. Fix an issue in function 'tracing_err_log_open'. The function doesn't call 'seq_open' if the file is opened only with write permissions, which results in 'file->private_data' being left as null. If we then use 'lseek' on that opened file, 'seq_lseek' dereferences 'file->private_data' in 'mutex_lock(&m->lock)', resulting in a kernel panic. Writing to this node requires root privileges, therefore this bug has very little security impact. Tracefs node: /sys/kernel/tracing/error_log Example Kernel panic: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000038 Call trace: mutex_lock+0x30/0x110 seq_lseek+0x34/0xb8 __arm64_sys_lseek+0x6c/0xb8 invoke_syscall+0x58/0x13c el0_svc_common+0xc4/0x10c do_el0_svc+0x24/0x98 el0_svc+0x24/0x88 el0t_64_sync_handler+0x84/0xe4 el0t_64_sync+0x1b4/0x1b8 Code: d503201f aa0803e0 aa1f03e1 aa0103e9 (c8e97d02) ---[ end trace 561d1b49c12cf8a5 ]--- Kernel panic - not syncing: Oops: Fatal exception Link: https://lore.kernel.org/linux-trace-kernel/20230703155237eucms1p4dfb6a19caa14c79eb6c823d127b39024@eucms1p4 Link: https://lore.kernel.org/linux-trace-kernel/20230704102706eucms1p30d7ecdcc287f46ad67679fc8491b2e0f@eucms1p3 Cc: stable@vger.kernel.org Fixes: 8a062902be725 ("tracing: Add tracing error log") Signed-off-by: Mateusz Stachyra Suggested-by: Steven Rostedt Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2849faac4cf2..27bbe180a2ef 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8062,7 +8062,7 @@ static const struct file_operations tracing_err_log_fops = { .open = tracing_err_log_open, .write = tracing_err_log_write, .read = seq_read, - .llseek = seq_lseek, + .llseek = tracing_lseek, .release = tracing_err_log_release, }; From 671486793f729fa8d79f5bd1f6224a2af00c2d63 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Tue, 4 Jul 2023 22:44:35 +0200 Subject: [PATCH 193/224] selftests: mptcp: connect: fail if nft supposed to work commit 221e4550454a822f9a11834e30694c7d1d65747c upstream. In case of "external" errors when preparing the environment for the TProxy tests, the subtests were marked as skipped. This is fine but it means these errors are ignored. On MPTCP Public CI, we do want to catch such issues and mark the selftest as failed if there are such issues. We can then use mptcp_lib_fail_if_expected_feature() helper that has been recently added to fail if needed. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 5fb62e9cd3ad ("selftests: mptcp: add tproxy test case") Cc: stable@vger.kernel.org Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 36dc2bab7a13..18c9b00ca058 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -719,6 +719,7 @@ table inet mangle { EOF if [ $? -ne 0 ]; then echo "SKIP: $msg, could not load nft ruleset" + mptcp_lib_fail_if_expected_feature "nft rules" return fi @@ -734,6 +735,7 @@ EOF if [ $? -ne 0 ]; then ip netns exec "$listener_ns" nft flush ruleset echo "SKIP: $msg, ip $r6flag rule failed" + mptcp_lib_fail_if_expected_feature "ip rule" return fi @@ -742,6 +744,7 @@ EOF ip netns exec "$listener_ns" nft flush ruleset ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100 echo "SKIP: $msg, ip route add local $local_addr failed" + mptcp_lib_fail_if_expected_feature "ip route" return fi From c118baa05fb9a9f6acd806bb45e0c9c0e41aaf16 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Tue, 4 Jul 2023 22:44:37 +0200 Subject: [PATCH 194/224] selftests: mptcp: sockopt: return error if wrong mark commit 9ac4c28eb70cd5ea5472a5e1c495dcdd597d4597 upstream. When an error was detected when checking the marks, a message was correctly printed mentioning the error but followed by another one saying everything was OK and the selftest was not marked as failed as expected. Now the 'ret' variable is directly set to 1 in order to make sure the exit is done with an error, similar to what is done in other functions. While at it, the error is correctly propagated to the caller. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: dc65fe82fb07 ("selftests: mptcp: add packet mark test case") Cc: stable@vger.kernel.org Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/mptcp_sockopt.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index a493eaf8633f..af4fccd4f5cc 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -121,6 +121,7 @@ check_mark() for v in $values; do if [ $v -ne 0 ]; then echo "FAIL: got $tables $values in ns $ns , not 0 - not all expected packets marked" 1>&2 + ret=1 return 1 fi done @@ -220,11 +221,11 @@ do_transfer() fi if [ $local_addr = "::" ];then - check_mark $listener_ns 6 - check_mark $connector_ns 6 + check_mark $listener_ns 6 || retc=1 + check_mark $connector_ns 6 || retc=1 else - check_mark $listener_ns 4 - check_mark $connector_ns 4 + check_mark $listener_ns 4 || retc=1 + check_mark $connector_ns 4 || retc=1 fi check_transfer $cin $sout "file received by server" From 4098a43182984a96a570b9b901b0d151168cc1c4 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Tue, 4 Jul 2023 22:44:38 +0200 Subject: [PATCH 195/224] selftests: mptcp: userspace_pm: use correct server port commit d8566d0e03922217f70d9be2d401fcb860986374 upstream. "server4_port" variable is not set but "app4_port" is the server port in v4 and the correct variable name to use. The port is optional so there was no visible impact. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: ca188a25d43f ("selftests: mptcp: userspace PM support for MP_PRIO signals") Cc: stable@vger.kernel.org Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/userspace_pm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index eb0f4f6afebd..5e6ce5ded260 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -847,7 +847,7 @@ test_prio() local count # Send MP_PRIO signal from client to server machine - ip netns exec "$ns2" ./pm_nl_ctl set 10.0.1.2 port "$client4_port" flags backup token "$client4_token" rip 10.0.1.1 rport "$server4_port" + ip netns exec "$ns2" ./pm_nl_ctl set 10.0.1.2 port "$client4_port" flags backup token "$client4_token" rip 10.0.1.1 rport "$app4_port" sleep 0.5 # Check TX From 08daab11f344cae8b8577b68e9ce8efd076dcca7 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Tue, 4 Jul 2023 22:44:39 +0200 Subject: [PATCH 196/224] selftests: mptcp: userspace_pm: report errors with 'remove' tests commit 966c6c3adfb1257ea8a839cdfad2b74092cc5532 upstream. A message was mentioning an issue with the "remove" tests but the selftest was not marked as failed. Directly exit with an error like it is done everywhere else in this selftest. Link: https://github.com/multipath-tcp/mptcp_net-next/issues/368 Fixes: 259a834fadda ("selftests: mptcp: functional tests for the userspace PM type") Cc: stable@vger.kernel.org Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/userspace_pm.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 5e6ce5ded260..cb6c28d40129 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -387,6 +387,7 @@ test_remove() stdbuf -o0 -e0 printf "[OK]\n" else stdbuf -o0 -e0 printf "[FAIL]\n" + exit 1 fi # RM_ADDR using an invalid addr id should result in no action @@ -401,6 +402,7 @@ test_remove() stdbuf -o0 -e0 printf "[OK]\n" else stdbuf -o0 -e0 printf "[FAIL]\n" + exit 1 fi # RM_ADDR from the client to server machine From ee352299a678b756e4ab6d851aa35dc7d597791e Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Tue, 4 Jul 2023 22:44:40 +0200 Subject: [PATCH 197/224] selftests: mptcp: depend on SYN_COOKIES commit 6c8880fcaa5c45355179b759c1d11737775e31fc upstream. MPTCP selftests are using TCP SYN Cookies for quite a while now, since v5.9. Some CIs don't have this config option enabled and this is causing issues in the tests: # ns1 MPTCP -> ns1 (10.0.1.1:10000 ) MPTCP (duration 167ms) sysctl: cannot stat /proc/sys/net/ipv4/tcp_syncookies: No such file or directory # [ OK ]./mptcp_connect.sh: line 554: [: -eq: unary operator expected There is no impact in the results but the test is not doing what it is supposed to do. Fixes: fed61c4b584c ("selftests: mptcp: make 2nd net namespace use tcp syn cookies unconditionally") Cc: stable@vger.kernel.org Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/config | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 6032f9b23c4c..e317c2e44dae 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -6,6 +6,7 @@ CONFIG_INET_DIAG=m CONFIG_INET_MPTCP_DIAG=m CONFIG_VETH=y CONFIG_NET_SCH_NETEM=m +CONFIG_SYN_COOKIES=y CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NETFILTER_NETLINK=m From 2f41d35b58c896f4b104d31c2f8363b58ae478e4 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Tue, 4 Jul 2023 22:44:41 +0200 Subject: [PATCH 198/224] selftests: mptcp: pm_nl_ctl: fix 32-bit support commit 61d9658050260dbcbf9055479b7ac5bbbe1e8831 upstream. When using pm_nl_ctl to validate userspace path-manager's behaviours, it was failing on 32-bit architectures ~half of the time. pm_nl_ctl was not reporting any error but the command was not doing what it was expected to do. As a result, the expected linked event was not triggered after and the test failed. This is due to the fact the token given in argument to the application was parsed as an integer with atoi(): in a 32-bit arch, if the number was bigger than INT_MAX, 2147483647 was used instead. This can simply be fixed by using strtoul() instead of atoi(). The errors have been seen "by chance" when manually looking at the results from LKFT. Fixes: 9a0b36509df0 ("selftests: mptcp: support MPTCP_PM_CMD_ANNOUNCE") Cc: stable@vger.kernel.org Fixes: ecd2a77d672f ("selftests: mptcp: support MPTCP_PM_CMD_REMOVE") Fixes: cf8d0a6dfd64 ("selftests: mptcp: support MPTCP_PM_CMD_SUBFLOW_CREATE") Fixes: 57cc361b8d38 ("selftests: mptcp: support MPTCP_PM_CMD_SUBFLOW_DESTROY") Fixes: ca188a25d43f ("selftests: mptcp: userspace PM support for MP_PRIO signals") Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/net/mptcp/pm_nl_ctl.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index abddf4c63e79..1887bd61bd9a 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -425,7 +425,7 @@ int dsf(int fd, int pm_family, int argc, char *argv[]) } /* token */ - token = atoi(params[4]); + token = strtoul(params[4], NULL, 10); rta = (void *)(data + off); rta->rta_type = MPTCP_PM_ATTR_TOKEN; rta->rta_len = RTA_LENGTH(4); @@ -551,7 +551,7 @@ int csf(int fd, int pm_family, int argc, char *argv[]) } /* token */ - token = atoi(params[4]); + token = strtoul(params[4], NULL, 10); rta = (void *)(data + off); rta->rta_type = MPTCP_PM_ATTR_TOKEN; rta->rta_len = RTA_LENGTH(4); @@ -598,7 +598,7 @@ int remove_addr(int fd, int pm_family, int argc, char *argv[]) if (++arg >= argc) error(1, 0, " missing token value"); - token = atoi(argv[arg]); + token = strtoul(argv[arg], NULL, 10); rta = (void *)(data + off); rta->rta_type = MPTCP_PM_ATTR_TOKEN; rta->rta_len = RTA_LENGTH(4); @@ -710,7 +710,7 @@ int announce_addr(int fd, int pm_family, int argc, char *argv[]) if (++arg >= argc) error(1, 0, " missing token value"); - token = atoi(argv[arg]); + token = strtoul(argv[arg], NULL, 10); } else error(1, 0, "unknown keyword %s", argv[arg]); } @@ -1347,7 +1347,7 @@ int set_flags(int fd, int pm_family, int argc, char *argv[]) error(1, 0, " missing token value"); /* token */ - token = atoi(argv[arg]); + token = strtoul(argv[arg], NULL, 10); } else if (!strcmp(argv[arg], "flags")) { char *tok, *str; From 837f92d27f5544468b5b9f3ace6fde5ec965fd53 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 11 Jul 2023 23:15:38 +0900 Subject: [PATCH 199/224] tracing/probes: Fix not to count error code to total length commit b41326b5e0f82e93592c4366359917b5d67b529f upstream. Fix not to count the error code (which is minus value) to the total used length of array, because it can mess up the return code of process_fetch_insn_bottom(). Also clear the 'ret' value because it will be used for calculating next data_loc entry. Link: https://lore.kernel.org/all/168908493827.123124.2175257289106364229.stgit@devnote2/ Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/8819b154-2ba1-43c3-98a2-cbde20892023@moroto.mountain/ Fixes: 9b960a38835f ("tracing: probeevent: Unify fetch_insn processing common part") Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_probe_tmpl.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h index b3bdb8ddb862..5db3e691392f 100644 --- a/kernel/trace/trace_probe_tmpl.h +++ b/kernel/trace/trace_probe_tmpl.h @@ -143,6 +143,8 @@ stage3: array: /* the last stage: Loop on array */ if (code->op == FETCH_OP_LP_ARRAY) { + if (ret < 0) + ret = 0; total += ret; if (++i < code->param) { code = s3; From a95c1fede27d143733116231ce47a969ab7500c0 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Tue, 11 Jul 2023 23:15:48 +0900 Subject: [PATCH 200/224] tracing/probes: Fix to update dynamic data counter if fetcharg uses it commit e38e2c6a9efc435f9de344b7c91f7697e01b47d5 upstream. Fix to update dynamic data counter ('dyndata') and max length ('maxlen') only if the fetcharg uses the dynamic data. Also get out arg->dynamic from unlikely(). This makes dynamic data address wrong if process_fetch_insn() returns error on !arg->dynamic case. Link: https://lore.kernel.org/all/168908494781.123124.8160245359962103684.stgit@devnote2/ Suggested-by: Steven Rostedt Link: https://lore.kernel.org/all/20230710233400.5aaf024e@gandalf.local.home/ Fixes: 9178412ddf5a ("tracing: probeevent: Return consumed bytes of dynamic area") Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu (Google) Reviewed-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_probe_tmpl.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace_probe_tmpl.h b/kernel/trace/trace_probe_tmpl.h index 5db3e691392f..c293a607d536 100644 --- a/kernel/trace/trace_probe_tmpl.h +++ b/kernel/trace/trace_probe_tmpl.h @@ -206,11 +206,13 @@ store_trace_args(void *data, struct trace_probe *tp, void *rec, if (unlikely(arg->dynamic)) *dl = make_data_loc(maxlen, dyndata - base); ret = process_fetch_insn(arg->code, rec, dl, base); - if (unlikely(ret < 0 && arg->dynamic)) { - *dl = make_data_loc(0, dyndata - base); - } else { - dyndata += ret; - maxlen -= ret; + if (arg->dynamic) { + if (unlikely(ret < 0)) { + *dl = make_data_loc(0, dyndata - base); + } else { + dyndata += ret; + maxlen -= ret; + } } } } From 95e34129f37ef68db70c8f3effc92f93ec3c5607 Mon Sep 17 00:00:00 2001 From: Beau Belgrave Date: Thu, 29 Jun 2023 23:50:48 +0000 Subject: [PATCH 201/224] tracing/user_events: Fix struct arg size match check commit d0a3022f30629a208e5944022caeca3568add9e7 upstream. When users register an event the name of the event and it's argument are checked to ensure they match if the event already exists. Normally all arguments are in the form of "type name", except for when the type starts with "struct ". In those cases, the size of the struct is passed in addition to the name, IE: "struct my_struct a 20" for an argument that is of type "struct my_struct" with a field name of "a" and has the size of 20 bytes. The current code does not honor the above case properly when comparing a match. This causes the event register to fail even when the same string was used for events that contain a struct argument within them. The example above "struct my_struct a 20" generates a match string of "struct my_struct a" omitting the size field. Add the struct size of the existing field when generating a comparison string for a struct field to ensure proper match checking. Link: https://lkml.kernel.org/r/20230629235049.581-2-beaub@linux.microsoft.com Cc: stable@vger.kernel.org Fixes: e6f89a149872 ("tracing/user_events: Ensure user provided strings are safely formatted") Signed-off-by: Beau Belgrave Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_user.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 492837609294..bff699fe9e71 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -707,6 +707,9 @@ static int user_field_set_string(struct ftrace_event_field *field, pos += snprintf(buf + pos, LEN_OR_ZERO, " "); pos += snprintf(buf + pos, LEN_OR_ZERO, "%s", field->name); + if (str_has_prefix(field->type, "struct ")) + pos += snprintf(buf + pos, LEN_OR_ZERO, " %d", field->size); + if (colon) pos += snprintf(buf + pos, LEN_OR_ZERO, ";"); From ff92567d906e825355e6530278eb336239511149 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Fri, 28 Apr 2023 00:53:33 -0700 Subject: [PATCH 202/224] scsi: qla2xxx: Multi-que support for TMF commit d90171dd0da50212f5950cc708240831e82f2f91 upstream. Add queue flush for task management command, before placing it on the wire. Do IO flush for all Request Q's. Reported-by: kernel test robot Link: https://lore.kernel.org/oe-kbuild-all/202304271702.GpIL391S-lkp@intel.com/ Cc: stable@vger.kernel.org Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230428075339.32551-2-njavali@marvell.com Reviewed-by: Himanshu Madhani > Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_def.h | 8 ++++ drivers/scsi/qla2xxx/qla_gbl.h | 2 +- drivers/scsi/qla2xxx/qla_init.c | 73 +++++++++++++++++++++++++-------- drivers/scsi/qla2xxx/qla_iocb.c | 5 ++- 4 files changed, 68 insertions(+), 20 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index cd4eb11b0707..525a694bd738 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -457,6 +457,14 @@ static inline be_id_t port_id_to_be_id(port_id_t port_id) return res; } +struct tmf_arg { + struct qla_qpair *qpair; + struct fc_port *fcport; + struct scsi_qla_host *vha; + u64 lun; + u32 flags; +}; + struct els_logo_payload { uint8_t opcode; uint8_t rsvd[3]; diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index ee54207fc531..316122709b0e 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -69,7 +69,7 @@ extern int qla2x00_async_logout(struct scsi_qla_host *, fc_port_t *); extern int qla2x00_async_prlo(struct scsi_qla_host *, fc_port_t *); extern int qla2x00_async_adisc(struct scsi_qla_host *, fc_port_t *, uint16_t *); -extern int qla2x00_async_tm_cmd(fc_port_t *, uint32_t, uint32_t, uint32_t); +extern int qla2x00_async_tm_cmd(fc_port_t *, uint32_t, uint64_t, uint32_t); struct qla_work_evt *qla2x00_alloc_work(struct scsi_qla_host *, enum qla_work_type); extern int qla24xx_async_gnl(struct scsi_qla_host *, fc_port_t *); diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 93f7f3dd5d82..d449e0f707c4 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -2021,17 +2021,19 @@ static void qla2x00_tmf_sp_done(srb_t *sp, int res) complete(&tmf->u.tmf.comp); } -int -qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun, - uint32_t tag) +static int +__qla2x00_async_tm_cmd(struct tmf_arg *arg) { - struct scsi_qla_host *vha = fcport->vha; + struct scsi_qla_host *vha = arg->vha; struct srb_iocb *tm_iocb; srb_t *sp; + unsigned long flags; int rval = QLA_FUNCTION_FAILED; + fc_port_t *fcport = arg->fcport; + /* ref: INIT */ - sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); + sp = qla2xxx_get_qpair_sp(vha, arg->qpair, fcport, GFP_KERNEL); if (!sp) goto done; @@ -2044,15 +2046,15 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun, tm_iocb = &sp->u.iocb_cmd; init_completion(&tm_iocb->u.tmf.comp); - tm_iocb->u.tmf.flags = flags; - tm_iocb->u.tmf.lun = lun; - - ql_dbg(ql_dbg_taskm, vha, 0x802f, - "Async-tmf hdl=%x loop-id=%x portid=%02x%02x%02x.\n", - sp->handle, fcport->loop_id, fcport->d_id.b.domain, - fcport->d_id.b.area, fcport->d_id.b.al_pa); + tm_iocb->u.tmf.flags = arg->flags; + tm_iocb->u.tmf.lun = arg->lun; rval = qla2x00_start_sp(sp); + ql_dbg(ql_dbg_taskm, vha, 0x802f, + "Async-tmf hdl=%x loop-id=%x portid=%02x%02x%02x ctrl=%x.\n", + sp->handle, fcport->loop_id, fcport->d_id.b.domain, + fcport->d_id.b.area, fcport->d_id.b.al_pa, arg->flags); + if (rval != QLA_SUCCESS) goto done_free_sp; wait_for_completion(&tm_iocb->u.tmf.comp); @@ -2066,12 +2068,14 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun, if (!test_bit(UNLOADING, &vha->dpc_flags) && !IS_QLAFX00(vha->hw)) { flags = tm_iocb->u.tmf.flags; - lun = (uint16_t)tm_iocb->u.tmf.lun; + if (flags & (TCF_LUN_RESET|TCF_ABORT_TASK_SET| + TCF_CLEAR_TASK_SET|TCF_CLEAR_ACA)) + flags = MK_SYNC_ID_LUN; + else + flags = MK_SYNC_ID; - /* Issue Marker IOCB */ - qla2x00_marker(vha, vha->hw->base_qpair, - fcport->loop_id, lun, - flags == TCF_LUN_RESET ? MK_SYNC_ID_LUN : MK_SYNC_ID); + qla2x00_marker(vha, sp->qpair, + sp->fcport->loop_id, arg->lun, flags); } done_free_sp: @@ -2081,6 +2085,41 @@ done: return rval; } +int +qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint64_t lun, + uint32_t tag) +{ + struct scsi_qla_host *vha = fcport->vha; + struct qla_qpair *qpair; + struct tmf_arg a; + struct completion comp; + int i, rval; + + init_completion(&comp); + a.vha = fcport->vha; + a.fcport = fcport; + a.lun = lun; + + if (vha->hw->mqenable) { + for (i = 0; i < vha->hw->num_qpairs; i++) { + qpair = vha->hw->queue_pair_map[i]; + if (!qpair) + continue; + a.qpair = qpair; + a.flags = flags|TCF_NOTMCMD_TO_TARGET; + rval = __qla2x00_async_tm_cmd(&a); + if (rval) + break; + } + } + + a.qpair = vha->hw->base_qpair; + a.flags = flags; + rval = __qla2x00_async_tm_cmd(&a); + + return rval; +} + int qla24xx_async_abort_command(srb_t *sp) { diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 605e94f97318..6ae337cc5a4c 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -2541,7 +2541,7 @@ qla24xx_tm_iocb(srb_t *sp, struct tsk_mgmt_entry *tsk) scsi_qla_host_t *vha = fcport->vha; struct qla_hw_data *ha = vha->hw; struct srb_iocb *iocb = &sp->u.iocb_cmd; - struct req_que *req = vha->req; + struct req_que *req = sp->qpair->req; flags = iocb->u.tmf.flags; lun = iocb->u.tmf.lun; @@ -2557,7 +2557,8 @@ qla24xx_tm_iocb(srb_t *sp, struct tsk_mgmt_entry *tsk) tsk->port_id[2] = fcport->d_id.b.domain; tsk->vp_index = fcport->vha->vp_idx; - if (flags == TCF_LUN_RESET) { + if (flags & (TCF_LUN_RESET | TCF_ABORT_TASK_SET| + TCF_CLEAR_TASK_SET|TCF_CLEAR_ACA)) { int_to_scsilun(lun, &tsk->lun); host_to_fcp_swap((uint8_t *)&tsk->lun, sizeof(tsk->lun)); From 843665c4266d61399c814fd721245cf10f2c3ba3 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Fri, 28 Apr 2023 00:53:34 -0700 Subject: [PATCH 203/224] scsi: qla2xxx: Fix task management cmd failure commit 9803fb5d27597ea98f2e05b0b6cfc48ae808458e upstream. Task management cmd failed with status 30h which means FW is not able to finish processing one task management before another task management for the same lun. Hence add wait for completion of marker to space it out. Reported-by: kernel test robot Link: https://lore.kernel.org/oe-kbuild-all/202304271802.uCZfwQC1-lkp@intel.com/ Cc: stable@vger.kernel.org Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230428075339.32551-3-njavali@marvell.com Reviewed-by: Himanshu Madhani > Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_def.h | 6 ++ drivers/scsi/qla2xxx/qla_init.c | 102 +++++++++++++++++++++++++++----- drivers/scsi/qla2xxx/qla_iocb.c | 28 +++++++-- drivers/scsi/qla2xxx/qla_isr.c | 26 +++++++- 4 files changed, 139 insertions(+), 23 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 525a694bd738..62ee5d35115a 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -463,6 +463,7 @@ struct tmf_arg { struct scsi_qla_host *vha; u64 lun; u32 flags; + uint8_t modifier; }; struct els_logo_payload { @@ -544,6 +545,10 @@ struct srb_iocb { uint32_t data; struct completion comp; __le16 comp_status; + + uint8_t modifier; + uint8_t vp_index; + uint16_t loop_id; } tmf; struct { #define SRB_FXDISC_REQ_DMA_VALID BIT_0 @@ -647,6 +652,7 @@ struct srb_iocb { #define SRB_SA_UPDATE 25 #define SRB_ELS_CMD_HST_NOLOGIN 26 #define SRB_SA_REPLACE 27 +#define SRB_MARKER 28 struct qla_els_pt_arg { u8 els_opcode; diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index d449e0f707c4..ca84190924f1 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -2014,6 +2014,80 @@ qla2x00_tmf_iocb_timeout(void *data) } } +static void qla_marker_sp_done(srb_t *sp, int res) +{ + struct srb_iocb *tmf = &sp->u.iocb_cmd; + + if (res != QLA_SUCCESS) + ql_dbg(ql_dbg_taskm, sp->vha, 0x8004, + "Async-marker fail hdl=%x portid=%06x ctrl=%x lun=%lld qp=%d.\n", + sp->handle, sp->fcport->d_id.b24, sp->u.iocb_cmd.u.tmf.flags, + sp->u.iocb_cmd.u.tmf.lun, sp->qpair->id); + + complete(&tmf->u.tmf.comp); +} + +#define START_SP_W_RETRIES(_sp, _rval) \ +{\ + int cnt = 5; \ + do { \ + _rval = qla2x00_start_sp(_sp); \ + if (_rval == EAGAIN) \ + msleep(1); \ + else \ + break; \ + cnt--; \ + } while (cnt); \ +} + +static int +qla26xx_marker(struct tmf_arg *arg) +{ + struct scsi_qla_host *vha = arg->vha; + struct srb_iocb *tm_iocb; + srb_t *sp; + int rval = QLA_FUNCTION_FAILED; + fc_port_t *fcport = arg->fcport; + + /* ref: INIT */ + sp = qla2xxx_get_qpair_sp(vha, arg->qpair, fcport, GFP_KERNEL); + if (!sp) + goto done; + + sp->type = SRB_MARKER; + sp->name = "marker"; + qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha), qla_marker_sp_done); + sp->u.iocb_cmd.timeout = qla2x00_tmf_iocb_timeout; + + tm_iocb = &sp->u.iocb_cmd; + init_completion(&tm_iocb->u.tmf.comp); + tm_iocb->u.tmf.modifier = arg->modifier; + tm_iocb->u.tmf.lun = arg->lun; + tm_iocb->u.tmf.loop_id = fcport->loop_id; + tm_iocb->u.tmf.vp_index = vha->vp_idx; + + START_SP_W_RETRIES(sp, rval); + + ql_dbg(ql_dbg_taskm, vha, 0x8006, + "Async-marker hdl=%x loop-id=%x portid=%06x modifier=%x lun=%lld qp=%d rval %d.\n", + sp->handle, fcport->loop_id, fcport->d_id.b24, + arg->modifier, arg->lun, sp->qpair->id, rval); + + if (rval != QLA_SUCCESS) { + ql_log(ql_log_warn, vha, 0x8031, + "Marker IOCB failed (%x).\n", rval); + goto done_free_sp; + } + + wait_for_completion(&tm_iocb->u.tmf.comp); + +done_free_sp: + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); +done: + return rval; +} + static void qla2x00_tmf_sp_done(srb_t *sp, int res) { struct srb_iocb *tmf = &sp->u.iocb_cmd; @@ -2027,7 +2101,6 @@ __qla2x00_async_tm_cmd(struct tmf_arg *arg) struct scsi_qla_host *vha = arg->vha; struct srb_iocb *tm_iocb; srb_t *sp; - unsigned long flags; int rval = QLA_FUNCTION_FAILED; fc_port_t *fcport = arg->fcport; @@ -2049,11 +2122,12 @@ __qla2x00_async_tm_cmd(struct tmf_arg *arg) tm_iocb->u.tmf.flags = arg->flags; tm_iocb->u.tmf.lun = arg->lun; - rval = qla2x00_start_sp(sp); + START_SP_W_RETRIES(sp, rval); + ql_dbg(ql_dbg_taskm, vha, 0x802f, - "Async-tmf hdl=%x loop-id=%x portid=%02x%02x%02x ctrl=%x.\n", - sp->handle, fcport->loop_id, fcport->d_id.b.domain, - fcport->d_id.b.area, fcport->d_id.b.al_pa, arg->flags); + "Async-tmf hdl=%x loop-id=%x portid=%06x ctrl=%x lun=%lld qp=%d rval=%x.\n", + sp->handle, fcport->loop_id, fcport->d_id.b24, + arg->flags, arg->lun, sp->qpair->id, rval); if (rval != QLA_SUCCESS) goto done_free_sp; @@ -2066,17 +2140,8 @@ __qla2x00_async_tm_cmd(struct tmf_arg *arg) "TM IOCB failed (%x).\n", rval); } - if (!test_bit(UNLOADING, &vha->dpc_flags) && !IS_QLAFX00(vha->hw)) { - flags = tm_iocb->u.tmf.flags; - if (flags & (TCF_LUN_RESET|TCF_ABORT_TASK_SET| - TCF_CLEAR_TASK_SET|TCF_CLEAR_ACA)) - flags = MK_SYNC_ID_LUN; - else - flags = MK_SYNC_ID; - - qla2x00_marker(vha, sp->qpair, - sp->fcport->loop_id, arg->lun, flags); - } + if (!test_bit(UNLOADING, &vha->dpc_flags) && !IS_QLAFX00(vha->hw)) + rval = qla26xx_marker(arg); done_free_sp: /* ref: INIT */ @@ -2100,6 +2165,11 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint64_t lun, a.fcport = fcport; a.lun = lun; + if (flags & (TCF_LUN_RESET|TCF_ABORT_TASK_SET|TCF_CLEAR_TASK_SET|TCF_CLEAR_ACA)) + a.modifier = MK_SYNC_ID_LUN; + else + a.modifier = MK_SYNC_ID; + if (vha->hw->mqenable) { for (i = 0; i < vha->hw->num_qpairs; i++) { qpair = vha->hw->queue_pair_map[i]; diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index 6ae337cc5a4c..f6a9fdb3151d 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -522,21 +522,25 @@ __qla2x00_marker(struct scsi_qla_host *vha, struct qla_qpair *qpair, return (QLA_FUNCTION_FAILED); } + mrk24 = (struct mrk_entry_24xx *)mrk; + mrk->entry_type = MARKER_TYPE; mrk->modifier = type; if (type != MK_SYNC_ALL) { if (IS_FWI2_CAPABLE(ha)) { - mrk24 = (struct mrk_entry_24xx *) mrk; mrk24->nport_handle = cpu_to_le16(loop_id); int_to_scsilun(lun, (struct scsi_lun *)&mrk24->lun); host_to_fcp_swap(mrk24->lun, sizeof(mrk24->lun)); mrk24->vp_index = vha->vp_idx; - mrk24->handle = make_handle(req->id, mrk24->handle); } else { SET_TARGET_ID(ha, mrk->target, loop_id); mrk->lun = cpu_to_le16((uint16_t)lun); } } + + if (IS_FWI2_CAPABLE(ha)) + mrk24->handle = QLA_SKIP_HANDLE; + wmb(); qla2x00_start_iocbs(vha, req); @@ -3859,9 +3863,9 @@ int qla_get_iocbs_resource(struct srb *sp) case SRB_NACK_LOGO: case SRB_LOGOUT_CMD: case SRB_CTRL_VP: - push_it_through = true; - fallthrough; + case SRB_MARKER: default: + push_it_through = true; get_exch = false; } @@ -3877,6 +3881,19 @@ int qla_get_iocbs_resource(struct srb *sp) return qla_get_fw_resources(sp->qpair, &sp->iores); } +static void +qla_marker_iocb(srb_t *sp, struct mrk_entry_24xx *mrk) +{ + mrk->entry_type = MARKER_TYPE; + mrk->modifier = sp->u.iocb_cmd.u.tmf.modifier; + if (sp->u.iocb_cmd.u.tmf.modifier != MK_SYNC_ALL) { + mrk->nport_handle = cpu_to_le16(sp->u.iocb_cmd.u.tmf.loop_id); + int_to_scsilun(sp->u.iocb_cmd.u.tmf.lun, (struct scsi_lun *)&mrk->lun); + host_to_fcp_swap(mrk->lun, sizeof(mrk->lun)); + mrk->vp_index = sp->u.iocb_cmd.u.tmf.vp_index; + } +} + int qla2x00_start_sp(srb_t *sp) { @@ -3980,6 +3997,9 @@ qla2x00_start_sp(srb_t *sp) case SRB_SA_REPLACE: qla24xx_sa_replace_iocb(sp, pkt); break; + case SRB_MARKER: + qla_marker_iocb(sp, pkt); + break; default: break; } diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 86928a762a7a..23b8c020e6ac 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -3750,6 +3750,28 @@ static int qla_chk_cont_iocb_avail(struct scsi_qla_host *vha, return rc; } +static void qla_marker_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, + struct mrk_entry_24xx *pkt) +{ + const char func[] = "MRK-IOCB"; + srb_t *sp; + int res = QLA_SUCCESS; + + if (!IS_FWI2_CAPABLE(vha->hw)) + return; + + sp = qla2x00_get_sp_from_handle(vha, func, req, pkt); + if (!sp) + return; + + if (pkt->entry_status) { + ql_dbg(ql_dbg_taskm, vha, 0x8025, "marker failure.\n"); + res = QLA_COMMAND_ERROR; + } + sp->u.iocb_cmd.u.tmf.data = res; + sp->done(sp, res); +} + /** * qla24xx_process_response_queue() - Process response queue entries. * @vha: SCSI driver HA context @@ -3864,9 +3886,7 @@ process_err: (struct nack_to_isp *)pkt); break; case MARKER_TYPE: - /* Do nothing in this case, this check is to prevent it - * from falling into default case - */ + qla_marker_iocb_entry(vha, rsp->req, (struct mrk_entry_24xx *)pkt); break; case ABORT_IOCB_TYPE: qla24xx_abort_iocb_entry(vha, rsp->req, From 35985b0741c13eff2144ba4764906f472bf6cfe8 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Fri, 28 Apr 2023 00:53:35 -0700 Subject: [PATCH 204/224] scsi: qla2xxx: Fix task management cmd fail due to unavailable resource commit 6a87679626b51b53fbb6be417ad8eb083030b617 upstream. Task management command failed with status 2Ch which is a result of too many task management commands sent to the same target. Hence limit task management commands to 8 per target. Reported-by: kernel test robot Link: https://lore.kernel.org/oe-kbuild-all/202304271952.NKNmoFzv-lkp@intel.com/ Cc: stable@vger.kernel.org Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230428075339.32551-4-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_def.h | 3 ++ drivers/scsi/qla2xxx/qla_init.c | 63 ++++++++++++++++++++++++++++++--- 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 62ee5d35115a..888bd861a223 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -2535,6 +2535,7 @@ enum rscn_addr_format { typedef struct fc_port { struct list_head list; struct scsi_qla_host *vha; + struct list_head tmf_pending; unsigned int conf_compl_supported:1; unsigned int deleted:2; @@ -2555,6 +2556,8 @@ typedef struct fc_port { unsigned int do_prli_nvme:1; uint8_t nvme_flag; + uint8_t active_tmf; +#define MAX_ACTIVE_TMF 8 uint8_t node_name[WWN_SIZE]; uint8_t port_name[WWN_SIZE]; diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index ca84190924f1..4987becaa87d 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -2150,6 +2150,54 @@ done: return rval; } +static void qla_put_tmf(fc_port_t *fcport) +{ + struct scsi_qla_host *vha = fcport->vha; + struct qla_hw_data *ha = vha->hw; + unsigned long flags; + + spin_lock_irqsave(&ha->tgt.sess_lock, flags); + fcport->active_tmf--; + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); +} + +static +int qla_get_tmf(fc_port_t *fcport) +{ + struct scsi_qla_host *vha = fcport->vha; + struct qla_hw_data *ha = vha->hw; + unsigned long flags; + int rc = 0; + LIST_HEAD(tmf_elem); + + spin_lock_irqsave(&ha->tgt.sess_lock, flags); + list_add_tail(&tmf_elem, &fcport->tmf_pending); + + while (fcport->active_tmf >= MAX_ACTIVE_TMF) { + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + + msleep(1); + + spin_lock_irqsave(&ha->tgt.sess_lock, flags); + if (fcport->deleted) { + rc = EIO; + break; + } + if (fcport->active_tmf < MAX_ACTIVE_TMF && + list_is_first(&tmf_elem, &fcport->tmf_pending)) + break; + } + + list_del(&tmf_elem); + + if (!rc) + fcport->active_tmf++; + + spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + + return rc; +} + int qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint64_t lun, uint32_t tag) @@ -2157,18 +2205,19 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint64_t lun, struct scsi_qla_host *vha = fcport->vha; struct qla_qpair *qpair; struct tmf_arg a; - struct completion comp; int i, rval; - init_completion(&comp); a.vha = fcport->vha; a.fcport = fcport; a.lun = lun; - - if (flags & (TCF_LUN_RESET|TCF_ABORT_TASK_SET|TCF_CLEAR_TASK_SET|TCF_CLEAR_ACA)) + if (flags & (TCF_LUN_RESET|TCF_ABORT_TASK_SET|TCF_CLEAR_TASK_SET|TCF_CLEAR_ACA)) { a.modifier = MK_SYNC_ID_LUN; - else + + if (qla_get_tmf(fcport)) + return QLA_FUNCTION_FAILED; + } else { a.modifier = MK_SYNC_ID; + } if (vha->hw->mqenable) { for (i = 0; i < vha->hw->num_qpairs; i++) { @@ -2187,6 +2236,9 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint64_t lun, a.flags = flags; rval = __qla2x00_async_tm_cmd(&a); + if (a.modifier == MK_SYNC_ID_LUN) + qla_put_tmf(fcport); + return rval; } @@ -5422,6 +5474,7 @@ qla2x00_alloc_fcport(scsi_qla_host_t *vha, gfp_t flags) INIT_WORK(&fcport->reg_work, qla_register_fcport_fn); INIT_LIST_HEAD(&fcport->gnl_entry); INIT_LIST_HEAD(&fcport->list); + INIT_LIST_HEAD(&fcport->tmf_pending); INIT_LIST_HEAD(&fcport->sess_cmd_list); spin_lock_init(&fcport->sess_cmd_lock); From 1802e5d0988ad4f1aec361041b94d8d95e7523e5 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Fri, 28 Apr 2023 00:53:36 -0700 Subject: [PATCH 205/224] scsi: qla2xxx: Fix hang in task management commit 9ae615c5bfd37bd091772969b1153de5335ea986 upstream. Task management command hangs where a side band chip reset failed to nudge the TMF from it's current send path. Add additional error check to block TMF from entering during chip reset and along the TMF path to cause it to bail out, skip over abort of marker. Cc: stable@vger.kernel.org Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230428075339.32551-5-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_def.h | 4 +++ drivers/scsi/qla2xxx/qla_init.c | 60 +++++++++++++++++++++++++++++++-- 2 files changed, 61 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 888bd861a223..63cecd25fe43 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -5499,4 +5499,8 @@ struct ql_vnd_tgt_stats_resp { _fp->disc_state, _fp->scan_state, _fp->loop_id, _fp->deleted, \ _fp->flags +#define TMF_NOT_READY(_fcport) \ + (!_fcport || IS_SESSION_DELETED(_fcport) || atomic_read(&_fcport->state) != FCS_ONLINE || \ + !_fcport->vha->hw->flags.fw_started) + #endif diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 4987becaa87d..30bbf33e3a6a 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1997,6 +1997,11 @@ qla2x00_tmf_iocb_timeout(void *data) int rc, h; unsigned long flags; + if (sp->type == SRB_MARKER) { + complete(&tmf->u.tmf.comp); + return; + } + rc = qla24xx_async_abort_cmd(sp, false); if (rc) { spin_lock_irqsave(sp->qpair->qp_lock_ptr, flags); @@ -2024,6 +2029,7 @@ static void qla_marker_sp_done(srb_t *sp, int res) sp->handle, sp->fcport->d_id.b24, sp->u.iocb_cmd.u.tmf.flags, sp->u.iocb_cmd.u.tmf.lun, sp->qpair->id); + sp->u.iocb_cmd.u.tmf.data = res; complete(&tmf->u.tmf.comp); } @@ -2040,6 +2046,11 @@ static void qla_marker_sp_done(srb_t *sp, int res) } while (cnt); \ } +/** + * qla26xx_marker: send marker IOCB and wait for the completion of it. + * @arg: pointer to argument list. + * It is assume caller will provide an fcport pointer and modifier + */ static int qla26xx_marker(struct tmf_arg *arg) { @@ -2049,6 +2060,14 @@ qla26xx_marker(struct tmf_arg *arg) int rval = QLA_FUNCTION_FAILED; fc_port_t *fcport = arg->fcport; + if (TMF_NOT_READY(arg->fcport)) { + ql_dbg(ql_dbg_taskm, vha, 0x8039, + "FC port not ready for marker loop-id=%x portid=%06x modifier=%x lun=%lld qp=%d.\n", + fcport->loop_id, fcport->d_id.b24, + arg->modifier, arg->lun, arg->qpair->id); + return QLA_SUSPENDED; + } + /* ref: INIT */ sp = qla2xxx_get_qpair_sp(vha, arg->qpair, fcport, GFP_KERNEL); if (!sp) @@ -2075,11 +2094,19 @@ qla26xx_marker(struct tmf_arg *arg) if (rval != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x8031, - "Marker IOCB failed (%x).\n", rval); + "Marker IOCB send failure (%x).\n", rval); goto done_free_sp; } wait_for_completion(&tm_iocb->u.tmf.comp); + rval = tm_iocb->u.tmf.data; + + if (rval != QLA_SUCCESS) { + ql_log(ql_log_warn, vha, 0x8019, + "Marker failed hdl=%x loop-id=%x portid=%06x modifier=%x lun=%lld qp=%d rval %d.\n", + sp->handle, fcport->loop_id, fcport->d_id.b24, + arg->modifier, arg->lun, sp->qpair->id, rval); + } done_free_sp: /* ref: INIT */ @@ -2092,6 +2119,8 @@ static void qla2x00_tmf_sp_done(srb_t *sp, int res) { struct srb_iocb *tmf = &sp->u.iocb_cmd; + if (res) + tmf->u.tmf.data = res; complete(&tmf->u.tmf.comp); } @@ -2105,6 +2134,14 @@ __qla2x00_async_tm_cmd(struct tmf_arg *arg) fc_port_t *fcport = arg->fcport; + if (TMF_NOT_READY(arg->fcport)) { + ql_dbg(ql_dbg_taskm, vha, 0x8032, + "FC port not ready for TM command loop-id=%x portid=%06x modifier=%x lun=%lld qp=%d.\n", + fcport->loop_id, fcport->d_id.b24, + arg->modifier, arg->lun, arg->qpair->id); + return QLA_SUSPENDED; + } + /* ref: INIT */ sp = qla2xxx_get_qpair_sp(vha, arg->qpair, fcport, GFP_KERNEL); if (!sp) @@ -2179,7 +2216,9 @@ int qla_get_tmf(fc_port_t *fcport) msleep(1); spin_lock_irqsave(&ha->tgt.sess_lock, flags); - if (fcport->deleted) { + if (TMF_NOT_READY(fcport)) { + ql_log(ql_log_warn, vha, 0x802c, + "Unable to acquire TM resource due to disruption.\n"); rc = EIO; break; } @@ -2205,7 +2244,10 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint64_t lun, struct scsi_qla_host *vha = fcport->vha; struct qla_qpair *qpair; struct tmf_arg a; - int i, rval; + int i, rval = QLA_SUCCESS; + + if (TMF_NOT_READY(fcport)) + return QLA_SUSPENDED; a.vha = fcport->vha; a.fcport = fcport; @@ -2224,6 +2266,14 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint64_t lun, qpair = vha->hw->queue_pair_map[i]; if (!qpair) continue; + + if (TMF_NOT_READY(fcport)) { + ql_log(ql_log_warn, vha, 0x8026, + "Unable to send TM due to disruption.\n"); + rval = QLA_SUSPENDED; + break; + } + a.qpair = qpair; a.flags = flags|TCF_NOTMCMD_TO_TARGET; rval = __qla2x00_async_tm_cmd(&a); @@ -2232,10 +2282,14 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint64_t lun, } } + if (rval) + goto bailout; + a.qpair = vha->hw->base_qpair; a.flags = flags; rval = __qla2x00_async_tm_cmd(&a); +bailout: if (a.modifier == MK_SYNC_ID_LUN) qla_put_tmf(fcport); From 90770dad1eb30967ebd8d37d82830bcf270b3293 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Fri, 28 Apr 2023 00:53:38 -0700 Subject: [PATCH 206/224] scsi: qla2xxx: Wait for io return on terminate rport commit fc0cba0c7be8261a1625098bd1d695077ec621c9 upstream. System crash due to use after free. Current code allows terminate_rport_io to exit before making sure all IOs has returned. For FCP-2 device, IO's can hang on in HW because driver has not tear down the session in FW at first sign of cable pull. When dev_loss_tmo timer pops, terminate_rport_io is called and upper layer is about to free various resources. Terminate_rport_io trigger qla to do the final cleanup, but the cleanup might not be fast enough where it leave qla still holding on to the same resource. Wait for IO's to return to upper layer before resources are freed. Cc: stable@vger.kernel.org Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230428075339.32551-7-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_attr.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index b67ad30d56e6..64734d6e8ccb 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -2750,6 +2750,7 @@ static void qla2x00_terminate_rport_io(struct fc_rport *rport) { fc_port_t *fcport = *(fc_port_t **)rport->dd_data; + scsi_qla_host_t *vha; if (!fcport) return; @@ -2759,9 +2760,12 @@ qla2x00_terminate_rport_io(struct fc_rport *rport) if (test_bit(ABORT_ISP_ACTIVE, &fcport->vha->dpc_flags)) return; + vha = fcport->vha; if (unlikely(pci_channel_offline(fcport->vha->hw->pdev))) { qla2x00_abort_all_cmds(fcport->vha, DID_NO_CONNECT << 16); + qla2x00_eh_wait_for_pending_commands(fcport->vha, fcport->d_id.b24, + 0, WAIT_TARGET); return; } /* @@ -2786,6 +2790,15 @@ qla2x00_terminate_rport_io(struct fc_rport *rport) qla2x00_port_logout(fcport->vha, fcport); } } + + /* check for any straggling io left behind */ + if (qla2x00_eh_wait_for_pending_commands(fcport->vha, fcport->d_id.b24, 0, WAIT_TARGET)) { + ql_log(ql_log_warn, vha, 0x300b, + "IO not return. Resetting. \n"); + set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); + qla2xxx_wake_dpc(vha); + qla2x00_wait_for_chip_reset(vha); + } } static int From d994ac7c7842e416c36eea3271206b9f830b93ce Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Fri, 28 Apr 2023 00:53:37 -0700 Subject: [PATCH 207/224] scsi: qla2xxx: Fix mem access after free commit b843adde8d490934d042fbe9e3e46697cb3a64d2 upstream. System crash, where driver is accessing scsi layer's memory (scsi_cmnd->device->host) to search for a well known internal pointer (vha). The scsi_cmnd was released back to upper layer which could be freed, but the driver is still accessing it. 7 [ffffa8e8d2c3f8d0] page_fault at ffffffff86c010fe [exception RIP: __qla2x00_eh_wait_for_pending_commands+240] RIP: ffffffffc0642350 RSP: ffffa8e8d2c3f988 RFLAGS: 00010286 RAX: 0000000000000165 RBX: 0000000000000002 RCX: 00000000000036d8 RDX: 0000000000000000 RSI: ffff9c5c56535188 RDI: 0000000000000286 RBP: ffff9c5bf7aa4a58 R8: ffff9c589aecdb70 R9: 00000000000003d1 R10: 0000000000000001 R11: 0000000000380000 R12: ffff9c5c5392bc78 R13: ffff9c57044ff5c0 R14: ffff9c56b5a3aa00 R15: 00000000000006db ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 8 [ffffa8e8d2c3f9c8] qla2x00_eh_wait_for_pending_commands at ffffffffc0646dd5 [qla2xxx] 9 [ffffa8e8d2c3fa00] __qla2x00_async_tm_cmd at ffffffffc0658094 [qla2xxx] Remove access of freed memory. Currently the driver was checking to see if scsi_done was called by seeing if the sp->type has changed. Instead, check to see if the command has left the oustanding_cmds[] array as sign of scsi_done was called. Cc: stable@vger.kernel.org Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230428075339.32551-6-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_isr.c | 38 ++++++++-- drivers/scsi/qla2xxx/qla_os.c | 130 ++++++++++++++++----------------- 2 files changed, 95 insertions(+), 73 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 23b8c020e6ac..b41d604ca9bc 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1862,9 +1862,9 @@ qla2x00_process_completed_request(struct scsi_qla_host *vha, } } -srb_t * -qla2x00_get_sp_from_handle(scsi_qla_host_t *vha, const char *func, - struct req_que *req, void *iocb) +static srb_t * +qla_get_sp_from_handle(scsi_qla_host_t *vha, const char *func, + struct req_que *req, void *iocb, u16 *ret_index) { struct qla_hw_data *ha = vha->hw; sts_entry_t *pkt = iocb; @@ -1899,12 +1899,25 @@ qla2x00_get_sp_from_handle(scsi_qla_host_t *vha, const char *func, return NULL; } - req->outstanding_cmds[index] = NULL; - + *ret_index = index; qla_put_fw_resources(sp->qpair, &sp->iores); return sp; } +srb_t * +qla2x00_get_sp_from_handle(scsi_qla_host_t *vha, const char *func, + struct req_que *req, void *iocb) +{ + uint16_t index; + srb_t *sp; + + sp = qla_get_sp_from_handle(vha, func, req, iocb, &index); + if (sp) + req->outstanding_cmds[index] = NULL; + + return sp; +} + static void qla2x00_mbx_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, struct mbx_entry *mbx) @@ -3237,13 +3250,13 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) return; } - req->outstanding_cmds[handle] = NULL; cp = GET_CMD_SP(sp); if (cp == NULL) { ql_dbg(ql_dbg_io, vha, 0x3018, "Command already returned (0x%x/%p).\n", sts->handle, sp); + req->outstanding_cmds[handle] = NULL; return; } @@ -3514,6 +3527,9 @@ out: if (rsp->status_srb == NULL) sp->done(sp, res); + + /* for io's, clearing of outstanding_cmds[handle] means scsi_done was called */ + req->outstanding_cmds[handle] = NULL; } /** @@ -3590,6 +3606,7 @@ qla2x00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, sts_entry_t *pkt) uint16_t que = MSW(pkt->handle); struct req_que *req = NULL; int res = DID_ERROR << 16; + u16 index; ql_dbg(ql_dbg_async, vha, 0x502a, "iocb type %xh with error status %xh, handle %xh, rspq id %d\n", @@ -3608,7 +3625,6 @@ qla2x00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, sts_entry_t *pkt) switch (pkt->entry_type) { case NOTIFY_ACK_TYPE: - case STATUS_TYPE: case STATUS_CONT_TYPE: case LOGINOUT_PORT_IOCB_TYPE: case CT_IOCB_TYPE: @@ -3628,6 +3644,14 @@ qla2x00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, sts_entry_t *pkt) case CTIO_TYPE7: case CTIO_CRC2: return 1; + case STATUS_TYPE: + sp = qla_get_sp_from_handle(vha, func, req, pkt, &index); + if (sp) { + sp->done(sp, res); + req->outstanding_cmds[index] = NULL; + return 0; + } + break; } fatal: ql_log(ql_log_warn, vha, 0x5030, diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 08dc825fbf4f..fc307473c94c 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1068,43 +1068,6 @@ qc24_fail_command: return 0; } -/* - * qla2x00_eh_wait_on_command - * Waits for the command to be returned by the Firmware for some - * max time. - * - * Input: - * cmd = Scsi Command to wait on. - * - * Return: - * Completed in time : QLA_SUCCESS - * Did not complete in time : QLA_FUNCTION_FAILED - */ -static int -qla2x00_eh_wait_on_command(struct scsi_cmnd *cmd) -{ -#define ABORT_POLLING_PERIOD 1000 -#define ABORT_WAIT_ITER ((2 * 1000) / (ABORT_POLLING_PERIOD)) - unsigned long wait_iter = ABORT_WAIT_ITER; - scsi_qla_host_t *vha = shost_priv(cmd->device->host); - struct qla_hw_data *ha = vha->hw; - srb_t *sp = scsi_cmd_priv(cmd); - int ret = QLA_SUCCESS; - - if (unlikely(pci_channel_offline(ha->pdev)) || ha->flags.eeh_busy) { - ql_dbg(ql_dbg_taskm, vha, 0x8005, - "Return:eh_wait.\n"); - return ret; - } - - while (sp->type && wait_iter--) - msleep(ABORT_POLLING_PERIOD); - if (sp->type) - ret = QLA_FUNCTION_FAILED; - - return ret; -} - /* * qla2x00_wait_for_hba_online * Wait till the HBA is online after going through @@ -1355,6 +1318,9 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) return ret; } +#define ABORT_POLLING_PERIOD 1000 +#define ABORT_WAIT_ITER ((2 * 1000) / (ABORT_POLLING_PERIOD)) + /* * Returns: QLA_SUCCESS or QLA_FUNCTION_FAILED. */ @@ -1368,41 +1334,73 @@ __qla2x00_eh_wait_for_pending_commands(struct qla_qpair *qpair, unsigned int t, struct req_que *req = qpair->req; srb_t *sp; struct scsi_cmnd *cmd; + unsigned long wait_iter = ABORT_WAIT_ITER; + bool found; + struct qla_hw_data *ha = vha->hw; status = QLA_SUCCESS; - spin_lock_irqsave(qpair->qp_lock_ptr, flags); - for (cnt = 1; status == QLA_SUCCESS && - cnt < req->num_outstanding_cmds; cnt++) { - sp = req->outstanding_cmds[cnt]; - if (!sp) - continue; - if (sp->type != SRB_SCSI_CMD) - continue; - if (vha->vp_idx != sp->vha->vp_idx) - continue; - match = 0; - cmd = GET_CMD_SP(sp); - switch (type) { - case WAIT_HOST: - match = 1; - break; - case WAIT_TARGET: - match = cmd->device->id == t; - break; - case WAIT_LUN: - match = (cmd->device->id == t && - cmd->device->lun == l); - break; - } - if (!match) - continue; + while (wait_iter--) { + found = false; - spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); - status = qla2x00_eh_wait_on_command(cmd); spin_lock_irqsave(qpair->qp_lock_ptr, flags); + for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) { + sp = req->outstanding_cmds[cnt]; + if (!sp) + continue; + if (sp->type != SRB_SCSI_CMD) + continue; + if (vha->vp_idx != sp->vha->vp_idx) + continue; + match = 0; + cmd = GET_CMD_SP(sp); + switch (type) { + case WAIT_HOST: + match = 1; + break; + case WAIT_TARGET: + if (sp->fcport) + match = sp->fcport->d_id.b24 == t; + else + match = 0; + break; + case WAIT_LUN: + if (sp->fcport) + match = (sp->fcport->d_id.b24 == t && + cmd->device->lun == l); + else + match = 0; + break; + } + if (!match) + continue; + + spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); + + if (unlikely(pci_channel_offline(ha->pdev)) || + ha->flags.eeh_busy) { + ql_dbg(ql_dbg_taskm, vha, 0x8005, + "Return:eh_wait.\n"); + return status; + } + + /* + * SRB_SCSI_CMD is still in the outstanding_cmds array. + * it means scsi_done has not called. Wait for it to + * clear from outstanding_cmds. + */ + msleep(ABORT_POLLING_PERIOD); + spin_lock_irqsave(qpair->qp_lock_ptr, flags); + found = true; + } + spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); + + if (!found) + break; } - spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); + + if (!wait_iter && found) + status = QLA_FUNCTION_FAILED; return status; } From 2b3bdef089b920b4a19fefb4f4e6dda56a4bb583 Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Wed, 7 Jun 2023 17:08:36 +0530 Subject: [PATCH 208/224] scsi: qla2xxx: Array index may go out of bound commit d721b591b95cf3f290f8a7cbe90aa2ee0368388d upstream. Klocwork reports array 'vha->host_str' of size 16 may use index value(s) 16..19. Use snprintf() instead of sprintf(). Cc: stable@vger.kernel.org Co-developed-by: Bikash Hazarika Signed-off-by: Bikash Hazarika Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230607113843.37185-2-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_os.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index fc307473c94c..72047f7dbb18 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -5074,7 +5074,8 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, } INIT_DELAYED_WORK(&vha->scan.scan_work, qla_scan_work_fn); - sprintf(vha->host_str, "%s_%lu", QLA2XXX_DRIVER_NAME, vha->host_no); + snprintf(vha->host_str, sizeof(vha->host_str), "%s_%lu", + QLA2XXX_DRIVER_NAME, vha->host_no); ql_dbg(ql_dbg_init, vha, 0x0041, "Allocated the host=%p hw=%p vha=%p dev_name=%s", vha->host, vha->hw, vha, From 477bc74ad1add644b606bff6ba1284943c42818a Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Wed, 7 Jun 2023 17:08:38 +0530 Subject: [PATCH 209/224] scsi: qla2xxx: Avoid fcport pointer dereference commit 6b504d06976fe4a61cc05dedc68b84fadb397f77 upstream. Klocwork reported warning of NULL pointer may be dereferenced. The routine exits when sa_ctl is NULL and fcport is allocated after the exit call thus causing NULL fcport pointer to dereference at the time of exit. To avoid fcport pointer dereference, exit the routine when sa_ctl is NULL. Cc: stable@vger.kernel.org Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230607113843.37185-4-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_edif.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_edif.c b/drivers/scsi/qla2xxx/qla_edif.c index 1cafd27d5a60..7aee4d093969 100644 --- a/drivers/scsi/qla2xxx/qla_edif.c +++ b/drivers/scsi/qla2xxx/qla_edif.c @@ -2319,8 +2319,8 @@ qla24xx_issue_sa_replace_iocb(scsi_qla_host_t *vha, struct qla_work_evt *e) if (!sa_ctl) { ql_dbg(ql_dbg_edif, vha, 0x70e6, "sa_ctl allocation failed\n"); - rval = -ENOMEM; - goto done; + rval = -ENOMEM; + return rval; } fcport = sa_ctl->fcport; From 2dddbf8de128289a3fb7ae38d9bc4b2217205ec1 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Wed, 7 Jun 2023 17:08:40 +0530 Subject: [PATCH 210/224] scsi: qla2xxx: Fix buffer overrun commit b68710a8094fdffe8dd4f7a82c82649f479bb453 upstream. Klocwork warning: Buffer Overflow - Array Index Out of Bounds Driver uses fc_els_flogi to calculate size of buffer. The actual buffer is nested inside of fc_els_flogi which is smaller. Replace structure name to allow proper size calculation. Cc: stable@vger.kernel.org Signed-off-by: Quinn Tran Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230607113843.37185-6-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 30bbf33e3a6a..b597c782b95e 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -5571,7 +5571,7 @@ static void qla_get_login_template(scsi_qla_host_t *vha) __be32 *q; memset(ha->init_cb, 0, ha->init_cb_size); - sz = min_t(int, sizeof(struct fc_els_flogi), ha->init_cb_size); + sz = min_t(int, sizeof(struct fc_els_csp), ha->init_cb_size); rval = qla24xx_get_port_login_templ(vha, ha->init_cb_dma, ha->init_cb, sz); if (rval != QLA_SUCCESS) { From ce2cdbe530b0066bae1f98dbab590a232d507eaa Mon Sep 17 00:00:00 2001 From: Bikash Hazarika Date: Wed, 7 Jun 2023 17:08:37 +0530 Subject: [PATCH 211/224] scsi: qla2xxx: Fix potential NULL pointer dereference commit 464ea494a40c6e3e0e8f91dd325408aaf21515ba upstream. Klocwork tool reported 'cur_dsd' may be dereferenced. Add fix to validate pointer before dereferencing the pointer. Cc: stable@vger.kernel.org Signed-off-by: Bikash Hazarika Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230607113843.37185-3-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_iocb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index f6a9fdb3151d..c9a686f06d29 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -607,7 +607,8 @@ qla24xx_build_scsi_type_6_iocbs(srb_t *sp, struct cmd_type_6 *cmd_pkt, put_unaligned_le32(COMMAND_TYPE_6, &cmd_pkt->entry_type); /* No data transfer */ - if (!scsi_bufflen(cmd) || cmd->sc_data_direction == DMA_NONE) { + if (!scsi_bufflen(cmd) || cmd->sc_data_direction == DMA_NONE || + tot_dsds == 0) { cmd_pkt->byte_count = cpu_to_le32(0); return 0; } From e466930717ef18c112585a39fc6174d8eb441df5 Mon Sep 17 00:00:00 2001 From: Nilesh Javali Date: Wed, 7 Jun 2023 17:08:39 +0530 Subject: [PATCH 212/224] scsi: qla2xxx: Check valid rport returned by fc_bsg_to_rport() commit af73f23a27206ffb3c477cac75b5fcf03410556e upstream. Klocwork reported warning of rport maybe NULL and will be dereferenced. rport returned by call to fc_bsg_to_rport() could be NULL and dereferenced. Check valid rport returned by fc_bsg_to_rport(). Cc: stable@vger.kernel.org Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230607113843.37185-5-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_bsg.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index dba7bba788d7..c928b27061a9 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -283,6 +283,10 @@ qla2x00_process_els(struct bsg_job *bsg_job) if (bsg_request->msgcode == FC_BSG_RPT_ELS) { rport = fc_bsg_to_rport(bsg_job); + if (!rport) { + rval = -ENOMEM; + goto done; + } fcport = *(fc_port_t **) rport->dd_data; host = rport_to_shost(rport); vha = shost_priv(host); From b88b1241fb1cb8174a474f7f5bbf030433722e04 Mon Sep 17 00:00:00 2001 From: Bikash Hazarika Date: Wed, 7 Jun 2023 17:08:42 +0530 Subject: [PATCH 213/224] scsi: qla2xxx: Correct the index of array commit b1b9d3825df4c757d653d0b1df66f084835db9c3 upstream. Klocwork reported array 'port_dstate_str' of size 10 may use index value(s) 10..15. Add a fix to correct the index of array. Cc: stable@vger.kernel.org Signed-off-by: Bikash Hazarika Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230607113843.37185-8-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_inline.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h index b0ee307b5d4b..a034699e58ae 100644 --- a/drivers/scsi/qla2xxx/qla_inline.h +++ b/drivers/scsi/qla2xxx/qla_inline.h @@ -109,11 +109,13 @@ qla2x00_set_fcport_disc_state(fc_port_t *fcport, int state) { int old_val; uint8_t shiftbits, mask; + uint8_t port_dstate_str_sz; /* This will have to change when the max no. of states > 16 */ shiftbits = 4; mask = (1 << shiftbits) - 1; + port_dstate_str_sz = sizeof(port_dstate_str) / sizeof(char *); fcport->disc_state = state; while (1) { old_val = atomic_read(&fcport->shadow_disc_state); @@ -121,7 +123,8 @@ qla2x00_set_fcport_disc_state(fc_port_t *fcport, int state) old_val, (old_val << shiftbits) | state)) { ql_dbg(ql_dbg_disc, fcport->vha, 0x2134, "FCPort %8phC disc_state transition: %s to %s - portid=%06x.\n", - fcport->port_name, port_dstate_str[old_val & mask], + fcport->port_name, (old_val & mask) < port_dstate_str_sz ? + port_dstate_str[old_val & mask] : "Unknown", port_dstate_str[state], fcport->d_id.b24); return; } From b06d1b525364bbcf4929b4b35d81945b10dc9883 Mon Sep 17 00:00:00 2001 From: Shreyas Deodhar Date: Wed, 7 Jun 2023 17:08:41 +0530 Subject: [PATCH 214/224] scsi: qla2xxx: Pointer may be dereferenced commit 00eca15319d9ce8c31cdf22f32a3467775423df4 upstream. Klocwork tool reported pointer 'rport' returned from call to function fc_bsg_to_rport() may be NULL and will be dereferenced. Add a fix to validate rport before dereferencing. Cc: stable@vger.kernel.org Signed-off-by: Shreyas Deodhar Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230607113843.37185-7-njavali@marvell.com Reviewed-by: Himanshu Madhani Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_bsg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index c928b27061a9..19bb64bdd88b 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -2996,6 +2996,8 @@ qla24xx_bsg_request(struct bsg_job *bsg_job) if (bsg_request->msgcode == FC_BSG_RPT_ELS) { rport = fc_bsg_to_rport(bsg_job); + if (!rport) + return ret; host = rport_to_shost(rport); vha = shost_priv(host); } else { From f459d586fdf12c53116c9fddf43065165fdd5969 Mon Sep 17 00:00:00 2001 From: Manish Rangankar Date: Thu, 15 Jun 2023 13:16:33 +0530 Subject: [PATCH 215/224] scsi: qla2xxx: Remove unused nvme_ls_waitq wait queue commit 20fce500b232b970e40312a9c97e7f3b6d7a709c upstream. System crash when qla2x00_start_sp(sp) returns error code EGAIN and wake_up gets called for uninitialized wait queue sp->nvme_ls_waitq. qla2xxx [0000:37:00.1]-2121:5: Returning existing qpair of ffff8ae2c0513400 for idx=0 qla2xxx [0000:37:00.1]-700e:5: qla2x00_start_sp failed = 11 BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 PGD 0 P4D 0 Oops: 0000 [#1] SMP NOPTI Hardware name: HPE ProLiant DL360 Gen10/ProLiant DL360 Gen10, BIOS U32 09/03/2021 Workqueue: nvme-wq nvme_fc_connect_ctrl_work [nvme_fc] RIP: 0010:__wake_up_common+0x4c/0x190 RSP: 0018:ffff95f3e0cb7cd0 EFLAGS: 00010086 RAX: 0000000000000000 RBX: ffff8b08d3b26328 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000003 RDI: ffff8b08d3b26320 RBP: 0000000000000001 R08: 0000000000000000 R09: ffffffffffffffe8 R10: 0000000000000000 R11: ffff95f3e0cb7a60 R12: ffff95f3e0cb7d20 R13: 0000000000000003 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff8b2fdf6c0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000002f1e410002 CR4: 00000000007706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: __wake_up_common_lock+0x7c/0xc0 qla_nvme_ls_req+0x355/0x4c0 [qla2xxx] ? __nvme_fc_send_ls_req+0x260/0x380 [nvme_fc] ? nvme_fc_send_ls_req.constprop.42+0x1a/0x45 [nvme_fc] ? nvme_fc_connect_ctrl_work.cold.63+0x1e3/0xa7d [nvme_fc] Remove unused nvme_ls_waitq wait queue. nvme_ls_waitq logic was removed previously in the commits tagged Fixed: below. Fixes: 219d27d7147e ("scsi: qla2xxx: Fix race conditions in the code for aborting SCSI commands") Fixes: 5621b0dd7453 ("scsi: qla2xxx: Simpify unregistration of FC-NVMe local/remote ports") Cc: stable@vger.kernel.org Signed-off-by: Manish Rangankar Signed-off-by: Nilesh Javali Link: https://lore.kernel.org/r/20230615074633.12721-1-njavali@marvell.com Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_def.h | 1 - drivers/scsi/qla2xxx/qla_nvme.c | 3 --- 2 files changed, 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 63cecd25fe43..5a2f629d18e6 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -695,7 +695,6 @@ typedef struct srb { struct iocb_resource iores; struct kref cmd_kref; /* need to migrate ref_count over to this */ void *priv; - wait_queue_head_t nvme_ls_waitq; struct fc_port *fcport; struct scsi_qla_host *vha; unsigned int start_timer:1; diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index c57e02a35521..57545d5e82b9 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -360,7 +360,6 @@ static int qla_nvme_ls_req(struct nvme_fc_local_port *lport, if (rval != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x700e, "qla2x00_start_sp failed = %d\n", rval); - wake_up(&sp->nvme_ls_waitq); sp->priv = NULL; priv->sp = NULL; qla2x00_rel_sp(sp); @@ -648,7 +647,6 @@ static int qla_nvme_post_cmd(struct nvme_fc_local_port *lport, if (!sp) return -EBUSY; - init_waitqueue_head(&sp->nvme_ls_waitq); kref_init(&sp->cmd_kref); spin_lock_init(&priv->cmd_lock); sp->priv = priv; @@ -667,7 +665,6 @@ static int qla_nvme_post_cmd(struct nvme_fc_local_port *lport, if (rval != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x212d, "qla2x00_start_nvme_mq failed = %d\n", rval); - wake_up(&sp->nvme_ls_waitq); sp->priv = NULL; priv->sp = NULL; qla2xxx_rel_qpair_sp(sp->qpair, sp); From fec55ec03545ca772206ae30dbc59af853d5f6fc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 22 May 2023 14:09:17 +0300 Subject: [PATCH 216/224] scsi: qla2xxx: Fix end of loop test commit 339020091e246e708c1381acf74c5f8e3fe4d2b5 upstream. This loop will exit successfully when "found" is false or in the failure case it times out with "wait_iter" set to -1. The test for timeouts is impossible as is. Fixes: b843adde8d49 ("scsi: qla2xxx: Fix mem access after free") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/cea5a62f-b873-4347-8f8e-c67527ced8d2@kili.mountain Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/qla2xxx/qla_os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 72047f7dbb18..ed70eb884786 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1399,7 +1399,7 @@ __qla2x00_eh_wait_for_pending_commands(struct qla_qpair *qpair, unsigned int t, break; } - if (!wait_iter && found) + if (wait_iter == -1) status = QLA_FUNCTION_FAILED; return status; From fbfb6b7cb2f73f1ee41dd3d413fc5cb9b7588664 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Thu, 6 Jul 2023 18:36:10 +0200 Subject: [PATCH 217/224] MIPS: kvm: Fix build error with KVM_MIPS_DEBUG_COP0_COUNTERS enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3a6dbb691782e88e07e5c70b327495dbd58a2e7f upstream. Commit e4de20576986 ("MIPS: KVM: Fix NULL pointer dereference") missed converting one place accessing cop0 registers, which results in a build error, if KVM_MIPS_DEBUG_COP0_COUNTERS is enabled. Fixes: e4de20576986 ("MIPS: KVM: Fix NULL pointer dereference") Signed-off-by: Thomas Bogendoerfer Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Greg Kroah-Hartman --- arch/mips/kvm/stats.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/kvm/stats.c b/arch/mips/kvm/stats.c index 53f851a61554..3e6682018fbe 100644 --- a/arch/mips/kvm/stats.c +++ b/arch/mips/kvm/stats.c @@ -54,9 +54,9 @@ void kvm_mips_dump_stats(struct kvm_vcpu *vcpu) kvm_info("\nKVM VCPU[%d] COP0 Access Profile:\n", vcpu->vcpu_id); for (i = 0; i < N_MIPS_COPROC_REGS; i++) { for (j = 0; j < N_MIPS_COPROC_SEL; j++) { - if (vcpu->arch.cop0->stat[i][j]) + if (vcpu->arch.cop0.stat[i][j]) kvm_info("%s[%d]: %lu\n", kvm_cop0_str[i], j, - vcpu->arch.cop0->stat[i][j]); + vcpu->arch.cop0.stat[i][j]); } } #endif From d64b70df23e8162c31d64f2c780a60e529da41ba Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Fri, 23 Jun 2023 10:05:22 -0500 Subject: [PATCH 218/224] Revert "drm/amd: Disable PSR-SU on Parade 0803 TCON" commit 1e66a17ce546eabad753178bbd4175cb52bafca8 upstream. This reverts commit 072030b1783056b5de8b0fac5303a5e9dbc6cfde. This is no longer necessary when using newer DMUB F/W. Cc: stable@vger.kernel.org Cc: Sean Wang Cc: Marc Rossi Cc: Hamza Mahfooz Cc: Tsung-hua (Ryan) Lin Reviewed-by: Leo Li Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/modules/power/power_helpers.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 67287ad07226..9edd39322c82 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -816,8 +816,6 @@ bool is_psr_su_specific_panel(struct dc_link *link) ((dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x08) || (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x07))) isPSRSUSupported = false; - else if (dpcd_caps->sink_dev_id_str[1] == 0x08 && dpcd_caps->sink_dev_id_str[0] == 0x03) - isPSRSUSupported = false; else if (dpcd_caps->psr_info.force_psrsu_cap == 0x1) isPSRSUSupported = true; } From ff06cd411aa0b70ee8cc50acf39fa37087ca3722 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 21 Feb 2023 23:04:11 -0800 Subject: [PATCH 219/224] swiotlb: mark swiotlb_memblock_alloc() as __init commit 9b07d27d0fbb7f7441aa986859a0f53ec93a0335 upstream. swiotlb_memblock_alloc() calls memblock_alloc(), which calls (__init) memblock_alloc_try_nid(). However, swiotlb_membloc_alloc() can be marked as __init since it is only called by swiotlb_init_remap(), which is already marked as __init. This prevents a modpost build warning/error: WARNING: modpost: vmlinux.o: section mismatch in reference: swiotlb_memblock_alloc (section: .text) -> memblock_alloc_try_nid (section: .init.text) WARNING: modpost: vmlinux.o: section mismatch in reference: swiotlb_memblock_alloc (section: .text) -> memblock_alloc_try_nid (section: .init.text) This fixes the build warning/error seen on ARM64, PPC64, S390, i386, and x86_64. Fixes: 8d58aa484920 ("swiotlb: reduce the swiotlb buffer size on allocation failure") Signed-off-by: Randy Dunlap Cc: Alexey Kardashevskiy Cc: Christoph Hellwig Cc: iommu@lists.linux.dev Cc: Mike Rapoport Cc: linux-mm@kvack.org Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- kernel/dma/swiotlb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 491d3c86c228..ad6333c3fe1f 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -324,7 +324,8 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start, return; } -static void *swiotlb_memblock_alloc(unsigned long nslabs, unsigned int flags, +static void __init *swiotlb_memblock_alloc(unsigned long nslabs, + unsigned int flags, int (*remap)(void *tlb, unsigned long nslabs)) { size_t bytes = PAGE_ALIGN(nslabs << IO_TLB_SHIFT); From d34a3470ed4090eb1e3440f024c596ace6060506 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Tue, 11 Jul 2023 18:01:00 -0300 Subject: [PATCH 220/224] net/sched: sch_qfq: reintroduce lmax bound check for MTU commit 158810b261d02fc7dd92ca9c392d8f8a211a2401 upstream. 25369891fcef deletes a check for the case where no 'lmax' is specified which 3037933448f6 previously fixed as 'lmax' could be set to the device's MTU without any bound checking for QFQ_LMAX_MIN and QFQ_LMAX_MAX. Therefore, reintroduce the check. Fixes: 25369891fcef ("net/sched: sch_qfq: refactor parsing of netlink parameters") Acked-by: Jamal Hadi Salim Reviewed-by: Eric Dumazet Signed-off-by: Pedro Tammela Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_qfq.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index d5610e145da2..e150d08f182d 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -428,10 +428,17 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, else weight = 1; - if (tb[TCA_QFQ_LMAX]) + if (tb[TCA_QFQ_LMAX]) { lmax = nla_get_u32(tb[TCA_QFQ_LMAX]); - else + } else { + /* MTU size is user controlled */ lmax = psched_mtu(qdisc_dev(sch)); + if (lmax < QFQ_MIN_LMAX || lmax > QFQ_MAX_LMAX) { + NL_SET_ERR_MSG_MOD(extack, + "MTU size out of bounds for qfq"); + return -EINVAL; + } + } inv_w = ONE_FP / weight; weight = ONE_FP / inv_w; From e4a0e09b79bd2c0895c508cdc5e0265a083cc05d Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 21 Jul 2023 15:58:38 +0200 Subject: [PATCH 221/224] drm/atomic: Fix potential use-after-free in nonblocking commits commit 4e076c73e4f6e90816b30fcd4a0d7ab365087255 upstream. This requires a bit of background. Properly done a modeset driver's unload/remove sequence should be drm_dev_unplug(); drm_atomic_helper_shutdown(); drm_dev_put(); The trouble is that the drm_dev_unplugged() checks are by design racy, they do not synchronize against all outstanding ioctl. This is because those ioctl could block forever (both for modeset and for driver specific ioctls), leading to deadlocks in hotunplug. Instead the code sections that touch the hardware need to be annotated with drm_dev_enter/exit, to avoid accessing hardware resources after the unload/remove has finished. To avoid use-after-free issues all the involved userspace visible objects are supposed to hold a reference on the underlying drm_device, like drm_file does. The issue now is that we missed one, the atomic modeset ioctl can be run in a nonblocking fashion, and in that case it cannot rely on the implied drm_device reference provided by the ioctl calling context. This can result in a use-after-free if an nonblocking atomic commit is carefully raced against a driver unload. Fix this by unconditionally grabbing a drm_device reference for any drm_atomic_state structures. Strictly speaking this isn't required for blocking commits and TEST_ONLY calls, but it's the simpler approach. Thanks to shanzhulig for the initial idea of grabbing an unconditional reference, I just added comments, a condensed commit message and fixed a minor potential issue in where exactly we drop the final reference. Reported-by: shanzhulig Suggested-by: shanzhulig Reviewed-by: Maxime Ripard Cc: Maarten Lankhorst Cc: Thomas Zimmermann Cc: David Airlie Cc: stable@kernel.org Signed-off-by: Daniel Vetter Signed-off-by: Daniel Vetter Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_atomic.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index c0dc5858a723..7115898258af 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -140,6 +140,12 @@ drm_atomic_state_init(struct drm_device *dev, struct drm_atomic_state *state) if (!state->planes) goto fail; + /* + * Because drm_atomic_state can be committed asynchronously we need our + * own reference and cannot rely on the on implied by drm_file in the + * ioctl call. + */ + drm_dev_get(dev); state->dev = dev; drm_dbg_atomic(dev, "Allocated atomic state %p\n", state); @@ -299,7 +305,8 @@ EXPORT_SYMBOL(drm_atomic_state_clear); void __drm_atomic_state_free(struct kref *ref) { struct drm_atomic_state *state = container_of(ref, typeof(*state), ref); - struct drm_mode_config *config = &state->dev->mode_config; + struct drm_device *dev = state->dev; + struct drm_mode_config *config = &dev->mode_config; drm_atomic_state_clear(state); @@ -311,6 +318,8 @@ void __drm_atomic_state_free(struct kref *ref) drm_atomic_state_default_release(state); kfree(state); } + + drm_dev_put(dev); } EXPORT_SYMBOL(__drm_atomic_state_free); From e2c3356907a58753d88f82d1365dc0921249f89b Mon Sep 17 00:00:00 2001 From: Ivan Mikhaylov Date: Wed, 7 Jun 2023 18:17:41 +0300 Subject: [PATCH 222/224] net/ncsi: make one oem_gma function for all mfr id commit 74b449b98dccdf24288d562f9d207fa066da793d upstream. Make the one Get Mac Address function for all manufacturers and change this call in handlers accordingly. Reviewed-by: Simon Horman Signed-off-by: Ivan Mikhaylov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ncsi/ncsi-rsp.c | 88 ++++++++++----------------------------------- 1 file changed, 19 insertions(+), 69 deletions(-) diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 6447a09932f5..91c42253a711 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -611,14 +611,15 @@ static int ncsi_rsp_handler_snfc(struct ncsi_request *nr) return 0; } -/* Response handler for Mellanox command Get Mac Address */ -static int ncsi_rsp_handler_oem_mlx_gma(struct ncsi_request *nr) +/* Response handler for Get Mac Address command */ +static int ncsi_rsp_handler_oem_gma(struct ncsi_request *nr, int mfr_id) { struct ncsi_dev_priv *ndp = nr->ndp; struct net_device *ndev = ndp->ndev.dev; const struct net_device_ops *ops = ndev->netdev_ops; struct ncsi_rsp_oem_pkt *rsp; struct sockaddr saddr; + u32 mac_addr_off = 0; int ret = 0; /* Get the response header */ @@ -626,7 +627,19 @@ static int ncsi_rsp_handler_oem_mlx_gma(struct ncsi_request *nr) saddr.sa_family = ndev->type; ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; - memcpy(saddr.sa_data, &rsp->data[MLX_MAC_ADDR_OFFSET], ETH_ALEN); + if (mfr_id == NCSI_OEM_MFR_BCM_ID) + mac_addr_off = BCM_MAC_ADDR_OFFSET; + else if (mfr_id == NCSI_OEM_MFR_MLX_ID) + mac_addr_off = MLX_MAC_ADDR_OFFSET; + else if (mfr_id == NCSI_OEM_MFR_INTEL_ID) + mac_addr_off = INTEL_MAC_ADDR_OFFSET; + + memcpy(saddr.sa_data, &rsp->data[mac_addr_off], ETH_ALEN); + if (mfr_id == NCSI_OEM_MFR_BCM_ID || mfr_id == NCSI_OEM_MFR_INTEL_ID) + eth_addr_inc((u8 *)saddr.sa_data); + if (!is_valid_ether_addr((const u8 *)saddr.sa_data)) + return -ENXIO; + /* Set the flag for GMA command which should only be called once */ ndp->gma_flag = 1; @@ -649,41 +662,10 @@ static int ncsi_rsp_handler_oem_mlx(struct ncsi_request *nr) if (mlx->cmd == NCSI_OEM_MLX_CMD_GMA && mlx->param == NCSI_OEM_MLX_CMD_GMA_PARAM) - return ncsi_rsp_handler_oem_mlx_gma(nr); + return ncsi_rsp_handler_oem_gma(nr, NCSI_OEM_MFR_MLX_ID); return 0; } -/* Response handler for Broadcom command Get Mac Address */ -static int ncsi_rsp_handler_oem_bcm_gma(struct ncsi_request *nr) -{ - struct ncsi_dev_priv *ndp = nr->ndp; - struct net_device *ndev = ndp->ndev.dev; - const struct net_device_ops *ops = ndev->netdev_ops; - struct ncsi_rsp_oem_pkt *rsp; - struct sockaddr saddr; - int ret = 0; - - /* Get the response header */ - rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp); - - saddr.sa_family = ndev->type; - ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; - memcpy(saddr.sa_data, &rsp->data[BCM_MAC_ADDR_OFFSET], ETH_ALEN); - /* Increase mac address by 1 for BMC's address */ - eth_addr_inc((u8 *)saddr.sa_data); - if (!is_valid_ether_addr((const u8 *)saddr.sa_data)) - return -ENXIO; - - /* Set the flag for GMA command which should only be called once */ - ndp->gma_flag = 1; - - ret = ops->ndo_set_mac_address(ndev, &saddr); - if (ret < 0) - netdev_warn(ndev, "NCSI: 'Writing mac address to device failed\n"); - - return ret; -} - /* Response handler for Broadcom card */ static int ncsi_rsp_handler_oem_bcm(struct ncsi_request *nr) { @@ -695,42 +677,10 @@ static int ncsi_rsp_handler_oem_bcm(struct ncsi_request *nr) bcm = (struct ncsi_rsp_oem_bcm_pkt *)(rsp->data); if (bcm->type == NCSI_OEM_BCM_CMD_GMA) - return ncsi_rsp_handler_oem_bcm_gma(nr); + return ncsi_rsp_handler_oem_gma(nr, NCSI_OEM_MFR_BCM_ID); return 0; } -/* Response handler for Intel command Get Mac Address */ -static int ncsi_rsp_handler_oem_intel_gma(struct ncsi_request *nr) -{ - struct ncsi_dev_priv *ndp = nr->ndp; - struct net_device *ndev = ndp->ndev.dev; - const struct net_device_ops *ops = ndev->netdev_ops; - struct ncsi_rsp_oem_pkt *rsp; - struct sockaddr saddr; - int ret = 0; - - /* Get the response header */ - rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp); - - saddr.sa_family = ndev->type; - ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; - memcpy(saddr.sa_data, &rsp->data[INTEL_MAC_ADDR_OFFSET], ETH_ALEN); - /* Increase mac address by 1 for BMC's address */ - eth_addr_inc((u8 *)saddr.sa_data); - if (!is_valid_ether_addr((const u8 *)saddr.sa_data)) - return -ENXIO; - - /* Set the flag for GMA command which should only be called once */ - ndp->gma_flag = 1; - - ret = ops->ndo_set_mac_address(ndev, &saddr); - if (ret < 0) - netdev_warn(ndev, - "NCSI: 'Writing mac address to device failed\n"); - - return ret; -} - /* Response handler for Intel card */ static int ncsi_rsp_handler_oem_intel(struct ncsi_request *nr) { @@ -742,7 +692,7 @@ static int ncsi_rsp_handler_oem_intel(struct ncsi_request *nr) intel = (struct ncsi_rsp_oem_intel_pkt *)(rsp->data); if (intel->cmd == NCSI_OEM_INTEL_CMD_GMA) - return ncsi_rsp_handler_oem_intel_gma(nr); + return ncsi_rsp_handler_oem_gma(nr, NCSI_OEM_MFR_INTEL_ID); return 0; } From 9879d6e1ca871515148997fcb04c25e208ad3a42 Mon Sep 17 00:00:00 2001 From: Ivan Mikhaylov Date: Wed, 7 Jun 2023 18:17:42 +0300 Subject: [PATCH 223/224] net/ncsi: change from ndo_set_mac_address to dev_set_mac_address commit 790071347a0a1a89e618eedcd51c687ea783aeb3 upstream. Change ndo_set_mac_address to dev_set_mac_address because dev_set_mac_address provides a way to notify network layer about MAC change. In other case, services may not aware about MAC change and keep using old one which set from network adapter driver. As example, DHCP client from systemd do not update MAC address without notification from net subsystem which leads to the problem with acquiring the right address from DHCP server. Fixes: cb10c7c0dfd9e ("net/ncsi: Add NCSI Broadcom OEM command") Cc: stable@vger.kernel.org # v6.0+ 2f38e84 net/ncsi: make one oem_gma function for all mfr id Signed-off-by: Paul Fertser Signed-off-by: Ivan Mikhaylov Reviewed-by: Simon Horman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ncsi/ncsi-rsp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 91c42253a711..069c2659074b 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -616,7 +616,6 @@ static int ncsi_rsp_handler_oem_gma(struct ncsi_request *nr, int mfr_id) { struct ncsi_dev_priv *ndp = nr->ndp; struct net_device *ndev = ndp->ndev.dev; - const struct net_device_ops *ops = ndev->netdev_ops; struct ncsi_rsp_oem_pkt *rsp; struct sockaddr saddr; u32 mac_addr_off = 0; @@ -643,7 +642,9 @@ static int ncsi_rsp_handler_oem_gma(struct ncsi_request *nr, int mfr_id) /* Set the flag for GMA command which should only be called once */ ndp->gma_flag = 1; - ret = ops->ndo_set_mac_address(ndev, &saddr); + rtnl_lock(); + ret = dev_set_mac_address(ndev, &saddr, NULL); + rtnl_unlock(); if (ret < 0) netdev_warn(ndev, "NCSI: 'Writing mac address to device failed\n"); From 75389113731bb629fa5e971baf58e422414c8d23 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 23 Jul 2023 13:49:51 +0200 Subject: [PATCH 224/224] Linux 6.1.40 Link: https://lore.kernel.org/r/20230721160520.865493356@linuxfoundation.org Tested-by: SeongJae Park Tested-by: Takeshi Ogasawara Tested-by: Florian Fainelli Tested-by: Bagas Sanjaya Tested-by: Linux Kernel Functional Testing Tested-by: Ron Economos Tested-by: Guenter Roeck Tested-by: Jon Hunter Tested-by: Conor Dooley Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f0619754c29a..2bb0c01391a9 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 39 +SUBLEVEL = 40 EXTRAVERSION = NAME = Curry Ramen