From 47d9c7690e4f20419c044f0af5710d4558a39341 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 26 Mar 2024 11:18:41 +0100 Subject: [PATCH 1/8] UPSTREAM: inet: inet_defrag: prevent sk release while still in use ip_local_out() and other functions can pass skb->sk as function argument. If the skb is a fragment and reassembly happens before such function call returns, the sk must not be released. This affects skb fragments reassembled via netfilter or similar modules, e.g. openvswitch or ct_act.c, when run as part of tx pipeline. Eric Dumazet made an initial analysis of this bug. Quoting Eric: Calling ip_defrag() in output path is also implying skb_orphan(), which is buggy because output path relies on sk not disappearing. A relevant old patch about the issue was : 8282f27449bf ("inet: frag: Always orphan skbs inside ip_defrag()") [..] net/ipv4/ip_output.c depends on skb->sk being set, and probably to an inet socket, not an arbitrary one. If we orphan the packet in ipvlan, then downstream things like FQ packet scheduler will not work properly. We need to change ip_defrag() to only use skb_orphan() when really needed, ie whenever frag_list is going to be used. Eric suggested to stash sk in fragment queue and made an initial patch. However there is a problem with this: If skb is refragmented again right after, ip_do_fragment() will copy head->sk to the new fragments, and sets up destructor to sock_wfree. IOW, we have no choice but to fix up sk_wmem accounting to reflect the fully reassembled skb, else wmem will underflow. This change moves the orphan down into the core, to last possible moment. As ip_defrag_offset is aliased with sk_buff->sk member, we must move the offset into the FRAG_CB, else skb->sk gets clobbered. This allows to delay the orphaning long enough to learn if the skb has to be queued or if the skb is completing the reasm queue. In the former case, things work as before, skb is orphaned. 
This is safe because skb gets queued/stolen and won't continue past reasm engine. In the latter case, we will steal the skb->sk reference, reattach it to the head skb, and fix up wmem accounting when inet_frag inflates truesize. Fixes: 7026b1ddb6b8 ("netfilter: Pass socket pointer down through okfn().") Diagnosed-by: Eric Dumazet Reported-by: xingwei lee Reported-by: yue sun Reported-by: syzbot+e5167d7144a62715044c@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240326101845.30836-1-fw@strlen.de Signed-off-by: Paolo Abeni (cherry picked from commit 18685451fc4e546fc0e718580d32df3c0e5c8272) Bug: 335584858 Change-Id: I008a7b5fc4f51c9ad0ee14cf05ba21ca3ff5d6b3 Cc: Lee Jones Signed-off-by: Greg Kroah-Hartman --- include/linux/skbuff.h | 7 +-- net/ipv4/inet_fragment.c | 70 ++++++++++++++++++++----- net/ipv4/ip_fragment.c | 2 +- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- 4 files changed, 60 insertions(+), 21 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 34653a282af3..a9e8875e3394 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -749,8 +749,6 @@ typedef unsigned char *sk_buff_data_t; * @list: queue head * @ll_node: anchor in an llist (eg socket defer_list) * @sk: Socket we are owned by - * @ip_defrag_offset: (aka @sk) alternate use of @sk, used in - * fragmentation management * @dev: Device we arrived on/are leaving by * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL * @cb: Control buffer. Free for use by every layer. 
Put private vars here @@ -874,10 +872,7 @@ struct sk_buff { struct llist_node ll_node; }; - union { - struct sock *sk; - int ip_defrag_offset; - }; + struct sock *sk; union { ktime_t tstamp; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index c9f9ac5013a7..834cdc57755f 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -24,6 +24,8 @@ #include #include +#include "../core/sock_destructor.h" + /* Use skb->cb to track consecutive/adjacent fragments coming at * the end of the queue. Nodes in the rb-tree queue will * contain "runs" of one or more adjacent fragments. @@ -39,6 +41,7 @@ struct ipfrag_skb_cb { }; struct sk_buff *next_frag; int frag_run_len; + int ip_defrag_offset; }; #define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) @@ -390,12 +393,12 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, */ if (!last) fragrun_create(q, skb); /* First fragment. */ - else if (last->ip_defrag_offset + last->len < end) { + else if (FRAG_CB(last)->ip_defrag_offset + last->len < end) { /* This is the common case: skb goes to the end. */ /* Detect and discard overlaps. 
*/ - if (offset < last->ip_defrag_offset + last->len) + if (offset < FRAG_CB(last)->ip_defrag_offset + last->len) return IPFRAG_OVERLAP; - if (offset == last->ip_defrag_offset + last->len) + if (offset == FRAG_CB(last)->ip_defrag_offset + last->len) fragrun_append_to_last(q, skb); else fragrun_create(q, skb); @@ -412,13 +415,13 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, parent = *rbn; curr = rb_to_skb(parent); - curr_run_end = curr->ip_defrag_offset + + curr_run_end = FRAG_CB(curr)->ip_defrag_offset + FRAG_CB(curr)->frag_run_len; - if (end <= curr->ip_defrag_offset) + if (end <= FRAG_CB(curr)->ip_defrag_offset) rbn = &parent->rb_left; else if (offset >= curr_run_end) rbn = &parent->rb_right; - else if (offset >= curr->ip_defrag_offset && + else if (offset >= FRAG_CB(curr)->ip_defrag_offset && end <= curr_run_end) return IPFRAG_DUP; else @@ -432,7 +435,7 @@ int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, rb_insert_color(&skb->rbnode, &q->rb_fragments); } - skb->ip_defrag_offset = offset; + FRAG_CB(skb)->ip_defrag_offset = offset; return IPFRAG_OK; } @@ -442,13 +445,28 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, struct sk_buff *parent) { struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments); - struct sk_buff **nextp; + void (*destructor)(struct sk_buff *); + unsigned int orig_truesize = 0; + struct sk_buff **nextp = NULL; + struct sock *sk = skb->sk; int delta; + if (sk && is_skb_wmem(skb)) { + /* TX: skb->sk might have been passed as argument to + * dst->output and must remain valid until tx completes. + * + * Move sk to reassembled skb and fix up wmem accounting. 
+ */ + orig_truesize = skb->truesize; + destructor = skb->destructor; + } + if (head != skb) { fp = skb_clone(skb, GFP_ATOMIC); - if (!fp) - return NULL; + if (!fp) { + head = skb; + goto out_restore_sk; + } FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; if (RB_EMPTY_NODE(&skb->rbnode)) FRAG_CB(parent)->next_frag = fp; @@ -457,6 +475,12 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, &q->rb_fragments); if (q->fragments_tail == skb) q->fragments_tail = fp; + + if (orig_truesize) { + /* prevent skb_morph from releasing sk */ + skb->sk = NULL; + skb->destructor = NULL; + } skb_morph(skb, head); FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; rb_replace_node(&head->rbnode, &skb->rbnode, @@ -464,13 +488,13 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, consume_skb(head); head = skb; } - WARN_ON(head->ip_defrag_offset != 0); + WARN_ON(FRAG_CB(head)->ip_defrag_offset != 0); delta = -head->truesize; /* Head of list must not be cloned. */ if (skb_unclone(head, GFP_ATOMIC)) - return NULL; + goto out_restore_sk; delta += head->truesize; if (delta) @@ -486,7 +510,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, clone = alloc_skb(0, GFP_ATOMIC); if (!clone) - return NULL; + goto out_restore_sk; skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; skb_frag_list_init(head); for (i = 0; i < skb_shinfo(head)->nr_frags; i++) @@ -503,6 +527,21 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, nextp = &skb_shinfo(head)->frag_list; } +out_restore_sk: + if (orig_truesize) { + int ts_delta = head->truesize - orig_truesize; + + /* if this reassembled skb is fragmented later, + * fraglist skbs will get skb->sk assigned from head->sk, + * and each frag skb will be released via sock_wfree. + * + * Update sk_wmem_alloc. 
+ */ + head->sk = sk; + head->destructor = destructor; + refcount_add(ts_delta, &sk->sk_wmem_alloc); + } + return nextp; } EXPORT_SYMBOL(inet_frag_reasm_prepare); @@ -510,6 +549,8 @@ EXPORT_SYMBOL(inet_frag_reasm_prepare); void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, void *reasm_data, bool try_coalesce) { + struct sock *sk = is_skb_wmem(head) ? head->sk : NULL; + const unsigned int head_truesize = head->truesize; struct sk_buff **nextp = reasm_data; struct rb_node *rbn; struct sk_buff *fp; @@ -573,6 +614,9 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, head->prev = NULL; head->tstamp = q->stamp; head->mono_delivery_time = q->mono_delivery_time; + + if (sk) + refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc); } EXPORT_SYMBOL(inet_frag_reasm_finish); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index fb153569889e..6c309c1ec3b0 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -378,6 +378,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) } skb_dst_drop(skb); + skb_orphan(skb); return -EINPROGRESS; insert_error: @@ -480,7 +481,6 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user) struct ipq *qp; __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS); - skb_orphan(skb); /* Lookup (or create) queue header */ qp = ip_find(net, ip_hdr(skb), user, vif); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 38db0064d661..87a394179092 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -293,6 +293,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, } skb_dst_drop(skb); + skb_orphan(skb); return -EINPROGRESS; insert_error: @@ -468,7 +469,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user) hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); - skb_orphan(skb); fq = fq_find(net, 
fhdr->identification, user, hdr, skb->dev ? skb->dev->ifindex : 0); if (fq == NULL) { From 4965ad067b76c7365552b68411299cfc55f16667 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 15 Jul 2024 15:17:23 +0000 Subject: [PATCH 2/8] ANDROID: Fix abi issues in struct sk_buff In commit 18685451fc4e ("inet: inet_defrag: prevent sk release while still in use"), struct sk_buff dropped an unneeded union structure. This did not change the actual structure size or layout at all, but the abi checker didn't like it. So trick it by putting some __GENKSYMS__ markers in to preserve the abi correctly. Bug: 335584858 Fixes: 18685451fc4e ("inet: inet_defrag: prevent sk release while still in use") Change-Id: I78ca54f9df3e03cccebc326babf1d84ccb5dc781 Signed-off-by: Greg Kroah-Hartman --- android/abi_gki_aarch64.stg | 21 +-------------------- include/linux/skbuff.h | 7 +++++++ net/ipv4/inet_fragment.c | 2 ++ 3 files changed, 10 insertions(+), 20 deletions(-) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index 4c5ad5163610..3779f8437f76 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -41288,11 +41288,6 @@ member { id: 0x3e4f626d type_id: 0x7613f9a7 } -member { - id: 0x3e5d0376 - type_id: 0x765a5eef - offset: 192 -} member { id: 0x3e670f18 type_id: 0x76b2536c @@ -111885,11 +111880,6 @@ member { type_id: 0x18bd6530 offset: 4992 } -member { - id: 0x0e850e04 - name: "ip_defrag_offset" - type_id: 0x6720d32f -} member { id: 0xbc6a844a name: "ip_epilogue" @@ -218986,15 +218976,6 @@ struct_union { member_id: 0xa70d7362 } } -struct_union { - id: 0x765a5eef - kind: UNION - definition { - bytesize: 8 - member_id: 0x828c89b5 - member_id: 0x0e850e04 - } -} struct_union { id: 0x7689fdd5 kind: UNION @@ -258615,7 +258596,7 @@ struct_union { definition { bytesize: 248 member_id: 0x34366909 - member_id: 0x3e5d0376 + member_id: 0x828c817c member_id: 0x318fd08b member_id: 0x53e7b9ea member_id: 0x345f6930 diff --git a/include/linux/skbuff.h 
b/include/linux/skbuff.h index a9e8875e3394..b7520dd30e4a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -872,7 +872,14 @@ struct sk_buff { struct llist_node ll_node; }; +#ifdef __GENKSYMS__ + union { + struct sock *sk; + int ip_defrag_offset; + }; +#else struct sock *sk; +#endif union { ktime_t tstamp; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 834cdc57755f..63968d081d96 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -24,7 +24,9 @@ #include #include +#ifndef __GENKSYMS__ #include "../core/sock_destructor.h" +#endif /* Use skb->cb to track consecutive/adjacent fragments coming at * the end of the queue. Nodes in the rb-tree queue will From 7de5ae52b126f4f2c0ff614a9906eb12529f5b08 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Mon, 4 Mar 2024 11:53:39 +0800 Subject: [PATCH 3/8] UPSTREAM: erofs: fix uninitialized page cache reported by KMSAN syzbot reports a KMSAN reproducer [1] which generates a crafted filesystem image and causes IMA to read uninitialized page cache. Later, (rq->outputsize > rq->inputsize) will be formally supported after either large uncompressed pclusters (> block size) or big lclusters are landed. However, currently there is no way to generate such filesystems by using mkfs.erofs. Thus, let's mark this condition as unsupported for now. 
[1] https://lore.kernel.org/r/0000000000002be12a0611ca7ff8@google.com Bug: 353575918 Reported-and-tested-by: syzbot+7bc44a489f0ef0670bd5@syzkaller.appspotmail.com Fixes: 1ca01520148a ("erofs: refine z_erofs_transform_plain() for sub-page block support") Reviewed-by: Sandeep Dhavale Reviewed-by: Yue Hu Reviewed-by: Chao Yu Change-Id: I7a8e0bc81c1f4512d3640b243e8124a61d849028 Signed-off-by: Gao Xiang Link: https://lore.kernel.org/r/20240304035339.425857-1-hsiangkao@linux.alibaba.com (cherry picked from commit 893e5e9b7369a02e7ceaa6d98db6739162005b03) Signed-off-by: Sandeep Dhavale --- fs/erofs/decompressor.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 943a35ccc953..7e87c43cf27e 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -330,7 +330,8 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq, unsigned int cur = 0, ni = 0, no, pi, po, insz, cnt; u8 *kin; - DBG_BUGON(rq->outputsize > rq->inputsize); + if (rq->outputsize > rq->inputsize) + return -EOPNOTSUPP; if (rq->alg == Z_EROFS_COMPRESSION_INTERLACED) { cur = bs - (rq->pageofs_out & (bs - 1)); pi = (rq->pageofs_in + rq->inputsize - cur) & ~PAGE_MASK; From c78828e3832d88fb82e41f3d972ac6850c93fdfc Mon Sep 17 00:00:00 2001 From: Kuen-Han Tsai Date: Tue, 16 Jan 2024 22:16:17 +0800 Subject: [PATCH 4/8] FROMLIST: usb: gadget: u_serial: Add null pointer checks after RX/TX submission Commit ffd603f21423 ("usb: gadget: u_serial: Add null pointer check in gs_start_io") adds null pointer checks to gs_start_io(), but it doesn't fully fix the potential null pointer dereference issue. While gserial_connect() calls gs_start_io() with port_lock held, gs_start_rx() and gs_start_tx() release the lock during endpoint request submission. This creates a window where gs_close() could set port->port_tty to NULL, leading to a dereference when the lock is reacquired. 
This patch adds a null pointer check for port->port_tty after RX/TX submission, and removes the initial null pointer check in gs_start_io() since the caller must hold port_lock and guarantee non-null values for port_usb and port_tty. Fixes: ffd603f21423 ("usb: gadget: u_serial: Add null pointer check in gs_start_io") Cc: stable@vger.kernel.org Signed-off-by: Kuen-Han Tsai Bug: 283247551 Link: https://lore.kernel.org/lkml/20240116141801.396398-1-khtsai@google.com/ Change-Id: Ib850c7d313194074941576a7fdd3a9f58486ad78 Signed-off-by: Kuen-Han Tsai --- drivers/usb/gadget/function/u_serial.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index b8854e8a771f..e466ac1cd9cb 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -538,20 +538,16 @@ static int gs_alloc_requests(struct usb_ep *ep, struct list_head *head, static int gs_start_io(struct gs_port *port) { struct list_head *head = &port->read_pool; - struct usb_ep *ep; + struct usb_ep *ep = port->port_usb->out; int status; unsigned started; - if (!port->port_usb || !port->port.tty) - return -EIO; - /* Allocate RX and TX I/O buffers. We can't easily do this much * earlier (with GFP_KERNEL) because the requests are coupled to * endpoints, as are the packet sizes we'll be using. Different * configurations may use different endpoints with a given port; * and high speed vs full speed changes packet sizes too. */ - ep = port->port_usb->out; status = gs_alloc_requests(ep, head, gs_read_complete, &port->read_allocated); if (status) @@ -568,12 +564,22 @@ static int gs_start_io(struct gs_port *port) port->n_read = 0; started = gs_start_rx(port); + /* + * The TTY may be set to NULL by gs_close() after gs_start_rx() or + * gs_start_tx() release locks for endpoint request submission. 
+ */ + if (!port->port.tty) + goto out; + if (started) { gs_start_tx(port); /* Unblock any pending writes into our circular buffer, in case * we didn't in gs_start_tx() */ + if (!port->port.tty) + goto out; tty_wakeup(port->port.tty); } else { +out: gs_free_requests(ep, head, &port->read_allocated); gs_free_requests(port->port_usb->in, &port->write_pool, &port->write_allocated); From fc94b39f66878415b12b374c8712d504857b9c28 Mon Sep 17 00:00:00 2001 From: quic_anane Date: Thu, 18 Jul 2024 05:26:33 +0530 Subject: [PATCH 5/8] ANDROID: abi_gki_aarch64_qcom: Add groups_free symbol Add groups_free to abi_gki_aarch64_qcom. 1 function symbol(s) added 'void groups_free(struct group_info*)' Bug: 352712755 Change-Id: I4ee87435608c3050c2ebbe82e3d1b43bfb4700db Signed-off-by: quic_anane --- android/abi_gki_aarch64.stg | 15 +++++++++++++++ android/abi_gki_aarch64_qcom | 1 + 2 files changed, 16 insertions(+) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index 3779f8437f76..53f70fca4b5a 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -298993,6 +298993,11 @@ function { parameter_id: 0x18bd6530 parameter_id: 0x6720d32f } +function { + id: 0x17956035 + return_type_id: 0x48b5725f + parameter_id: 0x1c34c69b +} function { id: 0x179e48e8 return_type_id: 0x48b5725f @@ -373047,6 +373052,15 @@ elf_symbol { type_id: 0x9622351a full_name: "gro_cells_receive" } +elf_symbol { + id: 0xa589de7f + name: "groups_free" + is_defined: true + symbol_type: FUNCTION + crc: 0x732dd326 + type_id: 0x17956035 + full_name: "groups_free" +} elf_symbol { id: 0x3f69092d name: "gs_alloc_req" @@ -415136,6 +415150,7 @@ interface { symbol_id: 0x0ac30c0d symbol_id: 0x61ad5a29 symbol_id: 0x4b6846bd + symbol_id: 0xa589de7f symbol_id: 0x3f69092d symbol_id: 0x61776461 symbol_id: 0xfeac8d5f diff --git a/android/abi_gki_aarch64_qcom b/android/abi_gki_aarch64_qcom index 75a2935d180b..77fd16ac9585 100644 --- a/android/abi_gki_aarch64_qcom +++ 
b/android/abi_gki_aarch64_qcom @@ -1326,6 +1326,7 @@ gro_cells_destroy gro_cells_init gro_cells_receive + groups_free h4_recv_buf handle_bad_irq handle_edge_irq From f3437db87063f624f189e1cd38347a971fdd3fa0 Mon Sep 17 00:00:00 2001 From: Kalesh Singh Date: Wed, 17 Jul 2024 23:06:48 +0000 Subject: [PATCH 6/8] ANDROID: 16K: Don't set padding vm_flags on 32-bit archs vma_pad_fixup_flags() and is_mergable_pad_vma() were inadvertently affecting the vm_flags on 32-bit arch, making some VMAs not mergable. This causes zygote to crash as the Art GC's heap compaction fails. The compaction depends on mremap() which will fail when operating on a range that spans multiple VMAs [1]. This can happen now due to the incorrect is_mergable_pad_vma() check. Make all the pgsize_migration APIs no-ops in 32-bit architectures, since Android only performs ELF segment extension in 64-bit archs. [1] https://github.com/torvalds/linux/blob/v6.9/mm/mremap.c#L841-L843 Bug: 353667356 Change-Id: Id9b0076ef173d75a4afc85577355d340fce03e65 Signed-off-by: Kalesh Singh --- include/linux/pgsize_migration.h | 38 ++++++++++++++------------------ mm/pgsize_migration.c | 32 +++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 22 deletions(-) diff --git a/include/linux/pgsize_migration.h b/include/linux/pgsize_migration.h index fbfb1b9b9196..e3dc1de19f83 100644 --- a/include/linux/pgsize_migration.h +++ b/include/linux/pgsize_migration.h @@ -62,6 +62,14 @@ extern void show_map_pad_vma(struct vm_area_struct *vma, extern void split_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *new, unsigned long addr, int new_below); + +extern unsigned long vma_pad_fixup_flags(struct vm_area_struct *vma, + unsigned long newflags); + +extern bool is_mergable_pad_vma(struct vm_area_struct *vma, + unsigned long vm_flags); + +extern unsigned long vma_data_pages(struct vm_area_struct *vma); #else /* PAGE_SIZE != SZ_4K || !defined(CONFIG_64BIT) */ static inline void vma_set_pad_pages(struct vm_area_struct 
*vma, unsigned long nr_pages) @@ -98,36 +106,22 @@ static inline void split_pad_vma(struct vm_area_struct *vma, struct vm_area_stru unsigned long addr, int new_below) { } -#endif /* PAGE_SIZE == SZ_4K && defined(CONFIG_64BIT) */ -static inline unsigned long vma_data_pages(struct vm_area_struct *vma) -{ - return vma_pages(vma) - vma_pad_pages(vma); -} - -/* - * Sets the correct padding bits / flags for a VMA split. - */ static inline unsigned long vma_pad_fixup_flags(struct vm_area_struct *vma, unsigned long newflags) { - if (newflags & VM_PAD_MASK) - return (newflags & ~VM_PAD_MASK) | (vma->vm_flags & VM_PAD_MASK); - else - return newflags; + return newflags; } -/* - * Merging of padding VMAs is uncommon, as padding is only allowed - * from the linker context. - * - * To simplify the semantics, adjacent VMAs with padding are not - * allowed to merge. - */ static inline bool is_mergable_pad_vma(struct vm_area_struct *vma, unsigned long vm_flags) { - /* Padding VMAs cannot be merged with other padding or real VMAs */ - return !((vma->vm_flags | vm_flags) & VM_PAD_MASK); + return true; } + +static inline unsigned long vma_data_pages(struct vm_area_struct *vma) +{ + return vma_pages(vma); +} +#endif /* PAGE_SIZE == SZ_4K && defined(CONFIG_64BIT) */ #endif /* _LINUX_PAGE_SIZE_MIGRATION_H */ diff --git a/mm/pgsize_migration.c b/mm/pgsize_migration.c index bca3c4aca982..3b89b6489159 100644 --- a/mm/pgsize_migration.c +++ b/mm/pgsize_migration.c @@ -421,5 +421,37 @@ void split_pad_vma(struct vm_area_struct *vma, struct vm_area_struct *new, vma_set_pad_pages(second, nr_vma2_pages); } } + +/* + * Sets the correct padding bits / flags for a VMA split. + */ +unsigned long vma_pad_fixup_flags(struct vm_area_struct *vma, + unsigned long newflags) +{ + if (newflags & VM_PAD_MASK) + return (newflags & ~VM_PAD_MASK) | (vma->vm_flags & VM_PAD_MASK); + else + return newflags; +} + +/* + * Merging of padding VMAs is uncommon, as padding is only allowed + * from the linker context. 
+ * + * To simplify the semantics, adjacent VMAs with padding are not + * allowed to merge. + */ +bool is_mergable_pad_vma(struct vm_area_struct *vma, + unsigned long vm_flags) +{ + /* Padding VMAs cannot be merged with other padding or real VMAs */ + return !((vma->vm_flags | vm_flags) & VM_PAD_MASK); +} + +unsigned long vma_data_pages(struct vm_area_struct *vma) +{ + return vma_pages(vma) - vma_pad_pages(vma); +} + #endif /* PAGE_SIZE == SZ_4K */ #endif /* CONFIG_64BIT */ From f858f0ff4e470be1c9ae06900f7d23f155f0ce4a Mon Sep 17 00:00:00 2001 From: Manish Pandey Date: Mon, 15 Jul 2024 13:54:42 +0530 Subject: [PATCH 7/8] FROMGIT: scsi: ufs: qcom: Enable suspending clk scaling on no request Enable suspending clk scaling on no request for Qualcomm SoC. Signed-off-by: Ram Prakash Gupta Link: https://lore.kernel.org/r/20240627083756.25340-3-quic_rampraka@quicinc.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen Bug: 351099319 (cherry picked from commit ed7dac86f1406d73aed21d0cd1563922031a2fd8 https://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git 6.11/scsi-staging) Change-Id: Ifbdc14ceae5a59c04a5977b56a198fd295253b3e Signed-off-by: Manish Pandey (cherry picked from commit fcfa54b63051a70ad2467a444001b302e97fe9eb) --- drivers/ufs/host/ufs-qcom.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index a87fb1e7e9d4..d700438e5aa7 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -1378,6 +1378,8 @@ static void ufs_qcom_config_scaling_param(struct ufs_hba *hba, p->polling_ms = 60; d->upthreshold = 70; d->downdifferential = 5; + + hba->clk_scaling.suspend_on_no_request = true; } #else static void ufs_qcom_config_scaling_param(struct ufs_hba *hba, From 6d6afa9d3f8fada34bfa4725d5b3079bccf4cf0b Mon Sep 17 00:00:00 2001 From: Manish Pandey Date: Mon, 15 Jul 2024 13:50:22 +0530 Subject: [PATCH 8/8] FROMGIT: scsi: ufs: core: Suspend clk scaling on no request Currently UFS 
clk scaling is getting suspended only when the clks are scaled down. When high load is generated, a huge amount of latency is added due to scaling up the clk and completing the request only after that. Suspending the scaling in its existing state when high load is generated improves the random performance KPI by 28%. So suspend the scaling when there are no requests. The clk is put in the low scaled state when the actual request load is low. Make this change optional by having the check enabled using vops since for some devices suspending without bringing the clk in low scaled state might have impact on power consumption of the SoC. The change takes advantage of the hole in 'struct ufs_clk_scaling' before ANDROID_KABI_RESERVE and does not change the size of the struct. Use __GENKSYMS__ marker to preserve the abi correctly. Signed-off-by: Ram Prakash Gupta Link: https://lore.kernel.org/r/20240627083756.25340-2-quic_rampraka@quicinc.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. 
Petersen Bug: 351099319 (cherry picked from commit 50183ac2cfb54e027dd36fb22ea1bd1e91e3a08b https://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git 6.11/scsi-staging) Change-Id: I6e11beab7bc56746e7e1ae1ea25f9ab205394ef7 Signed-off-by: Manish Pandey (cherry picked from commit 78931ac60273825b4141796eb8631424b3690712) --- android/abi_gki_aarch64.stg | 7 +++++++ drivers/ufs/core/ufshcd.c | 3 ++- include/ufs/ufshcd.h | 5 +++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/android/abi_gki_aarch64.stg b/android/abi_gki_aarch64.stg index 53f70fca4b5a..bd632daa5065 100644 --- a/android/abi_gki_aarch64.stg +++ b/android/abi_gki_aarch64.stg @@ -189224,6 +189224,12 @@ member { type_id: 0x351ca0f0 offset: 64 } +member { + id: 0x0489a9d9 + name: "suspend_on_no_request" + type_id: 0x6d7f5ff6 + offset: 1672 +} member { id: 0x0b4c6d3f name: "suspend_opp" @@ -266594,6 +266600,7 @@ struct_union { member_id: 0xbbefba04 member_id: 0xb768ef54 member_id: 0x2061f6ba + member_id: 0x0489a9d9 member_id: 0x2d081217 } } diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index f91088fd2480..aabea7aaf048 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -1503,7 +1503,8 @@ static int ufshcd_devfreq_target(struct device *dev, ktime_to_us(ktime_sub(ktime_get(), start)), ret); out: - if (sched_clk_scaling_suspend_work && !scale_up) + if (sched_clk_scaling_suspend_work && + (!scale_up || hba->clk_scaling.suspend_on_no_request)) queue_work(hba->clk_scaling.workq, &hba->clk_scaling.suspend_work); diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 12965946d194..b817bd525a69 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -425,6 +425,7 @@ struct ufs_saved_pwr_info { * @is_initialized: Indicates whether clock scaling is initialized or not * @is_busy_started: tracks if busy period has started or not * @is_suspended: tracks if devfreq is suspended or not + * @suspend_on_no_request: Flag to suspend clk scaling when 
there is no request */ struct ufs_clk_scaling { int active_reqs; @@ -442,6 +443,10 @@ struct ufs_clk_scaling { bool is_initialized; bool is_busy_started; bool is_suspended; + /* using hole here would not alter the overall size of the structure. */ +#ifndef __GENKSYMS__ + bool suspend_on_no_request; +#endif ANDROID_KABI_RESERVE(1); };