From 81cc178d7c879899421408e7b568569fe3b5693c Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 16 May 2024 13:45:24 +0000 Subject: [PATCH] Revert "io_uring: drop any code related to SCM_RIGHTS" This reverts commit a3812a47a32022ca76bf46ddacdd823dc2aabf8b which is commit 6e5e6d274956305f1fc0340522b38f5f5be74bdb upstream. It breaks the Android kernel abi and can be brought back in the future in an abi-safe way if it is really needed. Bug: 161946584 Change-Id: I84857293b288c8a160b2567d4018568c36893eff Signed-off-by: Greg Kroah-Hartman --- include/linux/io_uring_types.h | 3 + io_uring/filetable.c | 10 ++- io_uring/io_uring.c | 31 ++++++- io_uring/rsrc.c | 151 ++++++++++++++++++++++++++++++++- io_uring/rsrc.h | 15 ++++ 5 files changed, 202 insertions(+), 8 deletions(-) diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 37aeea266ebb..f5b687a787a3 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -330,6 +330,9 @@ struct io_ring_ctx { struct list_head io_buffers_pages; + #if defined(CONFIG_UNIX) + struct socket *ring_sock; + #endif /* hashed buffered write serialization */ struct io_wq_hash *hash_map; diff --git a/io_uring/filetable.c b/io_uring/filetable.c index 4660cb89ea9f..b80614e7d605 100644 --- a/io_uring/filetable.c +++ b/io_uring/filetable.c @@ -95,10 +95,12 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file, needs_switch = true; } - *io_get_tag_slot(ctx->file_data, slot_index) = 0; - io_fixed_file_set(file_slot, file); - io_file_bitmap_set(&ctx->file_table, slot_index); - return 0; + ret = io_scm_file_account(ctx, file); + if (!ret) { + *io_get_tag_slot(ctx->file_data, slot_index) = 0; + io_fixed_file_set(file_slot, file); + io_file_bitmap_set(&ctx->file_table, slot_index); + } err: if (needs_switch) io_rsrc_node_switch(ctx, ctx->file_data); diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 0bb392e015f2..06385141319b 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include #include @@ -2627,6 +2628,12 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list)); WARN_ON_ONCE(!llist_empty(&ctx->rsrc_put_llist)); +#if defined(CONFIG_UNIX) + if (ctx->ring_sock) { + ctx->ring_sock->file = NULL; /* so that iput() is called */ + sock_release(ctx->ring_sock); + } +#endif WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list)); if (ctx->mm_account) { @@ -3431,12 +3438,32 @@ static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file) /* * Allocate an anonymous fd, this is what constitutes the application * visible backing of an io_uring instance. The application mmaps this - * fd to gain access to the SQ/CQ ring details. + * fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled, + * we have to tie this fd to a socket for file garbage collection purposes. */ static struct file *io_uring_get_file(struct io_ring_ctx *ctx) { - return anon_inode_getfile_secure("[io_uring]", &io_uring_fops, ctx, + struct file *file; +#if defined(CONFIG_UNIX) + int ret; + + ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP, + &ctx->ring_sock); + if (ret) + return ERR_PTR(ret); +#endif + + file = anon_inode_getfile_secure("[io_uring]", &io_uring_fops, ctx, O_RDWR | O_CLOEXEC, NULL); +#if defined(CONFIG_UNIX) + if (IS_ERR(file)) { + sock_release(ctx->ring_sock); + ctx->ring_sock = NULL; + } else { + ctx->ring_sock->file = file; + } +#endif + return file; } static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index ac658cfa89c6..7ada0339b387 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -494,6 +494,11 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx, err = -EBADF; break; } + err = io_scm_file_account(ctx, file); + if (err) { + fput(file); + break; + } *io_get_tag_slot(data, i) = tag; io_fixed_file_set(file_slot, file); io_file_bitmap_set(&ctx->file_table, i); @@ -757,12 +762,22 @@ void __io_sqe_files_unregister(struct io_ring_ctx *ctx) for (i = 0; i < ctx->nr_user_files; i++) { struct file *file = io_file_from_index(&ctx->file_table, i); - if (!file) + /* skip scm accounted files, they'll be freed by ->ring_sock */ + if (!file || io_file_need_scm(file)) continue; io_file_bitmap_clear(&ctx->file_table, i); fput(file); } +#if defined(CONFIG_UNIX) + if (ctx->ring_sock) { + struct sock *sock = ctx->ring_sock->sk; + struct sk_buff *skb; + + while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL) + kfree_skb(skb); + } +#endif io_free_file_tables(&ctx->file_table); io_file_table_set_alloc_range(ctx, 0, 0); io_rsrc_data_free(ctx->file_data); @@ -790,11 +805,134 @@ int io_sqe_files_unregister(struct io_ring_ctx *ctx) return ret; } +/* + * Ensure the UNIX gc is aware of our file set, so we are certain that + * the io_uring can be safely unregistered on process exit, even if we have + * loops in the file referencing. We account only files that can hold other + * files because otherwise they can't form a loop and so are not interesting + * for GC. + */ +int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file) +{ +#if defined(CONFIG_UNIX) + struct sock *sk = ctx->ring_sock->sk; + struct sk_buff_head *head = &sk->sk_receive_queue; + struct scm_fp_list *fpl; + struct sk_buff *skb; + + if (likely(!io_file_need_scm(file))) + return 0; + + /* + * See if we can merge this file into an existing skb SCM_RIGHTS + * file set. If there's no room, fall back to allocating a new skb + * and filling it in. + */ + spin_lock_irq(&head->lock); + skb = skb_peek(head); + if (skb && UNIXCB(skb).fp->count < SCM_MAX_FD) + __skb_unlink(skb, head); + else + skb = NULL; + spin_unlock_irq(&head->lock); + + if (!skb) { + fpl = kzalloc(sizeof(*fpl), GFP_KERNEL); + if (!fpl) + return -ENOMEM; + + skb = alloc_skb(0, GFP_KERNEL); + if (!skb) { + kfree(fpl); + return -ENOMEM; + } + + fpl->user = get_uid(current_user()); + fpl->max = SCM_MAX_FD; + fpl->count = 0; + + UNIXCB(skb).fp = fpl; + skb->sk = sk; + skb->scm_io_uring = 1; + skb->destructor = unix_destruct_scm; + refcount_add(skb->truesize, &sk->sk_wmem_alloc); + } + + fpl = UNIXCB(skb).fp; + fpl->fp[fpl->count++] = get_file(file); + unix_inflight(fpl->user, file); + skb_queue_head(head, skb); + fput(file); +#endif + return 0; +} + static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc) { struct file *file = prsrc->file; +#if defined(CONFIG_UNIX) + struct sock *sock = ctx->ring_sock->sk; + struct sk_buff_head list, *head = &sock->sk_receive_queue; + struct sk_buff *skb; + int i; + if (!io_file_need_scm(file)) { + fput(file); + return; + } + + __skb_queue_head_init(&list); + + /* + * Find the skb that holds this file in its SCM_RIGHTS. When found, + * remove this entry and rearrange the file array. + */ + skb = skb_dequeue(head); + while (skb) { + struct scm_fp_list *fp; + + fp = UNIXCB(skb).fp; + for (i = 0; i < fp->count; i++) { + int left; + + if (fp->fp[i] != file) + continue; + + unix_notinflight(fp->user, fp->fp[i]); + left = fp->count - 1 - i; + if (left) { + memmove(&fp->fp[i], &fp->fp[i + 1], + left * sizeof(struct file *)); + } + fp->count--; + if (!fp->count) { + kfree_skb(skb); + skb = NULL; + } else { + __skb_queue_tail(&list, skb); + } + fput(file); + file = NULL; + break; + } + + if (!file) + break; + + __skb_queue_tail(&list, skb); + + skb = skb_dequeue(head); + } + + if (skb_peek(&list)) { + spin_lock_irq(&head->lock); + while ((skb = __skb_dequeue(&list)) != NULL) + __skb_queue_tail(head, skb); + spin_unlock_irq(&head->lock); + } +#else fput(file); +#endif } int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, @@ -848,12 +986,21 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, goto fail; /* - * Don't allow io_uring instances to be registered. + * Don't allow io_uring instances to be registered. If UNIX + * isn't enabled, then this causes a reference cycle and this + * instance can never get freed. If UNIX is enabled we'll + * handle it just fine, but there's still no point in allowing + * a ring fd as it doesn't support regular read/write anyway. */ if (io_is_uring_fops(file)) { fput(file); goto fail; } + ret = io_scm_file_account(ctx, file); + if (ret) { + fput(file); + goto fail; + } file_slot = io_fixed_file_slot(&ctx->file_table, i); io_fixed_file_set(file_slot, file); io_file_bitmap_set(&ctx->file_table, i); diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index 85f145607c62..acaf8dad0540 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -77,6 +77,21 @@ int io_sqe_files_unregister(struct io_ring_ctx *ctx); int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args, u64 __user *tags); +int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file); + +static inline bool io_file_need_scm(struct file *filp) +{ + return false; +} + +static inline int io_scm_file_account(struct io_ring_ctx *ctx, + struct file *file) +{ + if (likely(!io_file_need_scm(file))) + return 0; + return __io_scm_file_account(ctx, file); +} + int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args); int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg,