diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c index 54a83a744538..f33c804a922a 100644 --- a/arch/x86/kvm/debugfs.c +++ b/arch/x86/kvm/debugfs.c @@ -95,6 +95,9 @@ static int kvm_mmu_rmaps_stat_show(struct seq_file *m, void *v) unsigned int *log[KVM_NR_PAGE_SIZES], *cur; int i, j, k, l, ret; + if (!kvm_memslots_have_rmaps(kvm)) + return 0; + ret = -ENOMEM; memset(log, 0, sizeof(log)); for (i = 0; i < KVM_NR_PAGE_SIZES; i++) { diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 7656a2c5662a..be2883141220 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1565,7 +1565,7 @@ static int sev_lock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm) r = -EINTR; if (mutex_lock_killable(&dst_kvm->lock)) goto release_src; - if (mutex_lock_killable(&src_kvm->lock)) + if (mutex_lock_killable_nested(&src_kvm->lock, SINGLE_DEPTH_NESTING)) goto unlock_dst; return 0; diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 83345bfac246..6cf50ee0b77c 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -358,6 +358,9 @@ static int i10nm_get_hbm_munits(void) mbase = ioremap(base + off, I10NM_HBM_IMC_MMIO_SIZE); if (!mbase) { + pci_dev_put(d->imc[lmc].mdev); + d->imc[lmc].mdev = NULL; + i10nm_printk(KERN_ERR, "Failed to ioremap for hbm mc 0x%llx\n", base + off); return -ENOMEM; @@ -368,6 +371,12 @@ static int i10nm_get_hbm_munits(void) mcmtr = I10NM_GET_MCMTR(&d->imc[lmc], 0); if (!I10NM_IS_HBM_IMC(mcmtr)) { + iounmap(d->imc[lmc].mbase); + d->imc[lmc].mbase = NULL; + d->imc[lmc].hbm_mc = false; + pci_dev_put(d->imc[lmc].mdev); + d->imc[lmc].mdev = NULL; + i10nm_printk(KERN_ERR, "This isn't an hbm mc!\n"); return -ENODEV; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index b85b67a88a3d..7d67aec6f4a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1077,6 +1077,7 @@ struct amdgpu_device { bool runpm; bool in_runpm; bool has_pr3; + bool is_fw_fb; bool pm_sysfs_en; bool ucode_sysfs_en; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 86ca80da9eea..99370bdd8c5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "amdgpu.h" #include "amdgpu_irq.h" @@ -1890,6 +1891,26 @@ MODULE_DEVICE_TABLE(pci, pciidlist); static const struct drm_driver amdgpu_kms_driver; +static bool amdgpu_is_fw_framebuffer(resource_size_t base, + resource_size_t size) +{ + bool found = false; +#if IS_REACHABLE(CONFIG_FB) + struct apertures_struct *a; + + a = alloc_apertures(1); + if (!a) + return false; + + a->ranges[0].base = base; + a->ranges[0].size = size; + + found = is_firmware_framebuffer(a); + kfree(a); +#endif + return found; +} + static int amdgpu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -1898,6 +1919,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, unsigned long flags = ent->driver_data; int ret, retry = 0, i; bool supports_atomic = false; + bool is_fw_fb; + resource_size_t base, size; /* skip devices which are owned by radeon */ for (i = 0; i < ARRAY_SIZE(amdgpu_unsupported_pciidlist); i++) { @@ -1966,6 +1989,10 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, } #endif + base = pci_resource_start(pdev, 0); + size = pci_resource_len(pdev, 0); + is_fw_fb = amdgpu_is_fw_framebuffer(base, size); + /* Get rid of things like offb */ ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &amdgpu_kms_driver); if (ret) @@ -1978,6 +2005,7 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, adev->dev = &pdev->dev; adev->pdev = pdev; ddev = adev_to_drm(adev); + adev->is_fw_fb = is_fw_fb; if (!supports_atomic) ddev->driver_features &= ~DRIVER_ATOMIC; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 651c7abfde03..09ad17944eb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -206,6 +206,12 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) adev->runpm = true; break; } + /* XXX: disable runtime pm if we are the primary adapter + * to avoid displays being re-enabled after DPMS. + * This needs to be sorted out and fixed properly. + */ + if (adev->is_fw_fb) + adev->runpm = false; if (adev->runpm) dev_info(adev->dev, "Using BACO for runtime pm\n"); } diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 8a817932acdf..9d7d64fdf410 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1400,8 +1400,14 @@ static int smu_disable_dpms(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; int ret = 0; + /* + * TODO: (adev->in_suspend && !adev->in_s0ix) is added to pair + * the workaround which always reset the asic in suspend. + * It's likely that workaround will be dropped in the future. + * Then the change here should be dropped together. + */ bool use_baco = !smu->is_apu && - ((amdgpu_in_reset(adev) && + (((amdgpu_in_reset(adev) || (adev->in_suspend && !adev->in_s0ix)) && (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) || ((adev->in_runpm || adev->in_s4) && amdgpu_asic_supports_baco(adev))); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 7dc8026cf6ee..85505424f7a4 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1496,12 +1496,13 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, if (!r1_bio->bios[i]) continue; - if (first_clone && test_bit(WriteMostly, &rdev->flags)) { + if (first_clone) { /* do behind I/O ? * Not if there are too many, or cannot * allocate memory, or a reader on WriteMostly * is waiting for behind writes to flush */ if (bitmap && + test_bit(WriteMostly, &rdev->flags) && (atomic_read(&bitmap->behind_writes) < mddev->bitmap_info.max_write_behind) && !waitqueue_active(&bitmap->behind_wait)) { diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 826175ad88a2..0fa7ede94fa6 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1762,6 +1762,53 @@ int remove_conflicting_framebuffers(struct apertures_struct *a, } EXPORT_SYMBOL(remove_conflicting_framebuffers); +/** + * is_firmware_framebuffer - detect if firmware-configured framebuffer matches + * @a: memory range, users of which are to be checked + * + * This function checks framebuffer devices (initialized by firmware/bootloader) + * which use memory range described by @a. If @a matchesm the function returns + * true, otherwise false. + */ +bool is_firmware_framebuffer(struct apertures_struct *a) +{ + bool do_free = false; + bool found = false; + int i; + + if (!a) { + a = alloc_apertures(1); + if (!a) + return false; + + a->ranges[0].base = 0; + a->ranges[0].size = ~0; + do_free = true; + } + + mutex_lock(®istration_lock); + /* check all firmware fbs and kick off if the base addr overlaps */ + for_each_registered_fb(i) { + struct apertures_struct *gen_aper; + + if (!(registered_fb[i]->flags & FBINFO_MISC_FIRMWARE)) + continue; + + gen_aper = registered_fb[i]->apertures; + if (fb_do_apertures_overlap(gen_aper, a)) { + found = true; + break; + } + } + mutex_unlock(®istration_lock); + + if (do_free) + kfree(a); + + return found; +} +EXPORT_SYMBOL(is_firmware_framebuffer); + /** * remove_conflicting_pci_framebuffers - remove firmware-configured framebuffers for PCI devices * @pdev: PCI device diff --git a/include/linux/fb.h b/include/linux/fb.h index 6f3db99ab990..3da95842b207 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -610,6 +610,7 @@ extern int remove_conflicting_pci_framebuffers(struct pci_dev *pdev, const char *name); extern int remove_conflicting_framebuffers(struct apertures_struct *a, const char *name, bool primary); +extern bool is_firmware_framebuffer(struct apertures_struct *a); extern int fb_prepare_logo(struct fb_info *fb_info, int rotate); extern int fb_show_logo(struct fb_info *fb_info, int rotate); extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size); diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 02f32b13d9e5..6aca805a25c4 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -65,6 +65,25 @@ static inline struct cgroup_fs_context *cgroup_fc2context(struct fs_context *fc) return container_of(kfc, struct cgroup_fs_context, kfc); } +struct cgroup_pidlist; + +struct cgroup_file_ctx { + struct cgroup_namespace *ns; + + struct { + void *trigger; + } psi; + + struct { + bool started; + struct css_task_iter iter; + } procs; + + struct { + struct cgroup_pidlist *pidlist; + } procs1; +}; + /* * A cgroup can be associated with multiple css_sets as different tasks may * belong to different cgroups on different hierarchies. In the other diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 2ece597474fb..b40d558a6838 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -395,6 +395,7 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) * next pid to display, if any */ struct kernfs_open_file *of = s->private; + struct cgroup_file_ctx *ctx = of->priv; struct cgroup *cgrp = seq_css(s)->cgroup; struct cgroup_pidlist *l; enum cgroup_filetype type = seq_cft(s)->private; @@ -404,25 +405,24 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) mutex_lock(&cgrp->pidlist_mutex); /* - * !NULL @of->priv indicates that this isn't the first start() - * after open. If the matching pidlist is around, we can use that. - * Look for it. Note that @of->priv can't be used directly. It - * could already have been destroyed. + * !NULL @ctx->procs1.pidlist indicates that this isn't the first + * start() after open. If the matching pidlist is around, we can use + * that. Look for it. Note that @ctx->procs1.pidlist can't be used + * directly. It could already have been destroyed. */ - if (of->priv) - of->priv = cgroup_pidlist_find(cgrp, type); + if (ctx->procs1.pidlist) + ctx->procs1.pidlist = cgroup_pidlist_find(cgrp, type); /* * Either this is the first start() after open or the matching * pidlist has been destroyed inbetween. Create a new one. */ - if (!of->priv) { - ret = pidlist_array_load(cgrp, type, - (struct cgroup_pidlist **)&of->priv); + if (!ctx->procs1.pidlist) { + ret = pidlist_array_load(cgrp, type, &ctx->procs1.pidlist); if (ret) return ERR_PTR(ret); } - l = of->priv; + l = ctx->procs1.pidlist; if (pid) { int end = l->length; @@ -450,7 +450,8 @@ static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos) static void cgroup_pidlist_stop(struct seq_file *s, void *v) { struct kernfs_open_file *of = s->private; - struct cgroup_pidlist *l = of->priv; + struct cgroup_file_ctx *ctx = of->priv; + struct cgroup_pidlist *l = ctx->procs1.pidlist; if (l) mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, @@ -461,7 +462,8 @@ static void cgroup_pidlist_stop(struct seq_file *s, void *v) static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos) { struct kernfs_open_file *of = s->private; - struct cgroup_pidlist *l = of->priv; + struct cgroup_file_ctx *ctx = of->priv; + struct cgroup_pidlist *l = ctx->procs1.pidlist; pid_t *p = v; pid_t *end = l->list + l->length; /* @@ -505,10 +507,11 @@ static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of, goto out_unlock; /* - * Even if we're attaching all tasks in the thread group, we only - * need to check permissions on one of them. + * Even if we're attaching all tasks in the thread group, we only need + * to check permissions on one of them. Check permissions using the + * credentials from file open to protect against inherited fd attacks. */ - cred = current_cred(); + cred = of->file->f_cred; tcred = get_task_cred(task); if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && !uid_eq(cred->euid, tcred->uid) && diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 4859641b5e88..42daae762826 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3641,6 +3641,7 @@ static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v) static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, size_t nbytes, enum psi_res res) { + struct cgroup_file_ctx *ctx = of->priv; struct psi_trigger *new; struct cgroup *cgrp; struct psi_group *psi; @@ -3659,7 +3660,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, return PTR_ERR(new); } - psi_trigger_replace(&of->priv, new); + psi_trigger_replace(&ctx->psi.trigger, new); cgroup_put(cgrp); @@ -3690,12 +3691,16 @@ static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of, static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of, poll_table *pt) { - return psi_trigger_poll(&of->priv, of->file, pt); + struct cgroup_file_ctx *ctx = of->priv; + + return psi_trigger_poll(&ctx->psi.trigger, of->file, pt); } static void cgroup_pressure_release(struct kernfs_open_file *of) { - psi_trigger_replace(&of->priv, NULL); + struct cgroup_file_ctx *ctx = of->priv; + + psi_trigger_replace(&ctx->psi.trigger, NULL); } bool cgroup_psi_enabled(void) @@ -3822,24 +3827,43 @@ static ssize_t cgroup_kill_write(struct kernfs_open_file *of, char *buf, static int cgroup_file_open(struct kernfs_open_file *of) { struct cftype *cft = of_cft(of); + struct cgroup_file_ctx *ctx; + int ret; - if (cft->open) - return cft->open(of); - return 0; + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->ns = current->nsproxy->cgroup_ns; + get_cgroup_ns(ctx->ns); + of->priv = ctx; + + if (!cft->open) + return 0; + + ret = cft->open(of); + if (ret) { + put_cgroup_ns(ctx->ns); + kfree(ctx); + } + return ret; } static void cgroup_file_release(struct kernfs_open_file *of) { struct cftype *cft = of_cft(of); + struct cgroup_file_ctx *ctx = of->priv; if (cft->release) cft->release(of); + put_cgroup_ns(ctx->ns); + kfree(ctx); } static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; + struct cgroup_file_ctx *ctx = of->priv; struct cgroup *cgrp = of->kn->parent->priv; struct cftype *cft = of_cft(of); struct cgroup_subsys_state *css; @@ -3856,7 +3880,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, */ if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) && !(cft->flags & CFTYPE_NS_DELEGATABLE) && - ns != &init_cgroup_ns && ns->root_cset->dfl_cgrp == cgrp) + ctx->ns != &init_cgroup_ns && ctx->ns->root_cset->dfl_cgrp == cgrp) return -EPERM; if (cft->write) @@ -4763,21 +4787,21 @@ void css_task_iter_end(struct css_task_iter *it) static void cgroup_procs_release(struct kernfs_open_file *of) { - if (of->priv) { - css_task_iter_end(of->priv); - kfree(of->priv); - } + struct cgroup_file_ctx *ctx = of->priv; + + if (ctx->procs.started) + css_task_iter_end(&ctx->procs.iter); } static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos) { struct kernfs_open_file *of = s->private; - struct css_task_iter *it = of->priv; + struct cgroup_file_ctx *ctx = of->priv; if (pos) (*pos)++; - return css_task_iter_next(it); + return css_task_iter_next(&ctx->procs.iter); } static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, @@ -4785,21 +4809,18 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, { struct kernfs_open_file *of = s->private; struct cgroup *cgrp = seq_css(s)->cgroup; - struct css_task_iter *it = of->priv; + struct cgroup_file_ctx *ctx = of->priv; + struct css_task_iter *it = &ctx->procs.iter; /* * When a seq_file is seeked, it's always traversed sequentially * from position 0, so we can simply keep iterating on !0 *pos. */ - if (!it) { + if (!ctx->procs.started) { if (WARN_ON_ONCE((*pos))) return ERR_PTR(-EINVAL); - - it = kzalloc(sizeof(*it), GFP_KERNEL); - if (!it) - return ERR_PTR(-ENOMEM); - of->priv = it; css_task_iter_start(&cgrp->self, iter_flags, it); + ctx->procs.started = true; } else if (!(*pos)) { css_task_iter_end(it); css_task_iter_start(&cgrp->self, iter_flags, it); @@ -4850,9 +4871,9 @@ static int cgroup_may_write(const struct cgroup *cgrp, struct super_block *sb) static int cgroup_procs_write_permission(struct cgroup *src_cgrp, struct cgroup *dst_cgrp, - struct super_block *sb) + struct super_block *sb, + struct cgroup_namespace *ns) { - struct cgroup_namespace *ns = current->nsproxy->cgroup_ns; struct cgroup *com_cgrp = src_cgrp; int ret; @@ -4881,11 +4902,12 @@ static int cgroup_procs_write_permission(struct cgroup *src_cgrp, static int cgroup_attach_permissions(struct cgroup *src_cgrp, struct cgroup *dst_cgrp, - struct super_block *sb, bool threadgroup) + struct super_block *sb, bool threadgroup, + struct cgroup_namespace *ns) { int ret = 0; - ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb); + ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb, ns); if (ret) return ret; @@ -4902,8 +4924,10 @@ static int cgroup_attach_permissions(struct cgroup *src_cgrp, static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, bool threadgroup) { + struct cgroup_file_ctx *ctx = of->priv; struct cgroup *src_cgrp, *dst_cgrp; struct task_struct *task; + const struct cred *saved_cred; ssize_t ret; bool locked; @@ -4921,9 +4945,16 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root); spin_unlock_irq(&css_set_lock); - /* process and thread migrations follow same delegation rule */ + /* + * Process and thread migrations follow same delegation rule. Check + * permissions using the credentials from file open to protect against + * inherited fd attacks. + */ + saved_cred = override_creds(of->file->f_cred); ret = cgroup_attach_permissions(src_cgrp, dst_cgrp, - of->file->f_path.dentry->d_sb, threadgroup); + of->file->f_path.dentry->d_sb, + threadgroup, ctx->ns); + revert_creds(saved_cred); if (ret) goto out_finish; @@ -6142,7 +6173,8 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs) goto err; ret = cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb, - !(kargs->flags & CLONE_THREAD)); + !(kargs->flags & CLONE_THREAD), + current->nsproxy->cgroup_ns); if (ret) goto err; diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index 623cec04ad42..0cf7e90c0052 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -221,7 +221,7 @@ int cg_find_unified_root(char *root, size_t len) int cg_create(const char *cgroup) { - return mkdir(cgroup, 0644); + return mkdir(cgroup, 0755); } int cg_wait_for_proc_count(const char *cgroup, int count) diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index 3df648c37876..600123503063 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -1,11 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#define _GNU_SOURCE #include +#include #include #include #include #include #include +#include #include #include #include @@ -674,6 +677,166 @@ cleanup: return ret; } +/* + * cgroup migration permission check should be performed based on the + * credentials at the time of open instead of write. + */ +static int test_cgcore_lesser_euid_open(const char *root) +{ + const uid_t test_euid = 65534; /* usually nobody, any !root is fine */ + int ret = KSFT_FAIL; + char *cg_test_a = NULL, *cg_test_b = NULL; + char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL; + int cg_test_b_procs_fd = -1; + uid_t saved_uid; + + cg_test_a = cg_name(root, "cg_test_a"); + cg_test_b = cg_name(root, "cg_test_b"); + + if (!cg_test_a || !cg_test_b) + goto cleanup; + + cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs"); + cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs"); + + if (!cg_test_a_procs || !cg_test_b_procs) + goto cleanup; + + if (cg_create(cg_test_a) || cg_create(cg_test_b)) + goto cleanup; + + if (cg_enter_current(cg_test_a)) + goto cleanup; + + if (chown(cg_test_a_procs, test_euid, -1) || + chown(cg_test_b_procs, test_euid, -1)) + goto cleanup; + + saved_uid = geteuid(); + if (seteuid(test_euid)) + goto cleanup; + + cg_test_b_procs_fd = open(cg_test_b_procs, O_RDWR); + + if (seteuid(saved_uid)) + goto cleanup; + + if (cg_test_b_procs_fd < 0) + goto cleanup; + + if (write(cg_test_b_procs_fd, "0", 1) >= 0 || errno != EACCES) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_enter_current(root); + if (cg_test_b_procs_fd >= 0) + close(cg_test_b_procs_fd); + if (cg_test_b) + cg_destroy(cg_test_b); + if (cg_test_a) + cg_destroy(cg_test_a); + free(cg_test_b_procs); + free(cg_test_a_procs); + free(cg_test_b); + free(cg_test_a); + return ret; +} + +struct lesser_ns_open_thread_arg { + const char *path; + int fd; + int err; +}; + +static int lesser_ns_open_thread_fn(void *arg) +{ + struct lesser_ns_open_thread_arg *targ = arg; + + targ->fd = open(targ->path, O_RDWR); + targ->err = errno; + return 0; +} + +/* + * cgroup migration permission check should be performed based on the cgroup + * namespace at the time of open instead of write. + */ +static int test_cgcore_lesser_ns_open(const char *root) +{ + static char stack[65536]; + const uid_t test_euid = 65534; /* usually nobody, any !root is fine */ + int ret = KSFT_FAIL; + char *cg_test_a = NULL, *cg_test_b = NULL; + char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL; + int cg_test_b_procs_fd = -1; + struct lesser_ns_open_thread_arg targ = { .fd = -1 }; + pid_t pid; + int status; + + cg_test_a = cg_name(root, "cg_test_a"); + cg_test_b = cg_name(root, "cg_test_b"); + + if (!cg_test_a || !cg_test_b) + goto cleanup; + + cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs"); + cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs"); + + if (!cg_test_a_procs || !cg_test_b_procs) + goto cleanup; + + if (cg_create(cg_test_a) || cg_create(cg_test_b)) + goto cleanup; + + if (cg_enter_current(cg_test_b)) + goto cleanup; + + if (chown(cg_test_a_procs, test_euid, -1) || + chown(cg_test_b_procs, test_euid, -1)) + goto cleanup; + + targ.path = cg_test_b_procs; + pid = clone(lesser_ns_open_thread_fn, stack + sizeof(stack), + CLONE_NEWCGROUP | CLONE_FILES | CLONE_VM | SIGCHLD, + &targ); + if (pid < 0) + goto cleanup; + + if (waitpid(pid, &status, 0) < 0) + goto cleanup; + + if (!WIFEXITED(status)) + goto cleanup; + + cg_test_b_procs_fd = targ.fd; + if (cg_test_b_procs_fd < 0) + goto cleanup; + + if (cg_enter_current(cg_test_a)) + goto cleanup; + + if ((status = write(cg_test_b_procs_fd, "0", 1)) >= 0 || errno != ENOENT) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_enter_current(root); + if (cg_test_b_procs_fd >= 0) + close(cg_test_b_procs_fd); + if (cg_test_b) + cg_destroy(cg_test_b); + if (cg_test_a) + cg_destroy(cg_test_a); + free(cg_test_b_procs); + free(cg_test_a_procs); + free(cg_test_b); + free(cg_test_a); + return ret; +} + #define T(x) { x, #x } struct corecg_test { int (*fn)(const char *root); @@ -689,6 +852,8 @@ struct corecg_test { T(test_cgcore_proc_migration), T(test_cgcore_thread_migration), T(test_cgcore_destroy), + T(test_cgcore_lesser_euid_open), + T(test_cgcore_lesser_ns_open), }; #undef T