diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 101dde3eb1ea..311e10400708 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -495,6 +495,8 @@ GuC .. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc.c :doc: GuC +.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc.h + GuC Firmware Layout ~~~~~~~~~~~~~~~~~~~ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 166bb46408a9..65eabca57228 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -442,6 +442,13 @@ set_proto_ctx_engines_bond(struct i915_user_extension __user *base, void *data) u16 idx, num_bonds; int err, n; + if (GRAPHICS_VER(i915) >= 12 && !IS_TIGERLAKE(i915) && + !IS_ROCKETLAKE(i915) && !IS_ALDERLAKE_S(i915)) { + drm_dbg(&i915->drm, + "Bonding on gen12+ aside from TGL, RKL, and ADL_S not supported\n"); + return -ENODEV; + } + if (get_user(idx, &ext->virtual_index)) return -EFAULT; @@ -735,44 +742,6 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, return ret; } -static struct i915_address_space * -context_get_vm_rcu(struct i915_gem_context *ctx) -{ - GEM_BUG_ON(!rcu_access_pointer(ctx->vm)); - - do { - struct i915_address_space *vm; - - /* - * We do not allow downgrading from full-ppgtt [to a shared - * global gtt], so ctx->vm cannot become NULL. - */ - vm = rcu_dereference(ctx->vm); - if (!kref_get_unless_zero(&vm->ref)) - continue; - - /* - * This ppgtt may have be reallocated between - * the read and the kref, and reassigned to a third - * context. In order to avoid inadvertent sharing - * of this ppgtt with that third context (and not - * src), we have to confirm that we have the same - * ppgtt after passing through the strong memory - * barrier implied by a successful - * kref_get_unless_zero(). - * - * Once we have acquired the current ppgtt of ctx, - * we no longer care if it is released from ctx, as - * it cannot be reallocated elsewhere. - */ - - if (vm == rcu_access_pointer(ctx->vm)) - return rcu_pointer_handoff(vm); - - i915_vm_put(vm); - } while (1); -} - static int intel_context_set_gem(struct intel_context *ce, struct i915_gem_context *ctx, struct intel_sseu sseu) @@ -784,16 +753,8 @@ static int intel_context_set_gem(struct intel_context *ce, ce->ring_size = SZ_16K; - if (rcu_access_pointer(ctx->vm)) { - struct i915_address_space *vm; - - rcu_read_lock(); - vm = context_get_vm_rcu(ctx); /* hmm */ - rcu_read_unlock(); - - i915_vm_put(ce->vm); - ce->vm = vm; - } + i915_vm_put(ce->vm); + ce->vm = i915_gem_context_get_eb_vm(ctx); if (ctx->sched.priority >= I915_PRIORITY_NORMAL && intel_engine_has_timeslices(ce->engine) && @@ -982,9 +943,11 @@ free_engines: return err; } -void i915_gem_context_release(struct kref *ref) +static void i915_gem_context_release_work(struct work_struct *work) { - struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + struct i915_gem_context *ctx = container_of(work, typeof(*ctx), + release_work); + struct i915_address_space *vm; trace_i915_context_free(ctx); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); @@ -992,6 +955,10 @@ void i915_gem_context_release(struct kref *ref) if (ctx->syncobj) drm_syncobj_put(ctx->syncobj); + vm = ctx->vm; + if (vm) + i915_vm_put(vm); + mutex_destroy(&ctx->engines_mutex); mutex_destroy(&ctx->lut_mutex); @@ -1001,6 +968,13 @@ void i915_gem_context_release(struct kref *ref) kfree_rcu(ctx, rcu); } +void i915_gem_context_release(struct kref *ref) +{ + struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + + queue_work(ctx->i915->wq, &ctx->release_work); +} + static inline struct i915_gem_engines * __context_engines_static(const struct i915_gem_context *ctx) { @@ -1207,9 +1181,16 @@ static void context_close(struct i915_gem_context *ctx) set_closed_name(ctx); - vm = i915_gem_context_vm(ctx); - if (vm) + vm = ctx->vm; + if (vm) { + /* i915_vm_close drops the final reference, which is a bit too + * early and could result in surprises with concurrent + * operations racing with thist ctx close. Keep a full reference + * until the end. + */ + i915_vm_get(vm); i915_vm_close(vm); + } ctx->file_priv = ERR_PTR(-EBADF); @@ -1280,49 +1261,6 @@ static int __context_set_persistence(struct i915_gem_context *ctx, bool state) return 0; } -static inline struct i915_gem_engines * -__context_engines_await(const struct i915_gem_context *ctx, - bool *user_engines) -{ - struct i915_gem_engines *engines; - - rcu_read_lock(); - do { - engines = rcu_dereference(ctx->engines); - GEM_BUG_ON(!engines); - - if (user_engines) - *user_engines = i915_gem_context_user_engines(ctx); - - /* successful await => strong mb */ - if (unlikely(!i915_sw_fence_await(&engines->fence))) - continue; - - if (likely(engines == rcu_access_pointer(ctx->engines))) - break; - - i915_sw_fence_complete(&engines->fence); - } while (1); - rcu_read_unlock(); - - return engines; -} - -static void -context_apply_all(struct i915_gem_context *ctx, - void (*fn)(struct intel_context *ce, void *data), - void *data) -{ - struct i915_gem_engines_iter it; - struct i915_gem_engines *e; - struct intel_context *ce; - - e = __context_engines_await(ctx, NULL); - for_each_gem_engine(ce, e, it) - fn(ce, data); - i915_sw_fence_complete(&e->fence); -} - static struct i915_gem_context * i915_gem_create_context(struct drm_i915_private *i915, const struct i915_gem_proto_context *pc) @@ -1342,6 +1280,7 @@ i915_gem_create_context(struct drm_i915_private *i915, ctx->sched = pc->sched; mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->link); + INIT_WORK(&ctx->release_work, i915_gem_context_release_work); spin_lock_init(&ctx->stale.lock); INIT_LIST_HEAD(&ctx->stale.engines); @@ -1361,7 +1300,7 @@ i915_gem_create_context(struct drm_i915_private *i915, vm = &ppgtt->vm; } if (vm) { - RCU_INIT_POINTER(ctx->vm, i915_vm_open(vm)); + ctx->vm = i915_vm_open(vm); /* i915_vm_open() takes a reference */ i915_vm_put(vm); @@ -1584,18 +1523,15 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, int err; u32 id; - if (!rcu_access_pointer(ctx->vm)) + if (!i915_gem_context_has_full_ppgtt(ctx)) return -ENODEV; - rcu_read_lock(); - vm = context_get_vm_rcu(ctx); - rcu_read_unlock(); - if (!vm) - return -ENODEV; + vm = ctx->vm; + GEM_BUG_ON(!vm); err = xa_alloc(&file_priv->vm_xa, &id, vm, xa_limit_32b, GFP_KERNEL); if (err) - goto err_put; + return err; i915_vm_open(vm); @@ -1603,8 +1539,6 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, args->value = id; args->size = 0; -err_put: - i915_vm_put(vm); return err; } @@ -1772,23 +1706,11 @@ set_persistence(struct i915_gem_context *ctx, return __context_set_persistence(ctx, args->value); } -static void __apply_priority(struct intel_context *ce, void *arg) -{ - struct i915_gem_context *ctx = arg; - - if (!intel_engine_has_timeslices(ce->engine)) - return; - - if (ctx->sched.priority >= I915_PRIORITY_NORMAL && - intel_engine_has_semaphores(ce->engine)) - intel_context_set_use_semaphores(ce); - else - intel_context_clear_use_semaphores(ce); -} - static int set_priority(struct i915_gem_context *ctx, const struct drm_i915_gem_context_param *args) { + struct i915_gem_engines_iter it; + struct intel_context *ce; int err; err = validate_priority(ctx->i915, args); @@ -1796,7 +1718,18 @@ static int set_priority(struct i915_gem_context *ctx, return err; ctx->sched.priority = args->value; - context_apply_all(ctx, __apply_priority, ctx); + + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + if (!intel_engine_has_timeslices(ce->engine)) + continue; + + if (ctx->sched.priority >= I915_PRIORITY_NORMAL && + intel_engine_has_semaphores(ce->engine)) + intel_context_set_use_semaphores(ce); + else + intel_context_clear_use_semaphores(ce); + } + i915_gem_context_unlock_engines(ctx); return 0; } @@ -2127,6 +2060,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, struct drm_i915_file_private *file_priv = file->driver_priv; struct drm_i915_gem_context_param *args = data; struct i915_gem_context *ctx; + struct i915_address_space *vm; int ret = 0; ctx = i915_gem_context_lookup(file_priv, args->ctx_id); @@ -2136,12 +2070,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, switch (args->param) { case I915_CONTEXT_PARAM_GTT_SIZE: args->size = 0; - rcu_read_lock(); - if (rcu_access_pointer(ctx->vm)) - args->value = rcu_dereference(ctx->vm)->total; - else - args->value = to_i915(dev)->ggtt.vm.total; - rcu_read_unlock(); + vm = i915_gem_context_get_eb_vm(ctx); + args->value = vm->total; + i915_vm_put(vm); + break; case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index 18060536b0c2..d3279086a5e7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -154,17 +154,22 @@ i915_gem_context_vm(struct i915_gem_context *ctx) return rcu_dereference_protected(ctx->vm, lockdep_is_held(&ctx->mutex)); } +static inline bool i915_gem_context_has_full_ppgtt(struct i915_gem_context *ctx) +{ + GEM_BUG_ON(!!ctx->vm != HAS_FULL_PPGTT(ctx->i915)); + + return !!ctx->vm; +} + static inline struct i915_address_space * -i915_gem_context_get_vm_rcu(struct i915_gem_context *ctx) +i915_gem_context_get_eb_vm(struct i915_gem_context *ctx) { struct i915_address_space *vm; - rcu_read_lock(); - vm = rcu_dereference(ctx->vm); + vm = ctx->vm; if (!vm) vm = &ctx->i915->ggtt.vm; vm = i915_vm_get(vm); - rcu_read_unlock(); return vm; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 94c03a97cb77..c4617e4d9fa9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -262,7 +262,7 @@ struct i915_gem_context { * In other modes, this is a NULL pointer with the expectation that * the caller uses the shared global GTT. */ - struct i915_address_space __rcu *vm; + struct i915_address_space *vm; /** * @pid: process id of creator @@ -288,6 +288,18 @@ struct i915_gem_context { */ struct kref ref; + /** + * @release_work: + * + * Work item for deferred cleanup, since i915_gem_context_put() tends to + * be called from hardirq context. + * + * FIXME: The only real reason for this is &i915_gem_engines.fence, all + * other callers are from process context and need at most some mild + * shuffling to pull the i915_gem_context_put() call out of a spinlock. + */ + struct work_struct release_work; + /** * @rcu: rcu_head for deferred freeing. */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index 23fee13a3384..1d341b8c47c0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -347,9 +347,6 @@ static int ext_set_placements(struct i915_user_extension __user *base, { struct drm_i915_gem_create_ext_memory_regions ext; - if (!IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM)) - return -ENODEV; - if (copy_from_user(&ext, base, sizeof(ext))) return -EFAULT; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 1aa249908b64..c2f74dba0557 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -733,7 +733,7 @@ static int eb_select_context(struct i915_execbuffer *eb) return PTR_ERR(ctx); eb->gem_context = ctx; - if (rcu_access_pointer(ctx->vm)) + if (i915_gem_context_has_full_ppgtt(ctx)) eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT; return 0; @@ -759,11 +759,7 @@ static int __eb_add_lut(struct i915_execbuffer *eb, /* Check that the context hasn't been closed in the meantime */ err = -EINTR; if (!mutex_lock_interruptible(&ctx->lut_mutex)) { - struct i915_address_space *vm = rcu_access_pointer(ctx->vm); - - if (unlikely(vm && vma->vm != vm)) - err = -EAGAIN; /* user racing with ctx set-vm */ - else if (likely(!i915_gem_context_is_closed(ctx))) + if (likely(!i915_gem_context_is_closed(ctx))) err = radix_tree_insert(&ctx->handles_vma, handle, vma); else err = -ENOENT; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 1cc8f0b453ae..2f672f06b169 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -426,6 +426,7 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, } static int i915_ttm_accel_move(struct ttm_buffer_object *bo, + bool clear, struct ttm_resource *dst_mem, struct sg_table *dst_st) { @@ -444,13 +445,10 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo, return -EINVAL; dst_level = i915_ttm_cache_level(i915, dst_mem, ttm); - if (!ttm || !ttm_tt_is_populated(ttm)) { + if (clear) { if (bo->type == ttm_bo_type_kernel) return -EINVAL; - if (ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)) - return 0; - intel_engine_pm_get(i915->gt.migrate.context->engine); ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL, dst_st->sgl, dst_level, @@ -484,6 +482,41 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo, return ret; } +static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear, + struct ttm_resource *dst_mem, + struct sg_table *dst_st) +{ + int ret; + + ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_st); + if (ret) { + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct intel_memory_region *dst_reg, *src_reg; + union { + struct ttm_kmap_iter_tt tt; + struct ttm_kmap_iter_iomap io; + } _dst_iter, _src_iter; + struct ttm_kmap_iter *dst_iter, *src_iter; + + dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type); + src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type); + GEM_BUG_ON(!dst_reg || !src_reg); + + dst_iter = !cpu_maps_iomem(dst_mem) ? + ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm) : + ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap, + dst_st, dst_reg->region.start); + + src_iter = !cpu_maps_iomem(bo->resource) ? + ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) : + ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap, + obj->ttm.cached_io_st, + src_reg->region.start); + + ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter); + } +} + static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, struct ttm_operation_ctx *ctx, struct ttm_resource *dst_mem, @@ -492,19 +525,11 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); struct ttm_resource_manager *dst_man = ttm_manager_type(bo->bdev, dst_mem->mem_type); - struct intel_memory_region *dst_reg, *src_reg; - union { - struct ttm_kmap_iter_tt tt; - struct ttm_kmap_iter_iomap io; - } _dst_iter, _src_iter; - struct ttm_kmap_iter *dst_iter, *src_iter; + struct ttm_tt *ttm = bo->ttm; struct sg_table *dst_st; + bool clear; int ret; - dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type); - src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type); - GEM_BUG_ON(!dst_reg || !src_reg); - /* Sync for now. We could do the actual copy async. */ ret = ttm_bo_wait_ctx(bo, ctx); if (ret) @@ -521,9 +546,8 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, } /* Populate ttm with pages if needed. Typically system memory. */ - if (bo->ttm && (dst_man->use_tt || - (bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED))) { - ret = ttm_tt_populate(bo->bdev, bo->ttm, ctx); + if (ttm && (dst_man->use_tt || (ttm->page_flags & TTM_PAGE_FLAG_SWAPPED))) { + ret = ttm_tt_populate(bo->bdev, ttm, ctx); if (ret) return ret; } @@ -532,23 +556,10 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, if (IS_ERR(dst_st)) return PTR_ERR(dst_st); - ret = i915_ttm_accel_move(bo, dst_mem, dst_st); - if (ret) { - /* If we start mapping GGTT, we can no longer use man::use_tt here. */ - dst_iter = !cpu_maps_iomem(dst_mem) ? - ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm) : - ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap, - dst_st, dst_reg->region.start); + clear = !cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm)); + if (!(clear && ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC))) + __i915_ttm_move(bo, clear, dst_mem, dst_st); - src_iter = !cpu_maps_iomem(bo->resource) ? - ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) : - ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap, - obj->ttm.cached_io_st, - src_reg->region.start); - - ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); - } - /* Below dst_mem becomes bo->resource. */ ttm_bo_move_sync_cleanup(bo, dst_mem); i915_ttm_adjust_domains_after_move(obj); i915_ttm_free_cached_io_st(obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c index 5e6e8c91ab38..dbdbdc344d87 100644 --- a/drivers/gpu/drm/i915/gem/i915_gemfs.c +++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c @@ -6,7 +6,6 @@ #include #include -#include #include "i915_drv.h" #include "i915_gemfs.h" @@ -15,6 +14,7 @@ int i915_gemfs_init(struct drm_i915_private *i915) { struct file_system_type *type; struct vfsmount *gemfs; + char *opts; type = get_fs_type("tmpfs"); if (!type) @@ -26,10 +26,26 @@ int i915_gemfs_init(struct drm_i915_private *i915) * * One example, although it is probably better with a per-file * control, is selecting huge page allocations ("huge=within_size"). - * Currently unused due to bandwidth issues (slow reads) on Broadwell+. + * However, we only do so to offset the overhead of iommu lookups + * due to bandwidth issues (slow reads) on Broadwell+. */ - gemfs = kern_mount(type); + opts = NULL; + if (intel_vtd_active()) { + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { + static char huge_opt[] = "huge=within_size"; /* r/w */ + + opts = huge_opt; + drm_info(&i915->drm, + "Transparent Hugepage mode '%s'\n", + opts); + } else { + drm_notice(&i915->drm, + "Transparent Hugepage support is recommended for optimal performance when IOMMU is enabled!\n"); + } + } + + gemfs = vfs_kern_mount(type, SB_KERNMOUNT, type->name, opts); if (IS_ERR(gemfs)) return PTR_ERR(gemfs); diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index a094f3ce1a90..0827634c842c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1456,7 +1456,7 @@ static int igt_tmpfs_fallback(void *arg) struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; struct vfsmount *gemfs = i915->mm.gemfs; - struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx); + struct i915_address_space *vm = i915_gem_context_get_eb_vm(ctx); struct drm_i915_gem_object *obj; struct i915_vma *vma; u32 *vaddr; @@ -1512,13 +1512,14 @@ static int igt_shrink_thp(void *arg) { struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; - struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx); + struct i915_address_space *vm = i915_gem_context_get_eb_vm(ctx); struct drm_i915_gem_object *obj; struct i915_gem_engines_iter it; struct intel_context *ce; struct i915_vma *vma; unsigned int flags = PIN_USER; unsigned int n; + bool should_swap; int err = 0; /* @@ -1567,23 +1568,38 @@ static int igt_shrink_thp(void *arg) break; } i915_gem_context_unlock_engines(ctx); + /* + * Nuke everything *before* we unpin the pages so we can be reasonably + * sure that when later checking get_nr_swap_pages() that some random + * leftover object doesn't steal the remaining swap space. + */ + i915_gem_shrink(NULL, i915, -1UL, NULL, + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND | + I915_SHRINK_ACTIVE); i915_vma_unpin(vma); if (err) goto out_put; /* - * Now that the pages are *unpinned* shrink-all should invoke - * shmem to truncate our pages. + * Now that the pages are *unpinned* shrinking should invoke + * shmem to truncate our pages, if we have available swap. */ - i915_gem_shrink_all(i915); - if (i915_gem_object_has_pages(obj)) { - pr_err("shrink-all didn't truncate the pages\n"); + should_swap = get_nr_swap_pages() > 0; + i915_gem_shrink(NULL, i915, -1UL, NULL, + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND | + I915_SHRINK_ACTIVE); + if (should_swap == i915_gem_object_has_pages(obj)) { + pr_err("unexpected pages mismatch, should_swap=%s\n", + yesno(should_swap)); err = -EINVAL; goto out_put; } - if (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys) { - pr_err("residual page-size bits left\n"); + if (should_swap == (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys)) { + pr_err("unexpected residual page-size bits, should_swap=%s\n", + yesno(should_swap)); err = -EINVAL; goto out_put; } @@ -1688,11 +1704,9 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) goto out_file; } - mutex_lock(&ctx->mutex); - vm = i915_gem_context_vm(ctx); + vm = ctx->vm; if (vm) WRITE_ONCE(vm->scrub_64K, true); - mutex_unlock(&ctx->mutex); err = i915_subtests(tests, ctx); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 8eb5050f8cb3..b32f7fed2d9c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -27,12 +27,6 @@ #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) -static inline struct i915_address_space *ctx_vm(struct i915_gem_context *ctx) -{ - /* single threaded, private ctx */ - return rcu_dereference_protected(ctx->vm, true); -} - static int live_nop_switch(void *arg) { const unsigned int nctx = 1024; @@ -94,7 +88,7 @@ static int live_nop_switch(void *arg) rq = i915_request_get(this); i915_request_add(this); } - if (i915_request_wait(rq, 0, HZ / 5) < 0) { + if (i915_request_wait(rq, 0, HZ) < 0) { pr_err("Failed to populated %d contexts\n", nctx); intel_gt_set_wedged(&i915->gt); i915_request_put(rq); @@ -704,7 +698,7 @@ static int igt_ctx_exec(void *arg) pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), engine->name, - yesno(!!rcu_access_pointer(ctx->vm)), + yesno(i915_gem_context_has_full_ppgtt(ctx)), err); intel_context_put(ce); kernel_context_close(ctx); @@ -813,7 +807,7 @@ static int igt_shared_ctx_exec(void *arg) struct i915_gem_context *ctx; struct intel_context *ce; - ctx = kernel_context(i915, ctx_vm(parent)); + ctx = kernel_context(i915, parent->vm); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out_test; @@ -823,7 +817,7 @@ static int igt_shared_ctx_exec(void *arg) GEM_BUG_ON(IS_ERR(ce)); if (!obj) { - obj = create_test_object(ctx_vm(parent), + obj = create_test_object(parent->vm, file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); @@ -838,7 +832,7 @@ static int igt_shared_ctx_exec(void *arg) pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), engine->name, - yesno(!!rcu_access_pointer(ctx->vm)), + yesno(i915_gem_context_has_full_ppgtt(ctx)), err); intel_context_put(ce); kernel_context_close(ctx); @@ -1380,7 +1374,7 @@ static int igt_ctx_readonly(void *arg) goto out_file; } - vm = ctx_vm(ctx) ?: &i915->ggtt.alias->vm; + vm = ctx->vm ?: &i915->ggtt.alias->vm; if (!vm || !vm->has_read_only) { err = 0; goto out_file; @@ -1417,7 +1411,7 @@ static int igt_ctx_readonly(void *arg) pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), ce->engine->name, - yesno(!!ctx_vm(ctx)), + yesno(i915_gem_context_has_full_ppgtt(ctx)), err); i915_gem_context_unlock_engines(ctx); goto out_file; @@ -1499,7 +1493,7 @@ static int write_to_scratch(struct i915_gem_context *ctx, GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); - err = check_scratch(ctx_vm(ctx), offset); + err = check_scratch(ctx->vm, offset); if (err) return err; @@ -1528,7 +1522,7 @@ static int write_to_scratch(struct i915_gem_context *ctx, intel_gt_chipset_flush(engine->gt); - vm = i915_gem_context_get_vm_rcu(ctx); + vm = i915_gem_context_get_eb_vm(ctx); vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); @@ -1596,7 +1590,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); - err = check_scratch(ctx_vm(ctx), offset); + err = check_scratch(ctx->vm, offset); if (err) return err; @@ -1607,7 +1601,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, if (GRAPHICS_VER(i915) >= 8) { const u32 GPR0 = engine->mmio_base + 0x600; - vm = i915_gem_context_get_vm_rcu(ctx); + vm = i915_gem_context_get_eb_vm(ctx); vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); @@ -1739,7 +1733,7 @@ static int check_scratch_page(struct i915_gem_context *ctx, u32 *out) u32 *vaddr; int err = 0; - vm = ctx_vm(ctx); + vm = ctx->vm; if (!vm) return -ENODEV; @@ -1801,7 +1795,7 @@ static int igt_vm_isolation(void *arg) } /* We can only test vm isolation, if the vm are distinct */ - if (ctx_vm(ctx_a) == ctx_vm(ctx_b)) + if (ctx_a->vm == ctx_b->vm) goto out_file; /* Read the initial state of the scratch page */ @@ -1813,8 +1807,8 @@ static int igt_vm_isolation(void *arg) if (err) goto out_file; - vm_total = ctx_vm(ctx_a)->total; - GEM_BUG_ON(ctx_vm(ctx_b)->total != vm_total); + vm_total = ctx_a->vm->total; + GEM_BUG_ON(ctx_b->vm->total != vm_total); count = 0; num_engines = 0; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c deleted file mode 100644 index 16162fc2782d..000000000000 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c +++ /dev/null @@ -1,190 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2020 Intel Corporation - */ - -#include "i915_selftest.h" - -#include "gt/intel_engine_pm.h" -#include "selftests/igt_flush_test.h" - -static u64 read_reloc(const u32 *map, int x, const u64 mask) -{ - u64 reloc; - - memcpy(&reloc, &map[x], sizeof(reloc)); - return reloc & mask; -} - -static int __igt_gpu_reloc(struct i915_execbuffer *eb, - struct drm_i915_gem_object *obj) -{ - const unsigned int offsets[] = { 8, 3, 0 }; - const u64 mask = - GENMASK_ULL(eb->reloc_cache.use_64bit_reloc ? 63 : 31, 0); - const u32 *map = page_mask_bits(obj->mm.mapping); - struct i915_request *rq; - struct i915_vma *vma; - int err; - int i; - - vma = i915_vma_instance(obj, eb->context->vm, NULL); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - err = i915_gem_object_lock(obj, &eb->ww); - if (err) - return err; - - err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, PIN_USER | PIN_HIGH); - if (err) - return err; - - /* 8-Byte aligned */ - err = __reloc_entry_gpu(eb, vma, offsets[0] * sizeof(u32), 0); - if (err <= 0) - goto reloc_err; - - /* !8-Byte aligned */ - err = __reloc_entry_gpu(eb, vma, offsets[1] * sizeof(u32), 1); - if (err <= 0) - goto reloc_err; - - /* Skip to the end of the cmd page */ - i = PAGE_SIZE / sizeof(u32) - 1; - i -= eb->reloc_cache.rq_size; - memset32(eb->reloc_cache.rq_cmd + eb->reloc_cache.rq_size, - MI_NOOP, i); - eb->reloc_cache.rq_size += i; - - /* Force next batch */ - err = __reloc_entry_gpu(eb, vma, offsets[2] * sizeof(u32), 2); - if (err <= 0) - goto reloc_err; - - GEM_BUG_ON(!eb->reloc_cache.rq); - rq = i915_request_get(eb->reloc_cache.rq); - reloc_gpu_flush(eb, &eb->reloc_cache); - GEM_BUG_ON(eb->reloc_cache.rq); - - err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, HZ / 2); - if (err) { - intel_gt_set_wedged(eb->engine->gt); - goto put_rq; - } - - if (!i915_request_completed(rq)) { - pr_err("%s: did not wait for relocations!\n", eb->engine->name); - err = -EINVAL; - goto put_rq; - } - - for (i = 0; i < ARRAY_SIZE(offsets); i++) { - u64 reloc = read_reloc(map, offsets[i], mask); - - if (reloc != i) { - pr_err("%s[%d]: map[%d] %llx != %x\n", - eb->engine->name, i, offsets[i], reloc, i); - err = -EINVAL; - } - } - if (err) - igt_hexdump(map, 4096); - -put_rq: - i915_request_put(rq); -unpin_vma: - i915_vma_unpin(vma); - return err; - -reloc_err: - if (!err) - err = -EIO; - goto unpin_vma; -} - -static int igt_gpu_reloc(void *arg) -{ - struct i915_execbuffer eb; - struct drm_i915_gem_object *scratch; - int err = 0; - u32 *map; - - eb.i915 = arg; - - scratch = i915_gem_object_create_internal(eb.i915, 4096); - if (IS_ERR(scratch)) - return PTR_ERR(scratch); - - map = i915_gem_object_pin_map_unlocked(scratch, I915_MAP_WC); - if (IS_ERR(map)) { - err = PTR_ERR(map); - goto err_scratch; - } - - intel_gt_pm_get(&eb.i915->gt); - - for_each_uabi_engine(eb.engine, eb.i915) { - if (intel_engine_requires_cmd_parser(eb.engine) || - intel_engine_using_cmd_parser(eb.engine)) - continue; - - reloc_cache_init(&eb.reloc_cache, eb.i915); - memset(map, POISON_INUSE, 4096); - - intel_engine_pm_get(eb.engine); - eb.context = intel_context_create(eb.engine); - if (IS_ERR(eb.context)) { - err = PTR_ERR(eb.context); - goto err_pm; - } - eb.reloc_pool = NULL; - eb.reloc_context = NULL; - - i915_gem_ww_ctx_init(&eb.ww, false); -retry: - err = intel_context_pin_ww(eb.context, &eb.ww); - if (!err) { - err = __igt_gpu_reloc(&eb, scratch); - - intel_context_unpin(eb.context); - } - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&eb.ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&eb.ww); - - if (eb.reloc_pool) - intel_gt_buffer_pool_put(eb.reloc_pool); - if (eb.reloc_context) - intel_context_put(eb.reloc_context); - - intel_context_put(eb.context); -err_pm: - intel_engine_pm_put(eb.engine); - if (err) - break; - } - - if (igt_flush_test(eb.i915)) - err = -EIO; - - intel_gt_pm_put(&eb.i915->gt); -err_scratch: - i915_gem_object_put(scratch); - return err; -} - -int i915_gem_execbuffer_live_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_gpu_reloc), - }; - - if (intel_gt_is_wedged(&i915->gt)) - return 0; - - return i915_live_subtests(tests, i915); -} diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index fee070df1c97..067d68a6fe4c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -23,6 +23,7 @@ mock_context(struct drm_i915_private *i915, kref_init(&ctx->ref); INIT_LIST_HEAD(&ctx->link); ctx->i915 = i915; + INIT_WORK(&ctx->release_work, i915_gem_context_release_work); mutex_init(&ctx->mutex); diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c index d6f5836396f8..f6733f279890 100644 --- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c @@ -309,13 +309,11 @@ static int frequency_show(struct seq_file *m, void *unused) int max_freq; rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS); - if (IS_GEN9_LP(i915)) { - rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP); + rp_state_cap = intel_rps_read_state_cap(rps); + if (IS_GEN9_LP(i915)) gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS); - } else { - rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP); + else gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS); - } /* RPSTAT1 is in the GT power well */ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 6e0e52eeb87a..6a5af995f5b1 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -548,6 +548,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, I915_GTT_PAGE_SIZE_2M)))) { vaddr = px_vaddr(pd); vaddr[maybe_64K] |= GEN8_PDE_IPS_64K; + clflush_cache_range(vaddr, PAGE_SIZE); page_size = I915_GTT_PAGE_SIZE_64K; /* @@ -568,6 +569,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, for (i = 1; i < index; i += 16) memset64(vaddr + i, encode, 15); + clflush_cache_range(vaddr, PAGE_SIZE); } } diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 17ca4dc4d0cb..c35c151f652f 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -395,19 +395,18 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) spin_lock_init(&ce->guc_state.lock); INIT_LIST_HEAD(&ce->guc_state.fences); + INIT_LIST_HEAD(&ce->guc_state.requests); - spin_lock_init(&ce->guc_active.lock); - INIT_LIST_HEAD(&ce->guc_active.requests); - - ce->guc_id = GUC_INVALID_LRC_ID; - INIT_LIST_HEAD(&ce->guc_id_link); + ce->guc_id.id = GUC_INVALID_LRC_ID; + INIT_LIST_HEAD(&ce->guc_id.link); /* * Initialize fence to be complete as this is expected to be complete * unless there is a pending schedule disable outstanding. */ - i915_sw_fence_init(&ce->guc_blocked, sw_fence_dummy_notify); - i915_sw_fence_commit(&ce->guc_blocked); + i915_sw_fence_init(&ce->guc_state.blocked, + sw_fence_dummy_notify); + i915_sw_fence_commit(&ce->guc_state.blocked); i915_active_init(&ce->active, __intel_context_active, __intel_context_retire, 0); @@ -421,7 +420,6 @@ void intel_context_fini(struct intel_context *ce) mutex_destroy(&ce->pin_mutex); i915_active_fini(&ce->active); - i915_sw_fence_fini(&ce->guc_blocked); } void i915_context_module_exit(void) @@ -522,15 +520,15 @@ struct i915_request *intel_context_find_active_request(struct intel_context *ce) GEM_BUG_ON(!intel_engine_uses_guc(ce->engine)); - spin_lock_irqsave(&ce->guc_active.lock, flags); - list_for_each_entry_reverse(rq, &ce->guc_active.requests, + spin_lock_irqsave(&ce->guc_state.lock, flags); + list_for_each_entry_reverse(rq, &ce->guc_state.requests, sched.link) { if (i915_request_completed(rq)) break; active = rq; } - spin_unlock_irqrestore(&ce->guc_active.lock, flags); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); return active; } diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index e54351a170e2..930569a1a01f 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -112,6 +112,7 @@ struct intel_context { #define CONTEXT_FORCE_SINGLE_SUBMISSION 7 #define CONTEXT_NOPREEMPT 8 #define CONTEXT_LRCA_DIRTY 9 +#define CONTEXT_GUC_INIT 10 struct { u64 timeout_us; @@ -155,49 +156,72 @@ struct intel_context { u8 wa_bb_page; /* if set, page num reserved for context workarounds */ struct { - /** lock: protects everything in guc_state */ + /** @lock: protects everything in guc_state */ spinlock_t lock; /** - * sched_state: scheduling state of this context using GuC + * @sched_state: scheduling state of this context using GuC * submission */ - u16 sched_state; + u32 sched_state; /* - * fences: maintains of list of requests that have a submit - * fence related to GuC submission + * @fences: maintains a list of requests that are currently + * being fenced until a GuC operation completes */ struct list_head fences; + /** + * @blocked: fence used to signal when the blocking of a + * context's submissions is complete. + */ + struct i915_sw_fence blocked; + /** @number_committed_requests: number of committed requests */ + int number_committed_requests; + /** @requests: list of active requests on this context */ + struct list_head requests; + /** @prio: the context's current guc priority */ + u8 prio; + /** + * @prio_count: a counter of the number requests in flight in + * each priority bucket + */ + u32 prio_count[GUC_CLIENT_PRIORITY_NUM]; } guc_state; struct { - /** lock: protects everything in guc_active */ - spinlock_t lock; - /** requests: active requests on this context */ - struct list_head requests; - } guc_active; + /** + * @id: handle which is used to uniquely identify this context + * with the GuC, protected by guc->contexts_lock + */ + u16 id; + /** + * @ref: the number of references to the guc_id, when + * transitioning in and out of zero protected by + * guc->contexts_lock + */ + atomic_t ref; + /** + * @link: in guc->guc_id_list when the guc_id has no refs but is + * still valid, protected by guc->contexts_lock + */ + struct list_head link; + } guc_id; - /* GuC scheduling state flags that do not require a lock. */ - atomic_t guc_sched_state_no_lock; - - /* GuC LRC descriptor ID */ - u16 guc_id; - - /* GuC LRC descriptor reference count */ - atomic_t guc_id_ref; - - /* - * GuC ID link - in list when unpinned but guc_id still valid in GuC +#ifdef CONFIG_DRM_I915_SELFTEST + /** + * @drop_schedule_enable: Force drop of schedule enable G2H for selftest */ - struct list_head guc_id_link; + bool drop_schedule_enable; - /* GuC context blocked fence */ - struct i915_sw_fence guc_blocked; - - /* - * GuC priority management + /** + * @drop_schedule_disable: Force drop of schedule disable G2H for + * selftest */ - u8 guc_prio; - u32 guc_prio_count[GUC_CLIENT_PRIORITY_NUM]; + bool drop_schedule_disable; + + /** + * @drop_deregister: Force drop of deregister G2H for selftest + */ + bool drop_deregister; +#endif }; #endif /* __INTEL_CONTEXT_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 0d9105a31d84..332efea696a5 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1163,16 +1163,16 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, u32 mmio_base = engine->mmio_base; int slice; int subslice; + int iter; memset(instdone, 0, sizeof(*instdone)); - switch (GRAPHICS_VER(i915)) { - default: + if (GRAPHICS_VER(i915) >= 8) { instdone->instdone = intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); if (engine->id != RCS0) - break; + return; instdone->slice_common = intel_uncore_read(uncore, GEN7_SC_INSTDONE); @@ -1182,21 +1182,39 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, instdone->slice_common_extra[1] = intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA2); } - for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { - instdone->sampler[slice][subslice] = - read_subslice_reg(engine, slice, subslice, - GEN7_SAMPLER_INSTDONE); - instdone->row[slice][subslice] = - read_subslice_reg(engine, slice, subslice, - GEN7_ROW_INSTDONE); + + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) { + instdone->sampler[slice][subslice] = + read_subslice_reg(engine, slice, subslice, + GEN7_SAMPLER_INSTDONE); + instdone->row[slice][subslice] = + read_subslice_reg(engine, slice, subslice, + GEN7_ROW_INSTDONE); + } + } else { + for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { + instdone->sampler[slice][subslice] = + read_subslice_reg(engine, slice, subslice, + GEN7_SAMPLER_INSTDONE); + instdone->row[slice][subslice] = + read_subslice_reg(engine, slice, subslice, + GEN7_ROW_INSTDONE); + } } - break; - case 7: + + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { + for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) + instdone->geom_svg[slice][subslice] = + read_subslice_reg(engine, slice, subslice, + XEHPG_INSTDONE_GEOM_SVG); + } + } else if (GRAPHICS_VER(i915) >= 7) { instdone->instdone = intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); if (engine->id != RCS0) - break; + return; instdone->slice_common = intel_uncore_read(uncore, GEN7_SC_INSTDONE); @@ -1204,22 +1222,15 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, intel_uncore_read(uncore, GEN7_SAMPLER_INSTDONE); instdone->row[0][0] = intel_uncore_read(uncore, GEN7_ROW_INSTDONE); - - break; - case 6: - case 5: - case 4: + } else if (GRAPHICS_VER(i915) >= 4) { instdone->instdone = intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); if (engine->id == RCS0) /* HACK: Using the wrong struct member */ instdone->slice_common = intel_uncore_read(uncore, GEN4_INSTDONE1); - break; - case 3: - case 2: + } else { instdone->instdone = intel_uncore_read(uncore, GEN2_INSTDONE); - break; } } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index ed91bcff20eb..bfbfe53c23dd 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -67,8 +67,11 @@ struct intel_instdone { /* The following exist only in the RCS engine */ u32 slice_common; u32 slice_common_extra[2]; - u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES]; - u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES]; + u32 sampler[GEN_MAX_GSLICES][I915_MAX_SUBSLICES]; + u32 row[GEN_MAX_GSLICES][I915_MAX_SUBSLICES]; + + /* Added in XeHPG */ + u32 geom_svg[GEN_MAX_GSLICES][I915_MAX_SUBSLICES]; }; /* @@ -578,4 +581,12 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine) for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \ (instdone_has_subslice(dev_priv_, sseu_, slice_, \ subslice_))) + +#define for_each_instdone_gslice_dss_xehp(dev_priv_, sseu_, iter_, gslice_, dss_) \ + for ((iter_) = 0, (gslice_) = 0, (dss_) = 0; \ + (iter_) < GEN_MAX_SUBSLICES; \ + (iter_)++, (gslice_) = (iter_) / GEN_DSS_PER_GSLICE, \ + (dss_) = (iter_) % GEN_DSS_PER_GSLICE) \ + for_each_if(intel_sseu_has_subslice((sseu_), 0, (iter_))) + #endif /* __INTEL_ENGINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index de5f9c86b9a4..87c595e4efe2 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2140,10 +2140,6 @@ static void __execlists_unhold(struct i915_request *rq) if (p->flags & I915_DEPENDENCY_WEAK) continue; - /* Propagate any change in error status */ - if (rq->fence.error) - i915_request_set_error_once(w, rq->fence.error); - if (w->engine != rq->engine) continue; @@ -2565,7 +2561,7 @@ __execlists_context_pre_pin(struct intel_context *ce, if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags)) { lrc_init_state(ce, engine, *vaddr); - __i915_gem_object_flush_map(ce->state->obj, 0, engine->context_size); + __i915_gem_object_flush_map(ce->state->obj, 0, engine->context_size); } return 0; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index de3ac58fceec..8d71f67926f1 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -727,7 +727,6 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) atomic_set(&ggtt->vm.open, 0); - rcu_barrier(); /* flush the RCU'ed__i915_vm_release */ flush_workqueue(ggtt->vm.i915->wq); mutex_lock(&ggtt->vm.mutex); diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 62d40c986642..2aeaae036a6f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -682,6 +682,8 @@ int intel_gt_init(struct intel_gt *gt) goto err_pm; } + set_mocs_index(gt); + err = intel_engines_init(gt); if (err) goto err_engines; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index a81e21bf1bd1..6fdcde64c180 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -192,6 +192,10 @@ struct intel_gt { unsigned long mslice_mask; } info; + + struct { + u8 uc_index; + } mocs; }; enum intel_gt_scratch_field { diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index e137dd32b5b8..a0c2b952aa57 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -155,7 +155,7 @@ void i915_vm_resv_release(struct kref *kref) static void __i915_vm_release(struct work_struct *work) { struct i915_address_space *vm = - container_of(work, struct i915_address_space, rcu.work); + container_of(work, struct i915_address_space, release_work); vm->cleanup(vm); i915_address_space_fini(vm); @@ -171,7 +171,7 @@ void i915_vm_release(struct kref *kref) GEM_BUG_ON(i915_is_ggtt(vm)); trace_i915_ppgtt_release(vm); - queue_rcu_work(vm->i915->wq, &vm->rcu); + queue_work(vm->i915->wq, &vm->release_work); } void i915_address_space_init(struct i915_address_space *vm, int subclass) @@ -185,7 +185,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) if (!kref_read(&vm->resv_ref)) kref_init(&vm->resv_ref); - INIT_RCU_WORK(&vm->rcu, __i915_vm_release); + INIT_WORK(&vm->release_work, __i915_vm_release); atomic_set(&vm->open, 1); /* diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index bc7153018ebd..5b539bd7645d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -213,7 +213,7 @@ struct i915_vma_ops { struct i915_address_space { struct kref ref; - struct rcu_work rcu; + struct work_struct release_work; struct drm_mm mm; struct intel_gt *gt; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index bb4af4977920..6ba8daea2f56 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -226,6 +226,40 @@ static const u8 gen12_xcs_offsets[] = { END }; +static const u8 dg2_xcs_offsets[] = { + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + REG(0x120), + REG(0x124), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + END +}; + static const u8 gen8_rcs_offsets[] = { NOP(1), LRI(14, POSTED), @@ -525,6 +559,49 @@ static const u8 xehp_rcs_offsets[] = { END }; +static const u8 dg2_rcs_offsets[] = { + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + REG(0x120), + REG(0x124), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(3, POSTED), + REG(0x1b0), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + + END +}; + #undef END #undef REG16 #undef REG @@ -543,7 +620,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) !intel_engine_has_relative_mmio(engine)); if (engine->class == RENDER_CLASS) { - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) + return dg2_rcs_offsets; + else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) return xehp_rcs_offsets; else if (GRAPHICS_VER(engine->i915) >= 12) return gen12_rcs_offsets; @@ -554,7 +633,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) else return gen8_rcs_offsets; } else { - if (GRAPHICS_VER(engine->i915) >= 12) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) + return dg2_xcs_offsets; + else if (GRAPHICS_VER(engine->i915) >= 12) return gen12_xcs_offsets; else if (GRAPHICS_VER(engine->i915) >= 9) return gen9_xcs_offsets; diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 582c4423b95d..e4b97cd14cf9 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -22,6 +22,8 @@ struct drm_i915_mocs_table { unsigned int size; unsigned int n_entries; const struct drm_i915_mocs_entry *table; + u8 uc_index; + u8 unused_entries_index; }; /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ @@ -40,6 +42,8 @@ struct drm_i915_mocs_table { #define L3_ESC(value) ((value) << 0) #define L3_SCC(value) ((value) << 1) #define _L3_CACHEABILITY(value) ((value) << 4) +#define L3_GLBGO(value) ((value) << 6) +#define L3_LKUP(value) ((value) << 7) /* Helper defines */ #define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ @@ -88,18 +92,25 @@ struct drm_i915_mocs_table { * * Entries not part of the following tables are undefined as far as * userspace is concerned and shouldn't be relied upon. For Gen < 12 - * they will be initialized to PTE. Gen >= 12 onwards don't have a setting for - * PTE and will be initialized to an invalid value. + * they will be initialized to PTE. Gen >= 12 don't have a setting for + * PTE and those platforms except TGL/RKL will be initialized L3 WB to + * catch accidental use of reserved and unused mocs indexes. * * The last few entries are reserved by the hardware. For ICL+ they * should be initialized according to bspec and never used, for older * platforms they should never be written to. * - * NOTE: These tables are part of bspec and defined as part of hardware + * NOTE1: These tables are part of bspec and defined as part of hardware * interface for ICL+. For older platforms, they are part of kernel * ABI. It is expected that, for specific hardware platform, existing * entries will remain constant and the table will only be updated by * adding new entries, filling unused positions. + * + * NOTE2: For GEN >= 12 except TGL and RKL, reserved and unspecified MOCS + * indices have been set to L3 WB. These reserved entries should never + * be used, they may be changed to low performant variants with better + * coherency in the future if more entries are needed. + * For TGL/RKL, all the unspecified MOCS indexes are mapped to L3 UC. */ #define GEN9_MOCS_ENTRIES \ MOCS_ENTRY(I915_MOCS_UNCACHED, \ @@ -282,17 +293,9 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = { }; static const struct drm_i915_mocs_entry dg1_mocs_table[] = { - /* Error */ - MOCS_ENTRY(0, 0, L3_0_DIRECT), /* UC */ MOCS_ENTRY(1, 0, L3_1_UC), - - /* Reserved */ - MOCS_ENTRY(2, 0, L3_0_DIRECT), - MOCS_ENTRY(3, 0, L3_0_DIRECT), - MOCS_ENTRY(4, 0, L3_0_DIRECT), - /* WB - L3 */ MOCS_ENTRY(5, 0, L3_3_WB), /* WB - L3 50% */ @@ -314,6 +317,83 @@ static const struct drm_i915_mocs_entry dg1_mocs_table[] = { MOCS_ENTRY(63, 0, L3_1_UC), }; +static const struct drm_i915_mocs_entry gen12_mocs_table[] = { + GEN11_MOCS_ENTRIES, + /* Implicitly enable L1 - HDC:L1 + L3 + LLC */ + MOCS_ENTRY(48, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + L3 */ + MOCS_ENTRY(49, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + LLC */ + MOCS_ENTRY(50, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* Implicitly enable L1 - HDC:L1 */ + MOCS_ENTRY(51, + LE_1_UC | LE_TC_1_LLC, + L3_1_UC), + /* HW Special Case (CCS) */ + MOCS_ENTRY(60, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* HW Special Case (Displayable) */ + MOCS_ENTRY(61, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), +}; + +static const struct drm_i915_mocs_entry xehpsdv_mocs_table[] = { + /* wa_1608975824 */ + MOCS_ENTRY(0, 0, L3_3_WB | L3_LKUP(1)), + + /* UC - Coherent; GO:L3 */ + MOCS_ENTRY(1, 0, L3_1_UC | L3_LKUP(1)), + /* UC - Coherent; GO:Memory */ + MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), + /* UC - Non-Coherent; GO:Memory */ + MOCS_ENTRY(3, 0, L3_1_UC | L3_GLBGO(1)), + /* UC - Non-Coherent; GO:L3 */ + MOCS_ENTRY(4, 0, L3_1_UC), + + /* WB */ + MOCS_ENTRY(5, 0, L3_3_WB | L3_LKUP(1)), + + /* HW Reserved - SW program but never use. */ + MOCS_ENTRY(48, 0, L3_3_WB | L3_LKUP(1)), + MOCS_ENTRY(49, 0, L3_1_UC | L3_LKUP(1)), + MOCS_ENTRY(60, 0, L3_1_UC), + MOCS_ENTRY(61, 0, L3_1_UC), + MOCS_ENTRY(62, 0, L3_1_UC), + MOCS_ENTRY(63, 0, L3_1_UC), +}; + +static const struct drm_i915_mocs_entry dg2_mocs_table[] = { + /* UC - Coherent; GO:L3 */ + MOCS_ENTRY(0, 0, L3_1_UC | L3_LKUP(1)), + /* UC - Coherent; GO:Memory */ + MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), + /* UC - Non-Coherent; GO:Memory */ + MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)), + + /* WB - LC */ + MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), +}; + +static const struct drm_i915_mocs_entry dg2_mocs_table_g10_ax[] = { + /* Wa_14011441408: Set Go to Memory for MOCS#0 */ + MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), + /* UC - Coherent; GO:Memory */ + MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), + /* UC - Non-Coherent; GO:Memory */ + MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)), + + /* WB - LC */ + MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), +}; + enum { HAS_GLOBAL_MOCS = BIT(0), HAS_ENGINE_MOCS = BIT(1), @@ -340,14 +420,45 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, { unsigned int flags; - if (IS_DG1(i915)) { + memset(table, 0, sizeof(struct drm_i915_mocs_table)); + + table->unused_entries_index = I915_MOCS_PTE; + if (IS_DG2(i915)) { + if (IS_DG2_GT_STEP(i915, G10, STEP_A0, STEP_B0)) { + table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax); + table->table = dg2_mocs_table_g10_ax; + } else { + table->size = ARRAY_SIZE(dg2_mocs_table); + table->table = dg2_mocs_table; + } + table->uc_index = 1; + table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->unused_entries_index = 3; + } else if (IS_XEHPSDV(i915)) { + table->size = ARRAY_SIZE(xehpsdv_mocs_table); + table->table = xehpsdv_mocs_table; + table->uc_index = 2; + table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->unused_entries_index = 5; + } else if (IS_DG1(i915)) { table->size = ARRAY_SIZE(dg1_mocs_table); table->table = dg1_mocs_table; + table->uc_index = 1; table->n_entries = GEN9_NUM_MOCS_ENTRIES; - } else if (GRAPHICS_VER(i915) >= 12) { + table->uc_index = 1; + table->unused_entries_index = 5; + } else if (IS_TIGERLAKE(i915) || IS_ROCKETLAKE(i915)) { + /* For TGL/RKL, Can't be changed now for ABI reasons */ table->size = ARRAY_SIZE(tgl_mocs_table); table->table = tgl_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->uc_index = 3; + } else if (GRAPHICS_VER(i915) >= 12) { + table->size = ARRAY_SIZE(gen12_mocs_table); + table->table = gen12_mocs_table; + table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->uc_index = 3; + table->unused_entries_index = 2; } else if (GRAPHICS_VER(i915) == 11) { table->size = ARRAY_SIZE(icl_mocs_table); table->table = icl_mocs_table; @@ -393,16 +504,16 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, } /* - * Get control_value from MOCS entry taking into account when it's not used: - * I915_MOCS_PTE's value is returned in this case. + * Get control_value from MOCS entry taking into account when it's not used + * then if unused_entries_index is non-zero then its value will be returned + * otherwise I915_MOCS_PTE's value is returned in this case. */ static u32 get_entry_control(const struct drm_i915_mocs_table *table, unsigned int index) { if (index < table->size && table->table[index].used) return table->table[index].control_value; - - return table->table[I915_MOCS_PTE].control_value; + return table->table[table->unused_entries_index].control_value; } #define for_each_mocs(mocs, t, i) \ @@ -417,6 +528,8 @@ static void __init_mocs_table(struct intel_uncore *uncore, unsigned int i; u32 mocs; + drm_WARN_ONCE(&uncore->i915->drm, !table->unused_entries_index, + "Unused entries index should have been defined\n"); for_each_mocs(mocs, table, i) intel_uncore_write_fw(uncore, _MMIO(addr + i * 4), mocs); } @@ -443,16 +556,16 @@ static void init_mocs_table(struct intel_engine_cs *engine, } /* - * Get l3cc_value from MOCS entry taking into account when it's not used: - * I915_MOCS_PTE's value is returned in this case. + * Get l3cc_value from MOCS entry taking into account when it's not used + * then if unused_entries_index is not zero then its value will be returned + * otherwise I915_MOCS_PTE's value is returned in this case. */ static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table, unsigned int index) { if (index < table->size && table->table[index].used) return table->table[index].l3cc_value; - - return table->table[I915_MOCS_PTE].l3cc_value; + return table->table[table->unused_entries_index].l3cc_value; } static u32 l3cc_combine(u16 low, u16 high) @@ -468,10 +581,9 @@ static u32 l3cc_combine(u16 low, u16 high) 0; \ i++) -static void init_l3cc_table(struct intel_engine_cs *engine, +static void init_l3cc_table(struct intel_uncore *uncore, const struct drm_i915_mocs_table *table) { - struct intel_uncore *uncore = engine->uncore; unsigned int i; u32 l3cc; @@ -496,7 +608,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) init_mocs_table(engine, &table); if (flags & HAS_RENDER_L3CC && engine->class == RENDER_CLASS) - init_l3cc_table(engine, &table); + init_l3cc_table(engine->uncore, &table); } static u32 global_mocs_offset(void) @@ -504,6 +616,14 @@ static u32 global_mocs_offset(void) return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0)); } +void set_mocs_index(struct intel_gt *gt) +{ + struct drm_i915_mocs_table table; + + get_mocs_settings(gt->i915, &table); + gt->mocs.uc_index = table.uc_index; +} + void intel_mocs_init(struct intel_gt *gt) { struct drm_i915_mocs_table table; @@ -515,6 +635,14 @@ void intel_mocs_init(struct intel_gt *gt) flags = get_mocs_settings(gt->i915, &table); if (flags & HAS_GLOBAL_MOCS) __init_mocs_table(gt->uncore, &table, global_mocs_offset()); + + /* + * Initialize the L3CC table as part of mocs initalization to make + * sure the LNCFCMOCSx registers are programmed for the subsequent + * memory transactions including guc transactions + */ + if (flags & HAS_RENDER_L3CC) + init_l3cc_table(gt->uncore, &table); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h index d83274f5163b..8a09d64b115f 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.h +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h @@ -36,5 +36,6 @@ struct intel_gt; void intel_mocs_init(struct intel_gt *gt); void intel_mocs_init_engine(struct intel_engine_cs *engine); +void set_mocs_index(struct intel_gt *gt); #endif diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 2958e2fae380..3c65efcb7bed 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -1265,7 +1265,7 @@ static struct i915_vma *gen7_ctx_vma(struct intel_engine_cs *engine) int size, err; if (GRAPHICS_VER(engine->i915) != 7 || engine->class != RENDER_CLASS) - return 0; + return NULL; err = gen7_ctx_switch_bb_setup(engine, NULL /* probe size */); if (err < 0) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 0a03fbed9f9b..172de6c9f949 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -994,20 +994,16 @@ int intel_rps_set(struct intel_rps *rps, u8 val) static void gen6_rps_init(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); - struct intel_uncore *uncore = rps_to_uncore(rps); + u32 rp_state_cap = intel_rps_read_state_cap(rps); /* All of these values are in units of 50MHz */ /* static values from HW: RP0 > RP1 > RPn (min_freq) */ if (IS_GEN9_LP(i915)) { - u32 rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP); - rps->rp0_freq = (rp_state_cap >> 16) & 0xff; rps->rp1_freq = (rp_state_cap >> 8) & 0xff; rps->min_freq = (rp_state_cap >> 0) & 0xff; } else { - u32 rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP); - rps->rp0_freq = (rp_state_cap >> 0) & 0xff; rps->rp1_freq = (rp_state_cap >> 8) & 0xff; rps->min_freq = (rp_state_cap >> 16) & 0xff; @@ -2144,6 +2140,19 @@ int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val) return set_min_freq(rps, val); } +u32 intel_rps_read_state_cap(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); + + if (IS_XEHPSDV(i915)) + return intel_uncore_read(uncore, XEHPSDV_RP_STATE_CAP); + else if (IS_GEN9_LP(i915)) + return intel_uncore_read(uncore, BXT_RP_STATE_CAP); + else + return intel_uncore_read(uncore, GEN6_RP_STATE_CAP); +} + /* External interface for intel_ips.ko */ static struct drm_i915_private __rcu *ips_mchdev; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index 4213bcce1667..11960d64ca82 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -41,6 +41,7 @@ u32 intel_rps_get_rp1_frequency(struct intel_rps *rps); u32 intel_rps_get_rpn_frequency(struct intel_rps *rps); u32 intel_rps_read_punit_req(struct intel_rps *rps); u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps); +u32 intel_rps_read_state_cap(struct intel_rps *rps); void gen5_rps_irq_handler(struct intel_rps *rps); void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index bbd272943c3f..b0e09b58005e 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -46,11 +46,11 @@ u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice) } void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, - u32 ss_mask) + u8 *subslice_mask, u32 ss_mask) { int offset = slice * sseu->ss_stride; - memcpy(&sseu->subslice_mask[offset], &ss_mask, sseu->ss_stride); + memcpy(&subslice_mask[offset], &ss_mask, sseu->ss_stride); } unsigned int @@ -100,14 +100,24 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu) return total; } -static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, - u8 s_en, u32 ss_en, u16 eu_en) +static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en) +{ + u32 ss_mask; + + ss_mask = ss_en >> (s * sseu->max_subslices); + ss_mask &= GENMASK(sseu->max_subslices - 1, 0); + + return ss_mask; +} + +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en, + u32 g_ss_en, u32 c_ss_en, u16 eu_en) { int s, ss; - /* ss_en represents entire subslice mask across all slices */ + /* g_ss_en/c_ss_en represent entire subslice mask across all slices */ GEM_BUG_ON(sseu->max_slices * sseu->max_subslices > - sizeof(ss_en) * BITS_PER_BYTE); + sizeof(g_ss_en) * BITS_PER_BYTE); for (s = 0; s < sseu->max_slices; s++) { if ((s_en & BIT(s)) == 0) @@ -115,7 +125,22 @@ static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, sseu->slice_mask |= BIT(s); - intel_sseu_set_subslices(sseu, s, ss_en); + /* + * XeHP introduces the concept of compute vs geometry DSS. To + * reduce variation between GENs around subslice usage, store a + * mask for both the geometry and compute enabled masks since + * userspace will need to be able to query these masks + * independently. Also compute a total enabled subslice count + * for the purposes of selecting subslices to use in a + * particular GEM context. + */ + intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask, + get_ss_stride_mask(sseu, s, c_ss_en)); + intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask, + get_ss_stride_mask(sseu, s, g_ss_en)); + intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, + get_ss_stride_mask(sseu, s, + g_ss_en | c_ss_en)); for (ss = 0; ss < sseu->max_subslices; ss++) if (intel_sseu_has_subslice(sseu, s, ss)) @@ -129,7 +154,7 @@ static void gen12_sseu_info_init(struct intel_gt *gt) { struct sseu_dev_info *sseu = >->info.sseu; struct intel_uncore *uncore = gt->uncore; - u32 dss_en; + u32 g_dss_en, c_dss_en = 0; u16 eu_en = 0; u8 eu_en_fuse; u8 s_en; @@ -160,7 +185,9 @@ static void gen12_sseu_info_init(struct intel_gt *gt) s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; - dss_en = intel_uncore_read(uncore, GEN12_GT_DSS_ENABLE); + g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE); + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) + c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE); /* one bit per pair of EUs */ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) @@ -173,7 +200,7 @@ static void gen12_sseu_info_init(struct intel_gt *gt) if (eu_en_fuse & BIT(eu)) eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); - gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en); + gen11_compute_sseu_info(sseu, s_en, g_dss_en, c_dss_en, eu_en); /* TGL only supports slice-level power gating */ sseu->has_slice_pg = 1; @@ -199,7 +226,7 @@ static void gen11_sseu_info_init(struct intel_gt *gt) eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); - gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en); + gen11_compute_sseu_info(sseu, s_en, ss_en, 0, eu_en); /* ICL has no power gating restrictions. */ sseu->has_slice_pg = 1; @@ -240,7 +267,7 @@ static void cherryview_sseu_info_init(struct intel_gt *gt) sseu_set_eus(sseu, 0, 1, ~disabled_mask); } - intel_sseu_set_subslices(sseu, 0, subslice_mask); + intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, subslice_mask); sseu->eu_total = compute_eu_total(sseu); @@ -296,7 +323,8 @@ static void gen9_sseu_info_init(struct intel_gt *gt) /* skip disabled slice */ continue; - intel_sseu_set_subslices(sseu, s, subslice_mask); + intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, + subslice_mask); eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s)); for (ss = 0; ss < sseu->max_subslices; ss++) { @@ -408,7 +436,8 @@ static void bdw_sseu_info_init(struct intel_gt *gt) /* skip disabled slice */ continue; - intel_sseu_set_subslices(sseu, s, subslice_mask); + intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, + subslice_mask); for (ss = 0; ss < sseu->max_subslices; ss++) { u8 eu_disabled_mask; @@ -506,7 +535,8 @@ static void hsw_sseu_info_init(struct intel_gt *gt) sseu->eu_per_subslice); for (s = 0; s < sseu->max_slices; s++) { - intel_sseu_set_subslices(sseu, s, subslice_mask); + intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, + subslice_mask); for (ss = 0; ss < sseu->max_subslices; ss++) { sseu_set_eus(sseu, s, ss, diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 22fef98887c0..60882a74741e 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -26,9 +26,14 @@ struct drm_printer; #define GEN_DSS_PER_CSLICE 8 #define GEN_DSS_PER_MSLICE 8 +#define GEN_MAX_GSLICES (GEN_MAX_SUBSLICES / GEN_DSS_PER_GSLICE) +#define GEN_MAX_CSLICES (GEN_MAX_SUBSLICES / GEN_DSS_PER_CSLICE) + struct sseu_dev_info { u8 slice_mask; u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; + u8 geometry_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; + u8 compute_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE]; u16 eu_total; u8 eu_per_subslice; @@ -78,6 +83,10 @@ intel_sseu_has_subslice(const struct sseu_dev_info *sseu, int slice, u8 mask; int ss_idx = subslice / BITS_PER_BYTE; + if (slice >= sseu->max_slices || + subslice >= sseu->max_subslices) + return false; + GEM_BUG_ON(ss_idx >= sseu->ss_stride); mask = sseu->subslice_mask[slice * sseu->ss_stride + ss_idx]; @@ -97,7 +106,7 @@ intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice); u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice); void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, - u32 ss_mask); + u8 *subslice_mask, u32 ss_mask); void intel_sseu_info_init(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index aae609d7d85d..c314d4917b6b 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -644,6 +644,72 @@ static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine, DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE); } +static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + /* + * This is a "fake" workaround defined by software to ensure we + * maintain reliable, backward-compatible behavior for userspace with + * regards to how nested MI_BATCH_BUFFER_START commands are handled. + * + * The per-context setting of MI_MODE[12] determines whether the bits + * of a nested MI_BATCH_BUFFER_START instruction should be interpreted + * in the traditional manner or whether they should instead use a new + * tgl+ meaning that breaks backward compatibility, but allows nesting + * into 3rd-level batchbuffers. When this new capability was first + * added in TGL, it remained off by default unless a context + * intentionally opted in to the new behavior. However Xe_HPG now + * flips this on by default and requires that we explicitly opt out if + * we don't want the new behavior. + * + * From a SW perspective, we want to maintain the backward-compatible + * behavior for userspace, so we'll apply a fake workaround to set it + * back to the legacy behavior on platforms where the hardware default + * is to break compatibility. At the moment there is no Linux + * userspace that utilizes third-level batchbuffers, so this will avoid + * userspace from needing to make any changes. using the legacy + * meaning is the correct thing to do. If/when we have userspace + * consumers that want to utilize third-level batch nesting, we can + * provide a context parameter to allow them to opt-in. + */ + wa_masked_dis(wal, RING_MI_MODE(engine->mmio_base), TGL_NESTED_BB_EN); +} + +static void gen12_ctx_gt_mocs_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + u8 mocs; + + /* + * Some blitter commands do not have a field for MOCS, those + * commands will use MOCS index pointed by BLIT_CCTL. + * BLIT_CCTL registers are needed to be programmed to un-cached. + */ + if (engine->class == COPY_ENGINE_CLASS) { + mocs = engine->gt->mocs.uc_index; + wa_write_clr_set(wal, + BLIT_CCTL(engine->mmio_base), + BLIT_CCTL_MASK, + BLIT_CCTL_MOCS(mocs, mocs)); + } +} + +/* + * gen12_ctx_gt_fake_wa_init() aren't programmingan official workaround + * defined by the hardware team, but it programming general context registers. + * Adding those context register programming in context workaround + * allow us to use the wa framework for proper application and validation. + */ +static void +gen12_ctx_gt_fake_wa_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) + fakewa_disable_nestedbb_mode(engine, wal); + + gen12_ctx_gt_mocs_init(engine, wal); +} + static void __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, struct i915_wa_list *wal, @@ -651,11 +717,19 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, { struct drm_i915_private *i915 = engine->i915; - if (engine->class != RENDER_CLASS) - return; - wa_init_start(wal, name, engine->name); + /* Applies to all engines */ + /* + * Fake workarounds are not the actual workaround but + * programming of context registers using workaround framework. + */ + if (GRAPHICS_VER(i915) >= 12) + gen12_ctx_gt_fake_wa_init(engine, wal); + + if (engine->class != RENDER_CLASS) + goto done; + if (IS_DG1(i915)) dg1_ctx_workarounds_init(engine, wal); else if (GRAPHICS_VER(i915) == 12) @@ -685,6 +759,7 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, else MISSING_CASE(GRAPHICS_VER(i915)); +done: wa_init_finish(wal); } @@ -1604,6 +1679,31 @@ void intel_engine_apply_whitelist(struct intel_engine_cs *engine) i915_mmio_reg_offset(RING_NOPID(base))); } +/* + * engine_fake_wa_init(), a place holder to program the registers + * which are not part of an official workaround defined by the + * hardware team. + * Adding programming of those register inside workaround will + * allow utilizing wa framework to proper application and verification. + */ +static void +engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) +{ + u8 mocs; + + /* + * RING_CMD_CCTL are need to be programed to un-cached + * for memory writes and reads outputted by Command + * Streamers on Gen12 onward platforms. + */ + if (GRAPHICS_VER(engine->i915) >= 12) { + mocs = engine->gt->mocs.uc_index; + wa_masked_field_set(wal, + RING_CMD_CCTL(engine->mmio_base), + CMD_CCTL_MOCS_MASK, + CMD_CCTL_MOCS_OVERRIDE(mocs, mocs)); + } +} static void rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { @@ -2044,6 +2144,8 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal if (I915_SELFTEST_ONLY(GRAPHICS_VER(engine->i915) < 4)) return; + engine_fake_wa_init(engine, wal); + if (engine->class == RENDER_CLASS) rcs_engine_wa_init(engine, wal); else @@ -2067,12 +2169,7 @@ void intel_engine_apply_workarounds(struct intel_engine_cs *engine) wa_list_apply(engine->gt, &engine->wa_list); } -struct mcr_range { - u32 start; - u32 end; -}; - -static const struct mcr_range mcr_ranges_gen8[] = { +static const struct i915_range mcr_ranges_gen8[] = { { .start = 0x5500, .end = 0x55ff }, { .start = 0x7000, .end = 0x7fff }, { .start = 0x9400, .end = 0x97ff }, @@ -2081,7 +2178,7 @@ static const struct mcr_range mcr_ranges_gen8[] = { {}, }; -static const struct mcr_range mcr_ranges_gen12[] = { +static const struct i915_range mcr_ranges_gen12[] = { { .start = 0x8150, .end = 0x815f }, { .start = 0x9520, .end = 0x955f }, { .start = 0xb100, .end = 0xb3ff }, @@ -2090,7 +2187,7 @@ static const struct mcr_range mcr_ranges_gen12[] = { {}, }; -static const struct mcr_range mcr_ranges_xehp[] = { +static const struct i915_range mcr_ranges_xehp[] = { { .start = 0x4000, .end = 0x4aff }, { .start = 0x5200, .end = 0x52ff }, { .start = 0x5400, .end = 0x7fff }, @@ -2109,7 +2206,7 @@ static const struct mcr_range mcr_ranges_xehp[] = { static bool mcr_range(struct drm_i915_private *i915, u32 offset) { - const struct mcr_range *mcr_ranges; + const struct i915_range *mcr_ranges; int i; if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index f12ffe797639..b3863abc51f5 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -3493,7 +3493,7 @@ static int smoke_submit(struct preempt_smoke *smoke, if (batch) { struct i915_address_space *vm; - vm = i915_gem_context_get_vm_rcu(ctx); + vm = i915_gem_context_get_eb_vm(ctx); vma = i915_vma_instance(batch, vm, NULL); i915_vm_put(vm); if (IS_ERR(vma)) diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 2c1ed32ca5ac..7e6fdabac599 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -117,7 +117,7 @@ static struct i915_request * hang_create_request(struct hang *h, struct intel_engine_cs *engine) { struct intel_gt *gt = h->gt; - struct i915_address_space *vm = i915_gem_context_get_vm_rcu(h->ctx); + struct i915_address_space *vm = i915_gem_context_get_eb_vm(h->ctx); struct drm_i915_gem_object *obj; struct i915_request *rq = NULL; struct i915_vma *hws, *vma; @@ -789,7 +789,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) if (err) pr_err("[%s] Wait for request %lld:%lld [0x%04X] failed: %d!\n", engine->name, rq->fence.context, - rq->fence.seqno, rq->context->guc_id, err); + rq->fence.seqno, rq->context->guc_id.id, err); } skip: @@ -1098,7 +1098,7 @@ static int __igt_reset_engines(struct intel_gt *gt, if (err) pr_err("[%s] Wait for request %lld:%lld [0x%04X] failed: %d!\n", engine->name, rq->fence.context, - rq->fence.seqno, rq->context->guc_id, err); + rq->fence.seqno, rq->context->guc_id.id, err); } count++; @@ -1108,7 +1108,7 @@ static int __igt_reset_engines(struct intel_gt *gt, pr_err("i915_reset_engine(%s:%s): failed to reset request %lld:%lld [0x%04X]\n", engine->name, test_name, rq->fence.context, - rq->fence.seqno, rq->context->guc_id); + rq->fence.seqno, rq->context->guc_id.id); i915_request_put(rq); GEM_TRACE_DUMP(); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 2e27fe59786b..5dd174babf7a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -22,74 +22,121 @@ struct __guc_ads_blob; -/* - * Top level structure of GuC. It handles firmware loading and manages client - * pool. intel_guc owns a intel_guc_client to replace the legacy ExecList - * submission. +/** + * struct intel_guc - Top level structure of GuC. + * + * It handles firmware loading and manages client pool. intel_guc owns an + * i915_sched_engine for submission. */ struct intel_guc { + /** @fw: the GuC firmware */ struct intel_uc_fw fw; + /** @log: sub-structure containing GuC log related data and objects */ struct intel_guc_log log; + /** @ct: the command transport communication channel */ struct intel_guc_ct ct; + /** @slpc: sub-structure containing SLPC related data and objects */ struct intel_guc_slpc slpc; - /* Global engine used to submit requests to GuC */ + /** @sched_engine: Global engine used to submit requests to GuC */ struct i915_sched_engine *sched_engine; + /** + * @stalled_request: if GuC can't process a request for any reason, we + * save it until GuC restarts processing. No other request can be + * submitted until the stalled request is processed. + */ struct i915_request *stalled_request; /* intel_guc_recv interrupt related state */ + /** @irq_lock: protects GuC irq state */ spinlock_t irq_lock; + /** + * @msg_enabled_mask: mask of events that are processed when receiving + * an INTEL_GUC_ACTION_DEFAULT G2H message. + */ unsigned int msg_enabled_mask; + /** + * @outstanding_submission_g2h: number of outstanding GuC to Host + * responses related to GuC submission, used to determine if the GT is + * idle + */ atomic_t outstanding_submission_g2h; + /** @interrupts: pointers to GuC interrupt-managing functions. */ struct { void (*reset)(struct intel_guc *guc); void (*enable)(struct intel_guc *guc); void (*disable)(struct intel_guc *guc); } interrupts; - /* - * contexts_lock protects the pool of free guc ids and a linked list of - * guc ids available to be stolen + /** + * @contexts_lock: protects guc_ids, guc_id_list, ce->guc_id.id, and + * ce->guc_id.ref when transitioning in and out of zero */ spinlock_t contexts_lock; + /** @guc_ids: used to allocate unique ce->guc_id.id values */ struct ida guc_ids; + /** + * @guc_id_list: list of intel_context with valid guc_ids but no refs + */ struct list_head guc_id_list; + /** + * @submission_supported: tracks whether we support GuC submission on + * the current platform + */ bool submission_supported; + /** @submission_selected: tracks whether the user enabled GuC submission */ bool submission_selected; + /** + * @rc_supported: tracks whether we support GuC rc on the current platform + */ bool rc_supported; + /** @rc_selected: tracks whether the user enabled GuC rc */ bool rc_selected; + /** @ads_vma: object allocated to hold the GuC ADS */ struct i915_vma *ads_vma; + /** @ads_blob: contents of the GuC ADS */ struct __guc_ads_blob *ads_blob; + /** @ads_regset_size: size of the save/restore regsets in the ADS */ u32 ads_regset_size; + /** @ads_golden_ctxt_size: size of the golden contexts in the ADS */ u32 ads_golden_ctxt_size; + /** @lrc_desc_pool: object allocated to hold the GuC LRC descriptor pool */ struct i915_vma *lrc_desc_pool; + /** @lrc_desc_pool_vaddr: contents of the GuC LRC descriptor pool */ void *lrc_desc_pool_vaddr; - /* guc_id to intel_context lookup */ + /** + * @context_lookup: used to resolve intel_context from guc_id, if a + * context is present in this structure it is registered with the GuC + */ struct xarray context_lookup; - /* Control params for fw initialization */ + /** @params: Control params for fw initialization */ u32 params[GUC_CTL_MAX_DWORDS]; - /* GuC's FW specific registers used in MMIO send */ + /** @send_regs: GuC's FW specific registers used for sending MMIO H2G */ struct { u32 base; unsigned int count; enum forcewake_domains fw_domains; } send_regs; - /* register used to send interrupts to the GuC FW */ + /** @notify_reg: register used to send interrupts to the GuC FW */ i915_reg_t notify_reg; - /* Store msg (e.g. log flush) that we see while CTBs are disabled */ + /** + * @mmio_msg: notification bitmask that the GuC writes in one of its + * registers when the CT channel is disabled, to be processed when the + * channel is back up. + */ u32 mmio_msg; - /* To serialize the intel_guc_send actions */ + /** @send_mutex: used to serialize the intel_guc_send actions */ struct mutex send_mutex; }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 6926919bcac6..2c6ea64af7ec 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -349,6 +349,8 @@ static void fill_engine_enable_masks(struct intel_gt *gt, info->engine_enabled_masks[GUC_VIDEOENHANCE_CLASS] = VEBOX_MASK(gt); } +#define LR_HW_CONTEXT_SIZE (80 * sizeof(u32)) +#define LRC_SKIP_SIZE (LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE) static int guc_prep_golden_context(struct intel_guc *guc, struct __guc_ads_blob *blob) { @@ -396,7 +398,18 @@ static int guc_prep_golden_context(struct intel_guc *guc, if (!blob) continue; - blob->ads.eng_state_size[guc_class] = real_size; + /* + * This interface is slightly confusing. We need to pass the + * base address of the full golden context and the size of just + * the engine state, which is the section of the context image + * that starts after the execlists context. This is required to + * allow the GuC to restore just the engine state when a + * watchdog reset occurs. + * We calculate the engine state size by removing the size of + * what comes before it in the context image (which is identical + * on all engines). + */ + blob->ads.eng_state_size[guc_class] = real_size - LRC_SKIP_SIZE; blob->ads.golden_context_lrca[guc_class] = addr_ggtt; addr_ggtt += alloc_size; } @@ -436,11 +449,6 @@ static void guc_init_golden_context(struct intel_guc *guc) u8 engine_class, guc_class; u8 *ptr; - /* Skip execlist and PPGTT registers + HWSP */ - const u32 lr_hw_context_size = 80 * sizeof(u32); - const u32 skip_size = LRC_PPHWSP_SZ * PAGE_SIZE + - lr_hw_context_size; - if (!intel_uc_uses_guc_submission(>->uc)) return; @@ -476,12 +484,12 @@ static void guc_init_golden_context(struct intel_guc *guc) continue; } - GEM_BUG_ON(blob->ads.eng_state_size[guc_class] != real_size); + GEM_BUG_ON(blob->ads.eng_state_size[guc_class] != + real_size - LRC_SKIP_SIZE); GEM_BUG_ON(blob->ads.golden_context_lrca[guc_class] != addr_ggtt); addr_ggtt += alloc_size; - shmem_read(engine->default_state, skip_size, ptr + skip_size, - real_size - skip_size); + shmem_read(engine->default_state, 0, ptr, real_size); ptr += alloc_size; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 22b4733b55e2..20c710a74498 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -1042,9 +1042,9 @@ static void ct_incoming_request_worker_func(struct work_struct *w) container_of(w, struct intel_guc_ct, requests.worker); bool done; - done = ct_process_incoming_requests(ct); - if (!done) - queue_work(system_unbound_wq, &ct->requests.worker); + do { + done = ct_process_incoming_requests(ct); + } while (!done); } static int ct_handle_event(struct intel_guc_ct *ct, struct ct_incoming_msg *request) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 87d8dc8f51b9..c7a41802b448 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -28,21 +28,6 @@ /** * DOC: GuC-based command submission * - * IMPORTANT NOTE: GuC submission is currently not supported in i915. The GuC - * firmware is moving to an updated submission interface and we plan to - * turn submission back on when that lands. The below documentation (and related - * code) matches the old submission model and will be updated as part of the - * upgrade to the new flow. - * - * GuC stage descriptor: - * During initialization, the driver allocates a static pool of 1024 such - * descriptors, and shares them with the GuC. Currently, we only use one - * descriptor. This stage descriptor lets the GuC know about the workqueue and - * process descriptor. Theoretically, it also lets the GuC know about our HW - * contexts (context ID, etc...), but we actually employ a kind of submission - * where the GuC uses the LRCA sent via the work item instead. This is called - * a "proxy" submission. - * * The Scratch registers: * There are 16 MMIO-based registers start from 0xC180. The kernel driver writes * a value to the action register (SOFT_SCRATCH_0) along with any data. It then @@ -51,14 +36,85 @@ * processes the request. The kernel driver polls waiting for this update and * then proceeds. * - * Work Items: - * There are several types of work items that the host may place into a - * workqueue, each with its own requirements and limitations. Currently only - * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which - * represents in-order queue. The kernel driver packs ring tail pointer and an - * ELSP context descriptor dword into Work Item. - * See guc_add_request() + * Command Transport buffers (CTBs): + * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host + * - G2H) are a message interface between the i915 and GuC. * + * Context registration: + * Before a context can be submitted it must be registered with the GuC via a + * H2G. A unique guc_id is associated with each context. The context is either + * registered at request creation time (normal operation) or at submission time + * (abnormal operation, e.g. after a reset). + * + * Context submission: + * The i915 updates the LRC tail value in memory. The i915 must enable the + * scheduling of the context within the GuC for the GuC to actually consider it. + * Therefore, the first time a disabled context is submitted we use a schedule + * enable H2G, while follow up submissions are done via the context submit H2G, + * which informs the GuC that a previously enabled context has new work + * available. + * + * Context unpin: + * To unpin a context a H2G is used to disable scheduling. When the + * corresponding G2H returns indicating the scheduling disable operation has + * completed it is safe to unpin the context. While a disable is in flight it + * isn't safe to resubmit the context so a fence is used to stall all future + * requests of that context until the G2H is returned. + * + * Context deregistration: + * Before a context can be destroyed or if we steal its guc_id we must + * deregister the context with the GuC via H2G. If stealing the guc_id it isn't + * safe to submit anything to this guc_id until the deregister completes so a + * fence is used to stall all requests associated with this guc_id until the + * corresponding G2H returns indicating the guc_id has been deregistered. + * + * guc_ids: + * Unique number associated with private GuC context data passed in during + * context registration / submission / deregistration. 64k available. Simple ida + * is used for allocation. + * + * Stealing guc_ids: + * If no guc_ids are available they can be stolen from another context at + * request creation time if that context is unpinned. If a guc_id can't be found + * we punt this problem to the user as we believe this is near impossible to hit + * during normal use cases. + * + * Locking: + * In the GuC submission code we have 3 basic spin locks which protect + * everything. Details about each below. + * + * sched_engine->lock + * This is the submission lock for all contexts that share an i915 schedule + * engine (sched_engine), thus only one of the contexts which share a + * sched_engine can be submitting at a time. Currently only one sched_engine is + * used for all of GuC submission but that could change in the future. + * + * guc->contexts_lock + * Protects guc_id allocation for the given GuC, i.e. only one context can be + * doing guc_id allocation operations at a time for each GuC in the system. + * + * ce->guc_state.lock + * Protects everything under ce->guc_state. Ensures that a context is in the + * correct state before issuing a H2G. e.g. We don't issue a schedule disable + * on a disabled context (bad idea), we don't issue a schedule enable when a + * schedule disable is in flight, etc... Also protects list of inflight requests + * on the context and the priority management state. Lock is individual to each + * context. + * + * Lock ordering rules: + * sched_engine->lock -> ce->guc_state.lock + * guc->contexts_lock -> ce->guc_state.lock + * + * Reset races: + * When a full GT reset is triggered it is assumed that some G2H responses to + * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be + * fatal as we do certain operations upon receiving a G2H (e.g. destroy + * contexts, release guc_ids, etc...). When this occurs we can scrub the + * context state and cleanup appropriately, however this is quite racey. + * To avoid races, the reset code must disable submission before scrubbing for + * the missing G2H, while the submission code must check for submission being + * disabled and skip sending H2Gs and updating context states when it is. Both + * sides must also make sure to hold the relevant locks. */ /* GuC Virtual Engine */ @@ -72,87 +128,36 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count); #define GUC_REQUEST_SIZE 64 /* bytes */ -/* - * Below is a set of functions which control the GuC scheduling state which do - * not require a lock as all state transitions are mutually exclusive. i.e. It - * is not possible for the context pinning code and submission, for the same - * context, to be executing simultaneously. We still need an atomic as it is - * possible for some of the bits to changing at the same time though. - */ -#define SCHED_STATE_NO_LOCK_ENABLED BIT(0) -#define SCHED_STATE_NO_LOCK_PENDING_ENABLE BIT(1) -#define SCHED_STATE_NO_LOCK_REGISTERED BIT(2) -static inline bool context_enabled(struct intel_context *ce) -{ - return (atomic_read(&ce->guc_sched_state_no_lock) & - SCHED_STATE_NO_LOCK_ENABLED); -} - -static inline void set_context_enabled(struct intel_context *ce) -{ - atomic_or(SCHED_STATE_NO_LOCK_ENABLED, &ce->guc_sched_state_no_lock); -} - -static inline void clr_context_enabled(struct intel_context *ce) -{ - atomic_and((u32)~SCHED_STATE_NO_LOCK_ENABLED, - &ce->guc_sched_state_no_lock); -} - -static inline bool context_pending_enable(struct intel_context *ce) -{ - return (atomic_read(&ce->guc_sched_state_no_lock) & - SCHED_STATE_NO_LOCK_PENDING_ENABLE); -} - -static inline void set_context_pending_enable(struct intel_context *ce) -{ - atomic_or(SCHED_STATE_NO_LOCK_PENDING_ENABLE, - &ce->guc_sched_state_no_lock); -} - -static inline void clr_context_pending_enable(struct intel_context *ce) -{ - atomic_and((u32)~SCHED_STATE_NO_LOCK_PENDING_ENABLE, - &ce->guc_sched_state_no_lock); -} - -static inline bool context_registered(struct intel_context *ce) -{ - return (atomic_read(&ce->guc_sched_state_no_lock) & - SCHED_STATE_NO_LOCK_REGISTERED); -} - -static inline void set_context_registered(struct intel_context *ce) -{ - atomic_or(SCHED_STATE_NO_LOCK_REGISTERED, - &ce->guc_sched_state_no_lock); -} - -static inline void clr_context_registered(struct intel_context *ce) -{ - atomic_and((u32)~SCHED_STATE_NO_LOCK_REGISTERED, - &ce->guc_sched_state_no_lock); -} - /* * Below is a set of functions which control the GuC scheduling state which - * require a lock, aside from the special case where the functions are called - * from guc_lrc_desc_pin(). In that case it isn't possible for any other code - * path to be executing on the context. + * require a lock. */ #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0) #define SCHED_STATE_DESTROYED BIT(1) #define SCHED_STATE_PENDING_DISABLE BIT(2) #define SCHED_STATE_BANNED BIT(3) -#define SCHED_STATE_BLOCKED_SHIFT 4 +#define SCHED_STATE_ENABLED BIT(4) +#define SCHED_STATE_PENDING_ENABLE BIT(5) +#define SCHED_STATE_REGISTERED BIT(6) +#define SCHED_STATE_BLOCKED_SHIFT 7 #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT) #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT) + static inline void init_sched_state(struct intel_context *ce) { - /* Only should be called from guc_lrc_desc_pin() */ - atomic_set(&ce->guc_sched_state_no_lock, 0); - ce->guc_state.sched_state = 0; + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK; +} + +__maybe_unused +static bool sched_state_is_init(struct intel_context *ce) +{ + /* + * XXX: Kernel contexts can have SCHED_STATE_NO_LOCK_REGISTERED after + * suspend. + */ + return !(ce->guc_state.sched_state &= + ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED)); } static inline bool @@ -165,7 +170,7 @@ context_wait_for_deregister_to_register(struct intel_context *ce) static inline void set_context_wait_for_deregister_to_register(struct intel_context *ce) { - /* Only should be called from guc_lrc_desc_pin() without lock */ + lockdep_assert_held(&ce->guc_state.lock); ce->guc_state.sched_state |= SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; } @@ -225,6 +230,57 @@ static inline void clr_context_banned(struct intel_context *ce) ce->guc_state.sched_state &= ~SCHED_STATE_BANNED; } +static inline bool context_enabled(struct intel_context *ce) +{ + return ce->guc_state.sched_state & SCHED_STATE_ENABLED; +} + +static inline void set_context_enabled(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state |= SCHED_STATE_ENABLED; +} + +static inline void clr_context_enabled(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED; +} + +static inline bool context_pending_enable(struct intel_context *ce) +{ + return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE; +} + +static inline void set_context_pending_enable(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE; +} + +static inline void clr_context_pending_enable(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE; +} + +static inline bool context_registered(struct intel_context *ce) +{ + return ce->guc_state.sched_state & SCHED_STATE_REGISTERED; +} + +static inline void set_context_registered(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state |= SCHED_STATE_REGISTERED; +} + +static inline void clr_context_registered(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED; +} + static inline u32 context_blocked(struct intel_context *ce) { return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >> @@ -233,7 +289,6 @@ static inline u32 context_blocked(struct intel_context *ce) static inline void incr_context_blocked(struct intel_context *ce) { - lockdep_assert_held(&ce->engine->sched_engine->lock); lockdep_assert_held(&ce->guc_state.lock); ce->guc_state.sched_state += SCHED_STATE_BLOCKED; @@ -243,7 +298,6 @@ static inline void incr_context_blocked(struct intel_context *ce) static inline void decr_context_blocked(struct intel_context *ce) { - lockdep_assert_held(&ce->engine->sched_engine->lock); lockdep_assert_held(&ce->guc_state.lock); GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */ @@ -251,14 +305,33 @@ static inline void decr_context_blocked(struct intel_context *ce) ce->guc_state.sched_state -= SCHED_STATE_BLOCKED; } +static inline bool context_has_committed_requests(struct intel_context *ce) +{ + return !!ce->guc_state.number_committed_requests; +} + +static inline void incr_context_committed_requests(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ++ce->guc_state.number_committed_requests; + GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); +} + +static inline void decr_context_committed_requests(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + --ce->guc_state.number_committed_requests; + GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); +} + static inline bool context_guc_id_invalid(struct intel_context *ce) { - return ce->guc_id == GUC_INVALID_LRC_ID; + return ce->guc_id.id == GUC_INVALID_LRC_ID; } static inline void set_context_guc_id_invalid(struct intel_context *ce) { - ce->guc_id = GUC_INVALID_LRC_ID; + ce->guc_id.id = GUC_INVALID_LRC_ID; } static inline struct intel_guc *ce_to_guc(struct intel_context *ce) @@ -352,20 +425,29 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id, xa_unlock_irqrestore(&guc->context_lookup, flags); } +static void decr_outstanding_submission_g2h(struct intel_guc *guc) +{ + if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) + wake_up_all(&guc->ct.wq); +} + static int guc_submission_send_busy_loop(struct intel_guc *guc, const u32 *action, u32 len, u32 g2h_len_dw, bool loop) { - int err; + /* + * We always loop when a send requires a reply (i.e. g2h_len_dw > 0), + * so we don't handle the case where we don't get a reply because we + * aborted the send due to the channel being busy. + */ + GEM_BUG_ON(g2h_len_dw && !loop); - err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); - - if (!err && g2h_len_dw) + if (g2h_len_dw) atomic_inc(&guc->outstanding_submission_g2h); - return err; + return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); } int intel_guc_wait_for_pending_msg(struct intel_guc *guc, @@ -430,6 +512,8 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) u32 g2h_len_dw = 0; bool enabled; + lockdep_assert_held(&rq->engine->sched_engine->lock); + /* * Corner case where requests were sitting in the priority list or a * request resubmitted after the context was banned. @@ -437,22 +521,24 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) if (unlikely(intel_context_is_banned(ce))) { i915_request_put(i915_request_mark_eio(rq)); intel_engine_signal_breadcrumbs(ce->engine); - goto out; + return 0; } - GEM_BUG_ON(!atomic_read(&ce->guc_id_ref)); + GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); GEM_BUG_ON(context_guc_id_invalid(ce)); /* * Corner case where the GuC firmware was blown away and reloaded while * this context was pinned. */ - if (unlikely(!lrc_desc_registered(guc, ce->guc_id))) { + if (unlikely(!lrc_desc_registered(guc, ce->guc_id.id))) { err = guc_lrc_desc_pin(ce, false); if (unlikely(err)) - goto out; + return err; } + spin_lock(&ce->guc_state.lock); + /* * The request / context will be run on the hardware when scheduling * gets enabled in the unblock. @@ -464,14 +550,14 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) if (!enabled) { action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET; - action[len++] = ce->guc_id; + action[len++] = ce->guc_id.id; action[len++] = GUC_CONTEXT_ENABLE; set_context_pending_enable(ce); intel_context_get(ce); g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; } else { action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT; - action[len++] = ce->guc_id; + action[len++] = ce->guc_id.id; } err = intel_guc_send_nb(guc, action, len, g2h_len_dw); @@ -487,6 +573,7 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) trace_i915_request_guc_submit(rq); out: + spin_unlock(&ce->guc_state.lock); return err; } @@ -596,10 +683,18 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) unsigned long index, flags; bool pending_disable, pending_enable, deregister, destroyed, banned; + xa_lock_irqsave(&guc->context_lookup, flags); xa_for_each(&guc->context_lookup, index, ce) { - /* Flush context */ - spin_lock_irqsave(&ce->guc_state.lock, flags); - spin_unlock_irqrestore(&ce->guc_state.lock, flags); + /* + * Corner case where the ref count on the object is zero but and + * deregister G2H was lost. In this case we don't touch the ref + * count and finish the destroy of the context. + */ + bool do_put = kref_get_unless_zero(&ce->ref); + + xa_unlock(&guc->context_lookup); + + spin_lock(&ce->guc_state.lock); /* * Once we are at this point submission_disabled() is guaranteed @@ -615,8 +710,12 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) banned = context_banned(ce); init_sched_state(ce); + spin_unlock(&ce->guc_state.lock); + + GEM_BUG_ON(!do_put && !destroyed); + if (pending_enable || destroyed || deregister) { - atomic_dec(&guc->outstanding_submission_g2h); + decr_outstanding_submission_g2h(guc); if (deregister) guc_signal_context_fence(ce); if (destroyed) { @@ -635,14 +734,20 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) intel_engine_signal_breadcrumbs(ce->engine); } intel_context_sched_disable_unpin(ce); - atomic_dec(&guc->outstanding_submission_g2h); - spin_lock_irqsave(&ce->guc_state.lock, flags); + decr_outstanding_submission_g2h(guc); + + spin_lock(&ce->guc_state.lock); guc_blocked_fence_complete(ce); - spin_unlock_irqrestore(&ce->guc_state.lock, flags); + spin_unlock(&ce->guc_state.lock); intel_context_put(ce); } + + if (do_put) + intel_context_put(ce); + xa_lock(&guc->context_lookup); } + xa_unlock_irqrestore(&guc->context_lookup, flags); } static inline bool @@ -725,6 +830,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc) wait_for_reset(guc, &guc->outstanding_submission_g2h); } while (!list_empty(&guc->ct.requests.incoming)); } + scrub_guc_desc_for_outstanding_g2h(guc); } @@ -796,16 +902,14 @@ __unwind_incomplete_requests(struct intel_context *ce) unsigned long flags; spin_lock_irqsave(&sched_engine->lock, flags); - spin_lock(&ce->guc_active.lock); - list_for_each_entry_safe(rq, rn, - &ce->guc_active.requests, - sched.link) { + spin_lock(&ce->guc_state.lock); + list_for_each_entry_safe_reverse(rq, rn, + &ce->guc_state.requests, + sched.link) { if (i915_request_completed(rq)) continue; list_del_init(&rq->sched.link); - spin_unlock(&ce->guc_active.lock); - __i915_request_unsubmit(rq); /* Push the request back into the queue for later resubmission. */ @@ -816,29 +920,43 @@ __unwind_incomplete_requests(struct intel_context *ce) } GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine)); - list_add_tail(&rq->sched.link, pl); + list_add(&rq->sched.link, pl); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); - - spin_lock(&ce->guc_active.lock); } - spin_unlock(&ce->guc_active.lock); + spin_unlock(&ce->guc_state.lock); spin_unlock_irqrestore(&sched_engine->lock, flags); } static void __guc_reset_context(struct intel_context *ce, bool stalled) { struct i915_request *rq; + unsigned long flags; u32 head; + bool skip = false; intel_context_get(ce); /* - * GuC will implicitly mark the context as non-schedulable - * when it sends the reset notification. Make sure our state - * reflects this change. The context will be marked enabled - * on resubmission. + * GuC will implicitly mark the context as non-schedulable when it sends + * the reset notification. Make sure our state reflects this change. The + * context will be marked enabled on resubmission. + * + * XXX: If the context is reset as a result of the request cancellation + * this G2H is received after the schedule disable complete G2H which is + * wrong as this creates a race between the request cancellation code + * re-submitting the context and this G2H handler. This is a bug in the + * GuC but can be worked around in the meantime but converting this to a + * NOP if a pending enable is in flight as this indicates that a request + * cancellation has occurred. */ - clr_context_enabled(ce); + spin_lock_irqsave(&ce->guc_state.lock, flags); + if (likely(!context_pending_enable(ce))) + clr_context_enabled(ce); + else + skip = true; + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + if (unlikely(skip)) + goto out_put; rq = intel_context_find_active_request(ce); if (!rq) { @@ -857,6 +975,7 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled) out_replay: guc_reset_state(ce, head, stalled); __unwind_incomplete_requests(ce); +out_put: intel_context_put(ce); } @@ -864,16 +983,29 @@ void intel_guc_submission_reset(struct intel_guc *guc, bool stalled) { struct intel_context *ce; unsigned long index; + unsigned long flags; if (unlikely(!guc_submission_initialized(guc))) { /* Reset called during driver load? GuC not yet initialised! */ return; } - xa_for_each(&guc->context_lookup, index, ce) + xa_lock_irqsave(&guc->context_lookup, flags); + xa_for_each(&guc->context_lookup, index, ce) { + if (!kref_get_unless_zero(&ce->ref)) + continue; + + xa_unlock(&guc->context_lookup); + if (intel_context_is_pinned(ce)) __guc_reset_context(ce, stalled); + intel_context_put(ce); + + xa_lock(&guc->context_lookup); + } + xa_unlock_irqrestore(&guc->context_lookup, flags); + /* GuC is blown away, drop all references to contexts */ xa_destroy(&guc->context_lookup); } @@ -886,10 +1018,10 @@ static void guc_cancel_context_requests(struct intel_context *ce) /* Mark all executing requests as skipped. */ spin_lock_irqsave(&sched_engine->lock, flags); - spin_lock(&ce->guc_active.lock); - list_for_each_entry(rq, &ce->guc_active.requests, sched.link) + spin_lock(&ce->guc_state.lock); + list_for_each_entry(rq, &ce->guc_state.requests, sched.link) i915_request_put(i915_request_mark_eio(rq)); - spin_unlock(&ce->guc_active.lock); + spin_unlock(&ce->guc_state.lock); spin_unlock_irqrestore(&sched_engine->lock, flags); } @@ -948,11 +1080,24 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc) { struct intel_context *ce; unsigned long index; + unsigned long flags; + + xa_lock_irqsave(&guc->context_lookup, flags); + xa_for_each(&guc->context_lookup, index, ce) { + if (!kref_get_unless_zero(&ce->ref)) + continue; + + xa_unlock(&guc->context_lookup); - xa_for_each(&guc->context_lookup, index, ce) if (intel_context_is_pinned(ce)) guc_cancel_context_requests(ce); + intel_context_put(ce); + + xa_lock(&guc->context_lookup); + } + xa_unlock_irqrestore(&guc->context_lookup, flags); + guc_cancel_sched_engine_requests(guc->sched_engine); /* GuC is blown away, drop all references to contexts */ @@ -1027,6 +1172,7 @@ static inline void queue_request(struct i915_sched_engine *sched_engine, list_add_tail(&rq->sched.link, i915_sched_lookup_priolist(sched_engine, prio)); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + tasklet_hi_schedule(&sched_engine->tasklet); } static int guc_bypass_tasklet_submit(struct intel_guc *guc, @@ -1077,12 +1223,12 @@ static int new_guc_id(struct intel_guc *guc) static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) { if (!context_guc_id_invalid(ce)) { - ida_simple_remove(&guc->guc_ids, ce->guc_id); - reset_lrc_desc(guc, ce->guc_id); + ida_simple_remove(&guc->guc_ids, ce->guc_id.id); + reset_lrc_desc(guc, ce->guc_id.id); set_context_guc_id_invalid(ce); } - if (!list_empty(&ce->guc_id_link)) - list_del_init(&ce->guc_id_link); + if (!list_empty(&ce->guc_id.link)) + list_del_init(&ce->guc_id.link); } static void release_guc_id(struct intel_guc *guc, struct intel_context *ce) @@ -1104,14 +1250,18 @@ static int steal_guc_id(struct intel_guc *guc) if (!list_empty(&guc->guc_id_list)) { ce = list_first_entry(&guc->guc_id_list, struct intel_context, - guc_id_link); + guc_id.link); - GEM_BUG_ON(atomic_read(&ce->guc_id_ref)); + GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); GEM_BUG_ON(context_guc_id_invalid(ce)); - list_del_init(&ce->guc_id_link); - guc_id = ce->guc_id; + list_del_init(&ce->guc_id.link); + guc_id = ce->guc_id.id; + + spin_lock(&ce->guc_state.lock); clr_context_registered(ce); + spin_unlock(&ce->guc_state.lock); + set_context_guc_id_invalid(ce); return guc_id; } else { @@ -1142,26 +1292,28 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) int ret = 0; unsigned long flags, tries = PIN_GUC_ID_TRIES; - GEM_BUG_ON(atomic_read(&ce->guc_id_ref)); + GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); try_again: spin_lock_irqsave(&guc->contexts_lock, flags); + might_lock(&ce->guc_state.lock); + if (context_guc_id_invalid(ce)) { - ret = assign_guc_id(guc, &ce->guc_id); + ret = assign_guc_id(guc, &ce->guc_id.id); if (ret) goto out_unlock; ret = 1; /* Indidcates newly assigned guc_id */ } - if (!list_empty(&ce->guc_id_link)) - list_del_init(&ce->guc_id_link); - atomic_inc(&ce->guc_id_ref); + if (!list_empty(&ce->guc_id.link)) + list_del_init(&ce->guc_id.link); + atomic_inc(&ce->guc_id.ref); out_unlock: spin_unlock_irqrestore(&guc->contexts_lock, flags); /* - * -EAGAIN indicates no guc_ids are available, let's retire any + * -EAGAIN indicates no guc_id are available, let's retire any * outstanding requests to see if that frees up a guc_id. If the first * retire didn't help, insert a sleep with the timeslice duration before * attempting to retire more requests. Double the sleep period each @@ -1189,15 +1341,15 @@ static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) { unsigned long flags; - GEM_BUG_ON(atomic_read(&ce->guc_id_ref) < 0); + GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0); if (unlikely(context_guc_id_invalid(ce))) return; spin_lock_irqsave(&guc->contexts_lock, flags); - if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id_link) && - !atomic_read(&ce->guc_id_ref)) - list_add_tail(&ce->guc_id_link, &guc->guc_id_list); + if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) && + !atomic_read(&ce->guc_id.ref)) + list_add_tail(&ce->guc_id.link, &guc->guc_id_list); spin_unlock_irqrestore(&guc->contexts_lock, flags); } @@ -1220,21 +1372,25 @@ static int register_context(struct intel_context *ce, bool loop) { struct intel_guc *guc = ce_to_guc(ce); u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) + - ce->guc_id * sizeof(struct guc_lrc_desc); + ce->guc_id.id * sizeof(struct guc_lrc_desc); int ret; trace_intel_context_register(ce); - ret = __guc_action_register_context(guc, ce->guc_id, offset, loop); - if (likely(!ret)) + ret = __guc_action_register_context(guc, ce->guc_id.id, offset, loop); + if (likely(!ret)) { + unsigned long flags; + + spin_lock_irqsave(&ce->guc_state.lock, flags); set_context_registered(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + } return ret; } static int __guc_action_deregister_context(struct intel_guc *guc, - u32 guc_id, - bool loop) + u32 guc_id) { u32 action[] = { INTEL_GUC_ACTION_DEREGISTER_CONTEXT, @@ -1243,16 +1399,16 @@ static int __guc_action_deregister_context(struct intel_guc *guc, return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), G2H_LEN_DW_DEREGISTER_CONTEXT, - loop); + true); } -static int deregister_context(struct intel_context *ce, u32 guc_id, bool loop) +static int deregister_context(struct intel_context *ce, u32 guc_id) { struct intel_guc *guc = ce_to_guc(ce); trace_intel_context_deregister(ce); - return __guc_action_deregister_context(guc, guc_id, loop); + return __guc_action_deregister_context(guc, guc_id); } static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask) @@ -1285,22 +1441,19 @@ static void guc_context_policy_init(struct intel_engine_cs *engine, desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; } -static inline u8 map_i915_prio_to_guc_prio(int prio); - static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) { struct intel_engine_cs *engine = ce->engine; struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; struct intel_guc *guc = &engine->gt->uc.guc; - u32 desc_idx = ce->guc_id; + u32 desc_idx = ce->guc_id.id; struct guc_lrc_desc *desc; - const struct i915_gem_context *ctx; - int prio = I915_CONTEXT_DEFAULT_PRIORITY; bool context_registered; intel_wakeref_t wakeref; int ret = 0; GEM_BUG_ON(!engine->mask); + GEM_BUG_ON(!sched_state_is_init(ce)); /* * Ensure LRC + CT vmas are is same region as write barrier is done @@ -1311,12 +1464,6 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) context_registered = lrc_desc_registered(guc, desc_idx); - rcu_read_lock(); - ctx = rcu_dereference(ce->gem_context); - if (ctx) - prio = ctx->sched.priority; - rcu_read_unlock(); - reset_lrc_desc(guc, desc_idx); set_lrc_desc_registered(guc, desc_idx, ce); @@ -1325,11 +1472,9 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) desc->engine_submit_mask = adjust_engine_mask(engine->class, engine->mask); desc->hw_context_desc = ce->lrc.lrca; - ce->guc_prio = map_i915_prio_to_guc_prio(prio); - desc->priority = ce->guc_prio; + desc->priority = ce->guc_state.prio; desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; guc_context_policy_init(engine, desc); - init_sched_state(ce); /* * The context_lookup xarray is used to determine if the hardware @@ -1340,26 +1485,23 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) * registering this context. */ if (context_registered) { + bool disabled; + unsigned long flags; + trace_intel_context_steal_guc_id(ce); - if (!loop) { + GEM_BUG_ON(!loop); + + /* Seal race with Reset */ + spin_lock_irqsave(&ce->guc_state.lock, flags); + disabled = submission_disabled(guc); + if (likely(!disabled)) { set_context_wait_for_deregister_to_register(ce); intel_context_get(ce); - } else { - bool disabled; - unsigned long flags; - - /* Seal race with Reset */ - spin_lock_irqsave(&ce->guc_state.lock, flags); - disabled = submission_disabled(guc); - if (likely(!disabled)) { - set_context_wait_for_deregister_to_register(ce); - intel_context_get(ce); - } - spin_unlock_irqrestore(&ce->guc_state.lock, flags); - if (unlikely(disabled)) { - reset_lrc_desc(guc, desc_idx); - return 0; /* Will get registered later */ - } + } + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + if (unlikely(disabled)) { + reset_lrc_desc(guc, desc_idx); + return 0; /* Will get registered later */ } /* @@ -1367,20 +1509,18 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) * context whose guc_id was stolen. */ with_intel_runtime_pm(runtime_pm, wakeref) - ret = deregister_context(ce, ce->guc_id, loop); - if (unlikely(ret == -EBUSY)) { - clr_context_wait_for_deregister_to_register(ce); - intel_context_put(ce); - } else if (unlikely(ret == -ENODEV)) { + ret = deregister_context(ce, ce->guc_id.id); + if (unlikely(ret == -ENODEV)) ret = 0; /* Will get registered later */ - } } else { with_intel_runtime_pm(runtime_pm, wakeref) ret = register_context(ce, loop); - if (unlikely(ret == -EBUSY)) + if (unlikely(ret == -EBUSY)) { + reset_lrc_desc(guc, desc_idx); + } else if (unlikely(ret == -ENODEV)) { reset_lrc_desc(guc, desc_idx); - else if (unlikely(ret == -ENODEV)) ret = 0; /* Will get registered later */ + } } return ret; @@ -1440,7 +1580,7 @@ static void __guc_context_sched_enable(struct intel_guc *guc, { u32 action[] = { INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, - ce->guc_id, + ce->guc_id.id, GUC_CONTEXT_ENABLE }; @@ -1456,7 +1596,7 @@ static void __guc_context_sched_disable(struct intel_guc *guc, { u32 action[] = { INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, - guc_id, /* ce->guc_id not stable */ + guc_id, /* ce->guc_id.id not stable */ GUC_CONTEXT_DISABLE }; @@ -1472,24 +1612,24 @@ static void guc_blocked_fence_complete(struct intel_context *ce) { lockdep_assert_held(&ce->guc_state.lock); - if (!i915_sw_fence_done(&ce->guc_blocked)) - i915_sw_fence_complete(&ce->guc_blocked); + if (!i915_sw_fence_done(&ce->guc_state.blocked)) + i915_sw_fence_complete(&ce->guc_state.blocked); } static void guc_blocked_fence_reinit(struct intel_context *ce) { lockdep_assert_held(&ce->guc_state.lock); - GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_blocked)); + GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked)); /* * This fence is always complete unless a pending schedule disable is * outstanding. We arm the fence here and complete it when we receive * the pending schedule disable complete message. */ - i915_sw_fence_fini(&ce->guc_blocked); - i915_sw_fence_reinit(&ce->guc_blocked); - i915_sw_fence_await(&ce->guc_blocked); - i915_sw_fence_commit(&ce->guc_blocked); + i915_sw_fence_fini(&ce->guc_state.blocked); + i915_sw_fence_reinit(&ce->guc_state.blocked); + i915_sw_fence_await(&ce->guc_state.blocked); + i915_sw_fence_commit(&ce->guc_state.blocked); } static u16 prep_context_pending_disable(struct intel_context *ce) @@ -1501,13 +1641,12 @@ static u16 prep_context_pending_disable(struct intel_context *ce) guc_blocked_fence_reinit(ce); intel_context_get(ce); - return ce->guc_id; + return ce->guc_id.id; } static struct i915_sw_fence *guc_context_block(struct intel_context *ce) { struct intel_guc *guc = ce_to_guc(ce); - struct i915_sched_engine *sched_engine = ce->engine->sched_engine; unsigned long flags; struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; intel_wakeref_t wakeref; @@ -1516,20 +1655,14 @@ static struct i915_sw_fence *guc_context_block(struct intel_context *ce) spin_lock_irqsave(&ce->guc_state.lock, flags); - /* - * Sync with submission path, increment before below changes to context - * state. - */ - spin_lock(&sched_engine->lock); incr_context_blocked(ce); - spin_unlock(&sched_engine->lock); enabled = context_enabled(ce); if (unlikely(!enabled || submission_disabled(guc))) { if (enabled) clr_context_enabled(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); - return &ce->guc_blocked; + return &ce->guc_state.blocked; } /* @@ -1545,13 +1678,29 @@ static struct i915_sw_fence *guc_context_block(struct intel_context *ce) with_intel_runtime_pm(runtime_pm, wakeref) __guc_context_sched_disable(guc, ce, guc_id); - return &ce->guc_blocked; + return &ce->guc_state.blocked; +} + +#define SCHED_STATE_MULTI_BLOCKED_MASK \ + (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED) +#define SCHED_STATE_NO_UNBLOCK \ + (SCHED_STATE_MULTI_BLOCKED_MASK | \ + SCHED_STATE_PENDING_DISABLE | \ + SCHED_STATE_BANNED) + +static bool context_cant_unblock(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + + return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) || + context_guc_id_invalid(ce) || + !lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id) || + !intel_context_is_pinned(ce); } static void guc_context_unblock(struct intel_context *ce) { struct intel_guc *guc = ce_to_guc(ce); - struct i915_sched_engine *sched_engine = ce->engine->sched_engine; unsigned long flags; struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; intel_wakeref_t wakeref; @@ -1562,9 +1711,7 @@ static void guc_context_unblock(struct intel_context *ce) spin_lock_irqsave(&ce->guc_state.lock, flags); if (unlikely(submission_disabled(guc) || - !intel_context_is_pinned(ce) || - context_pending_disable(ce) || - context_blocked(ce) > 1)) { + context_cant_unblock(ce))) { enable = false; } else { enable = true; @@ -1573,13 +1720,7 @@ static void guc_context_unblock(struct intel_context *ce) intel_context_get(ce); } - /* - * Sync with submission path, decrement after above changes to context - * state. - */ - spin_lock(&sched_engine->lock); decr_context_blocked(ce); - spin_unlock(&sched_engine->lock); spin_unlock_irqrestore(&ce->guc_state.lock, flags); @@ -1593,15 +1734,25 @@ static void guc_context_cancel_request(struct intel_context *ce, struct i915_request *rq) { if (i915_sw_fence_signaled(&rq->submit)) { - struct i915_sw_fence *fence = guc_context_block(ce); + struct i915_sw_fence *fence; + intel_context_get(ce); + fence = guc_context_block(ce); i915_sw_fence_wait(fence); if (!i915_request_completed(rq)) { __i915_request_skip(rq); guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head), true); } + + /* + * XXX: Racey if context is reset, see comment in + * __guc_reset_context(). + */ + flush_work(&ce_to_guc(ce)->ct.requests.worker); + guc_context_unblock(ce); + intel_context_put(ce); } } @@ -1662,7 +1813,7 @@ static void guc_context_ban(struct intel_context *ce, struct i915_request *rq) if (!context_guc_id_invalid(ce)) with_intel_runtime_pm(runtime_pm, wakeref) __guc_context_set_preemption_timeout(guc, - ce->guc_id, + ce->guc_id.id, 1); spin_unlock_irqrestore(&ce->guc_state.lock, flags); } @@ -1675,40 +1826,22 @@ static void guc_context_sched_disable(struct intel_context *ce) struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; intel_wakeref_t wakeref; u16 guc_id; - bool enabled; - - if (submission_disabled(guc) || context_guc_id_invalid(ce) || - !lrc_desc_registered(guc, ce->guc_id)) { - clr_context_enabled(ce); - goto unpin; - } - - if (!context_enabled(ce)) - goto unpin; spin_lock_irqsave(&ce->guc_state.lock, flags); /* - * We have to check if the context has been disabled by another thread. - * We also have to check if the context has been pinned again as another - * pin operation is allowed to pass this function. Checking the pin - * count, within ce->guc_state.lock, synchronizes this function with - * guc_request_alloc ensuring a request doesn't slip through the - * 'context_pending_disable' fence. Checking within the spin lock (can't - * sleep) ensures another process doesn't pin this context and generate - * a request before we set the 'context_pending_disable' flag here. + * We have to check if the context has been disabled by another thread, + * check if submssion has been disabled to seal a race with reset and + * finally check if any more requests have been committed to the + * context ensursing that a request doesn't slip through the + * 'context_pending_disable' fence. */ - enabled = context_enabled(ce); - if (unlikely(!enabled || submission_disabled(guc))) { - if (enabled) - clr_context_enabled(ce); + if (unlikely(!context_enabled(ce) || submission_disabled(guc) || + context_has_committed_requests(ce))) { + clr_context_enabled(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); goto unpin; } - if (unlikely(atomic_add_unless(&ce->pin_count, -2, 2))) { - spin_unlock_irqrestore(&ce->guc_state.lock, flags); - return; - } guc_id = prep_context_pending_disable(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); @@ -1725,20 +1858,20 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce) { struct intel_guc *guc = ce_to_guc(ce); - GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id)); - GEM_BUG_ON(ce != __get_context(guc, ce->guc_id)); + GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id.id)); + GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id)); GEM_BUG_ON(context_enabled(ce)); - clr_context_registered(ce); - deregister_context(ce, ce->guc_id, true); + deregister_context(ce, ce->guc_id.id); } static void __guc_context_destroy(struct intel_context *ce) { - GEM_BUG_ON(ce->guc_prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || - ce->guc_prio_count[GUC_CLIENT_PRIORITY_HIGH] || - ce->guc_prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || - ce->guc_prio_count[GUC_CLIENT_PRIORITY_NORMAL]); + GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || + ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] || + ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || + ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); + GEM_BUG_ON(ce->guc_state.number_committed_requests); lrc_fini(ce); intel_context_fini(ce); @@ -1774,7 +1907,7 @@ static void guc_context_destroy(struct kref *kref) __guc_context_destroy(ce); return; } else if (submission_disabled(guc) || - !lrc_desc_registered(guc, ce->guc_id)) { + !lrc_desc_registered(guc, ce->guc_id.id)) { release_guc_id(guc, ce); __guc_context_destroy(ce); return; @@ -1783,10 +1916,10 @@ static void guc_context_destroy(struct kref *kref) /* * We have to acquire the context spinlock and check guc_id again, if it * is valid it hasn't been stolen and needs to be deregistered. We - * delete this context from the list of unpinned guc_ids available to + * delete this context from the list of unpinned guc_id available to * steal to seal a race with guc_lrc_desc_pin(). When the G2H CTB * returns indicating this context has been deregistered the guc_id is - * returned to the pool of available guc_ids. + * returned to the pool of available guc_id. */ spin_lock_irqsave(&guc->contexts_lock, flags); if (context_guc_id_invalid(ce)) { @@ -1795,15 +1928,17 @@ static void guc_context_destroy(struct kref *kref) return; } - if (!list_empty(&ce->guc_id_link)) - list_del_init(&ce->guc_id_link); + if (!list_empty(&ce->guc_id.link)) + list_del_init(&ce->guc_id.link); spin_unlock_irqrestore(&guc->contexts_lock, flags); /* Seal race with Reset */ spin_lock_irqsave(&ce->guc_state.lock, flags); disabled = submission_disabled(guc); - if (likely(!disabled)) + if (likely(!disabled)) { set_context_destroyed(ce); + clr_context_registered(ce); + } spin_unlock_irqrestore(&ce->guc_state.lock, flags); if (unlikely(disabled)) { release_guc_id(guc, ce); @@ -1839,20 +1974,23 @@ static void guc_context_set_prio(struct intel_guc *guc, { u32 action[] = { INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY, - ce->guc_id, + ce->guc_id.id, prio, }; GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || prio > GUC_CLIENT_PRIORITY_NORMAL); + lockdep_assert_held(&ce->guc_state.lock); - if (ce->guc_prio == prio || submission_disabled(guc) || - !context_registered(ce)) + if (ce->guc_state.prio == prio || submission_disabled(guc) || + !context_registered(ce)) { + ce->guc_state.prio = prio; return; + } guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); - ce->guc_prio = prio; + ce->guc_state.prio = prio; trace_intel_context_set_prio(ce); } @@ -1871,25 +2009,25 @@ static inline u8 map_i915_prio_to_guc_prio(int prio) static inline void add_context_inflight_prio(struct intel_context *ce, u8 guc_prio) { - lockdep_assert_held(&ce->guc_active.lock); - GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_prio_count)); + lockdep_assert_held(&ce->guc_state.lock); + GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); - ++ce->guc_prio_count[guc_prio]; + ++ce->guc_state.prio_count[guc_prio]; /* Overflow protection */ - GEM_WARN_ON(!ce->guc_prio_count[guc_prio]); + GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); } static inline void sub_context_inflight_prio(struct intel_context *ce, u8 guc_prio) { - lockdep_assert_held(&ce->guc_active.lock); - GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_prio_count)); + lockdep_assert_held(&ce->guc_state.lock); + GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); /* Underflow protection */ - GEM_WARN_ON(!ce->guc_prio_count[guc_prio]); + GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); - --ce->guc_prio_count[guc_prio]; + --ce->guc_state.prio_count[guc_prio]; } static inline void update_context_prio(struct intel_context *ce) @@ -1900,10 +2038,10 @@ static inline void update_context_prio(struct intel_context *ce) BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0); BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL); - lockdep_assert_held(&ce->guc_active.lock); + lockdep_assert_held(&ce->guc_state.lock); - for (i = 0; i < ARRAY_SIZE(ce->guc_prio_count); ++i) { - if (ce->guc_prio_count[i]) { + for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) { + if (ce->guc_state.prio_count[i]) { guc_context_set_prio(guc, ce, i); break; } @@ -1923,8 +2061,8 @@ static void add_to_context(struct i915_request *rq) GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); - spin_lock(&ce->guc_active.lock); - list_move_tail(&rq->sched.link, &ce->guc_active.requests); + spin_lock(&ce->guc_state.lock); + list_move_tail(&rq->sched.link, &ce->guc_state.requests); if (rq->guc_prio == GUC_PRIO_INIT) { rq->guc_prio = new_guc_prio; @@ -1936,12 +2074,12 @@ static void add_to_context(struct i915_request *rq) } update_context_prio(ce); - spin_unlock(&ce->guc_active.lock); + spin_unlock(&ce->guc_state.lock); } static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce) { - lockdep_assert_held(&ce->guc_active.lock); + lockdep_assert_held(&ce->guc_state.lock); if (rq->guc_prio != GUC_PRIO_INIT && rq->guc_prio != GUC_PRIO_FINI) { @@ -1955,7 +2093,7 @@ static void remove_from_context(struct i915_request *rq) { struct intel_context *ce = rq->context; - spin_lock_irq(&ce->guc_active.lock); + spin_lock_irq(&ce->guc_state.lock); list_del_init(&rq->sched.link); clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); @@ -1965,9 +2103,11 @@ static void remove_from_context(struct i915_request *rq) guc_prio_fini(rq, ce); - spin_unlock_irq(&ce->guc_active.lock); + decr_context_committed_requests(ce); - atomic_dec(&ce->guc_id_ref); + spin_unlock_irq(&ce->guc_state.lock); + + atomic_dec(&ce->guc_id.ref); i915_request_notify_execute_cb_imm(rq); } @@ -1994,17 +2134,32 @@ static const struct intel_context_ops guc_context_ops = { .create_virtual = guc_create_virtual, }; +static void submit_work_cb(struct irq_work *wrk) +{ + struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work); + + might_lock(&rq->engine->sched_engine->lock); + i915_sw_fence_complete(&rq->submit); +} + static void __guc_signal_context_fence(struct intel_context *ce) { - struct i915_request *rq; + struct i915_request *rq, *rn; lockdep_assert_held(&ce->guc_state.lock); if (!list_empty(&ce->guc_state.fences)) trace_intel_context_fence_release(ce); - list_for_each_entry(rq, &ce->guc_state.fences, guc_fence_link) - i915_sw_fence_complete(&rq->submit); + /* + * Use an IRQ to ensure locking order of sched_engine->lock -> + * ce->guc_state.lock is preserved. + */ + list_for_each_entry_safe(rq, rn, &ce->guc_state.fences, + guc_fence_link) { + list_del(&rq->guc_fence_link); + irq_work_queue(&rq->submit_work); + } INIT_LIST_HEAD(&ce->guc_state.fences); } @@ -2022,10 +2177,25 @@ static void guc_signal_context_fence(struct intel_context *ce) static bool context_needs_register(struct intel_context *ce, bool new_guc_id) { return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || - !lrc_desc_registered(ce_to_guc(ce), ce->guc_id)) && + !lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id)) && !submission_disabled(ce_to_guc(ce)); } +static void guc_context_init(struct intel_context *ce) +{ + const struct i915_gem_context *ctx; + int prio = I915_CONTEXT_DEFAULT_PRIORITY; + + rcu_read_lock(); + ctx = rcu_dereference(ce->gem_context); + if (ctx) + prio = ctx->sched.priority; + rcu_read_unlock(); + + ce->guc_state.prio = map_i915_prio_to_guc_prio(prio); + set_bit(CONTEXT_GUC_INIT, &ce->flags); +} + static int guc_request_alloc(struct i915_request *rq) { struct intel_context *ce = rq->context; @@ -2057,14 +2227,17 @@ static int guc_request_alloc(struct i915_request *rq) rq->reserved_space -= GUC_REQUEST_SIZE; + if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags))) + guc_context_init(ce); + /* * Call pin_guc_id here rather than in the pinning step as with * dma_resv, contexts can be repeatedly pinned / unpinned trashing the - * guc_ids and creating horrible race conditions. This is especially bad - * when guc_ids are being stolen due to over subscription. By the time + * guc_id and creating horrible race conditions. This is especially bad + * when guc_id are being stolen due to over subscription. By the time * this function is reached, it is guaranteed that the guc_id will be * persistent until the generated request is retired. Thus, sealing these - * race conditions. It is still safe to fail here if guc_ids are + * race conditions. It is still safe to fail here if guc_id are * exhausted and return -EAGAIN to the user indicating that they can try * again in the future. * @@ -2074,7 +2247,7 @@ static int guc_request_alloc(struct i915_request *rq) * decremented on each retire. When it is zero, a lock around the * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id. */ - if (atomic_add_unless(&ce->guc_id_ref, 1, 0)) + if (atomic_add_unless(&ce->guc_id.ref, 1, 0)) goto out; ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */ @@ -2087,7 +2260,7 @@ static int guc_request_alloc(struct i915_request *rq) disable_submission(guc); goto out; /* GPU will be reset */ } - atomic_dec(&ce->guc_id_ref); + atomic_dec(&ce->guc_id.ref); unpin_guc_id(guc, ce); return ret; } @@ -2102,22 +2275,16 @@ out: * schedule enable or context registration if either G2H is pending * respectfully. Once a G2H returns, the fence is released that is * blocking these requests (see guc_signal_context_fence). - * - * We can safely check the below fields outside of the lock as it isn't - * possible for these fields to transition from being clear to set but - * converse is possible, hence the need for the check within the lock. */ - if (likely(!context_wait_for_deregister_to_register(ce) && - !context_pending_disable(ce))) - return 0; - spin_lock_irqsave(&ce->guc_state.lock, flags); if (context_wait_for_deregister_to_register(ce) || context_pending_disable(ce)) { + init_irq_work(&rq->submit_work, submit_work_cb); i915_sw_fence_await(&rq->submit); list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences); } + incr_context_committed_requests(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); return 0; @@ -2259,7 +2426,7 @@ static void guc_bump_inflight_request_prio(struct i915_request *rq, !new_guc_prio_higher(rq->guc_prio, new_guc_prio))) return; - spin_lock(&ce->guc_active.lock); + spin_lock(&ce->guc_state.lock); if (rq->guc_prio != GUC_PRIO_FINI) { if (rq->guc_prio != GUC_PRIO_INIT) sub_context_inflight_prio(ce, rq->guc_prio); @@ -2267,16 +2434,16 @@ static void guc_bump_inflight_request_prio(struct i915_request *rq, add_context_inflight_prio(ce, rq->guc_prio); update_context_prio(ce); } - spin_unlock(&ce->guc_active.lock); + spin_unlock(&ce->guc_state.lock); } static void guc_retire_inflight_request_prio(struct i915_request *rq) { struct intel_context *ce = rq->context; - spin_lock(&ce->guc_active.lock); + spin_lock(&ce->guc_state.lock); guc_prio_fini(rq, ce); - spin_unlock(&ce->guc_active.lock); + spin_unlock(&ce->guc_state.lock); } static void sanitize_hwsp(struct intel_engine_cs *engine) @@ -2583,12 +2750,6 @@ g2h_context_lookup(struct intel_guc *guc, u32 desc_idx) return ce; } -static void decr_outstanding_submission_g2h(struct intel_guc *guc) -{ - if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) - wake_up_all(&guc->ct.wq); -} - int intel_guc_deregister_done_process_msg(struct intel_guc *guc, const u32 *msg, u32 len) @@ -2607,6 +2768,13 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc, trace_intel_context_deregister_done(ce); +#ifdef CONFIG_DRM_I915_SELFTEST + if (unlikely(ce->drop_deregister)) { + ce->drop_deregister = false; + return 0; + } +#endif + if (context_wait_for_deregister_to_register(ce)) { struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; @@ -2652,8 +2820,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, (!context_pending_enable(ce) && !context_pending_disable(ce)))) { drm_err(&guc_to_gt(guc)->i915->drm, - "Bad context sched_state 0x%x, 0x%x, desc_idx %u", - atomic_read(&ce->guc_sched_state_no_lock), + "Bad context sched_state 0x%x, desc_idx %u", ce->guc_state.sched_state, desc_idx); return -EPROTO; } @@ -2661,10 +2828,26 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, trace_intel_context_sched_done(ce); if (context_pending_enable(ce)) { +#ifdef CONFIG_DRM_I915_SELFTEST + if (unlikely(ce->drop_schedule_enable)) { + ce->drop_schedule_enable = false; + return 0; + } +#endif + + spin_lock_irqsave(&ce->guc_state.lock, flags); clr_context_pending_enable(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); } else if (context_pending_disable(ce)) { bool banned; +#ifdef CONFIG_DRM_I915_SELFTEST + if (unlikely(ce->drop_schedule_disable)) { + ce->drop_schedule_disable = false; + return 0; + } +#endif + /* * Unpin must be done before __guc_signal_context_fence, * otherwise a race exists between the requests getting @@ -2721,7 +2904,12 @@ static void guc_handle_context_reset(struct intel_guc *guc, { trace_intel_context_reset(ce); - if (likely(!intel_context_is_banned(ce))) { + /* + * XXX: Racey if request cancellation has occurred, see comment in + * __guc_reset_context(). + */ + if (likely(!intel_context_is_banned(ce) && + !context_blocked(ce))) { capture_error_state(guc, ce); guc_context_replay(ce); } @@ -2797,33 +2985,47 @@ void intel_guc_find_hung_context(struct intel_engine_cs *engine) struct intel_context *ce; struct i915_request *rq; unsigned long index; + unsigned long flags; /* Reset called during driver load? GuC not yet initialised! */ if (unlikely(!guc_submission_initialized(guc))) return; + xa_lock_irqsave(&guc->context_lookup, flags); xa_for_each(&guc->context_lookup, index, ce) { - if (!intel_context_is_pinned(ce)) + if (!kref_get_unless_zero(&ce->ref)) continue; + xa_unlock(&guc->context_lookup); + + if (!intel_context_is_pinned(ce)) + goto next; + if (intel_engine_is_virtual(ce->engine)) { if (!(ce->engine->mask & engine->mask)) - continue; + goto next; } else { if (ce->engine != engine) - continue; + goto next; } - list_for_each_entry(rq, &ce->guc_active.requests, sched.link) { + list_for_each_entry(rq, &ce->guc_state.requests, sched.link) { if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE) continue; intel_engine_set_hung_context(engine, ce); /* Can only cope with one hang at a time... */ - return; + intel_context_put(ce); + xa_lock(&guc->context_lookup); + goto done; } +next: + intel_context_put(ce); + xa_lock(&guc->context_lookup); } +done: + xa_unlock_irqrestore(&guc->context_lookup, flags); } void intel_guc_dump_active_requests(struct intel_engine_cs *engine, @@ -2839,23 +3041,34 @@ void intel_guc_dump_active_requests(struct intel_engine_cs *engine, if (unlikely(!guc_submission_initialized(guc))) return; + xa_lock_irqsave(&guc->context_lookup, flags); xa_for_each(&guc->context_lookup, index, ce) { - if (!intel_context_is_pinned(ce)) + if (!kref_get_unless_zero(&ce->ref)) continue; + xa_unlock(&guc->context_lookup); + + if (!intel_context_is_pinned(ce)) + goto next; + if (intel_engine_is_virtual(ce->engine)) { if (!(ce->engine->mask & engine->mask)) - continue; + goto next; } else { if (ce->engine != engine) - continue; + goto next; } - spin_lock_irqsave(&ce->guc_active.lock, flags); - intel_engine_dump_active_requests(&ce->guc_active.requests, + spin_lock(&ce->guc_state.lock); + intel_engine_dump_active_requests(&ce->guc_state.requests, hung_rq, m); - spin_unlock_irqrestore(&ce->guc_active.lock, flags); + spin_unlock(&ce->guc_state.lock); + +next: + intel_context_put(ce); + xa_lock(&guc->context_lookup); } + xa_unlock_irqrestore(&guc->context_lookup, flags); } void intel_guc_submission_print_info(struct intel_guc *guc, @@ -2881,7 +3094,7 @@ void intel_guc_submission_print_info(struct intel_guc *guc, priolist_for_each_request(rq, pl) drm_printf(p, "guc_id=%u, seqno=%llu\n", - rq->context->guc_id, + rq->context->guc_id.id, rq->fence.seqno); } spin_unlock_irqrestore(&sched_engine->lock, flags); @@ -2893,13 +3106,12 @@ static inline void guc_log_context_priority(struct drm_printer *p, { int i; - drm_printf(p, "\t\tPriority: %d\n", - ce->guc_prio); + drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio); drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n"); for (i = GUC_CLIENT_PRIORITY_KMD_HIGH; i < GUC_CLIENT_PRIORITY_NUM; ++i) { drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n", - i, ce->guc_prio_count[i]); + i, ce->guc_state.prio_count[i]); } drm_printf(p, "\n"); } @@ -2909,9 +3121,11 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, { struct intel_context *ce; unsigned long index; + unsigned long flags; + xa_lock_irqsave(&guc->context_lookup, flags); xa_for_each(&guc->context_lookup, index, ce) { - drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id); + drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", ce->ring->head, @@ -2922,13 +3136,13 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, drm_printf(p, "\t\tContext Pin Count: %u\n", atomic_read(&ce->pin_count)); drm_printf(p, "\t\tGuC ID Ref Count: %u\n", - atomic_read(&ce->guc_id_ref)); - drm_printf(p, "\t\tSchedule State: 0x%x, 0x%x\n\n", - ce->guc_state.sched_state, - atomic_read(&ce->guc_sched_state_no_lock)); + atomic_read(&ce->guc_id.ref)); + drm_printf(p, "\t\tSchedule State: 0x%x\n\n", + ce->guc_state.sched_state); guc_log_context_priority(p, ce); } + xa_unlock_irqrestore(&guc->context_lookup, flags); } static struct intel_context * @@ -3036,3 +3250,7 @@ bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve) return false; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_guc.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c new file mode 100644 index 000000000000..fb0e4a7bd8ca --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright �� 2021 Intel Corporation + */ + +#include "selftests/intel_scheduler_helpers.h" + +static struct i915_request *nop_user_request(struct intel_context *ce, + struct i915_request *from) +{ + struct i915_request *rq; + int ret; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return rq; + + if (from) { + ret = i915_sw_fence_await_dma_fence(&rq->submit, + &from->fence, 0, + I915_FENCE_GFP); + if (ret < 0) { + i915_request_put(rq); + return ERR_PTR(ret); + } + } + + i915_request_get(rq); + i915_request_add(rq); + + return rq; +} + +static int intel_guc_scrub_ctbs(void *arg) +{ + struct intel_gt *gt = arg; + int ret = 0; + int i; + struct i915_request *last[3] = {NULL, NULL, NULL}, *rq; + intel_wakeref_t wakeref; + struct intel_engine_cs *engine; + struct intel_context *ce; + + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + engine = intel_selftest_find_any_engine(gt); + + /* Submit requests and inject errors forcing G2H to be dropped */ + for (i = 0; i < 3; ++i) { + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + ret = PTR_ERR(ce); + pr_err("Failed to create context, %d: %d\n", i, ret); + goto err; + } + + switch (i) { + case 0: + ce->drop_schedule_enable = true; + break; + case 1: + ce->drop_schedule_disable = true; + break; + case 2: + ce->drop_deregister = true; + break; + } + + rq = nop_user_request(ce, NULL); + intel_context_put(ce); + + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + pr_err("Failed to create request, %d: %d\n", i, ret); + goto err; + } + + last[i] = rq; + } + + for (i = 0; i < 3; ++i) { + ret = i915_request_wait(last[i], 0, HZ); + if (ret < 0) { + pr_err("Last request failed to complete: %d\n", ret); + goto err; + } + i915_request_put(last[i]); + last[i] = NULL; + } + + /* Force all H2G / G2H to be submitted / processed */ + intel_gt_retire_requests(gt); + msleep(500); + + /* Scrub missing G2H */ + intel_gt_handle_error(engine->gt, -1, 0, "selftest reset"); + + /* GT will not idle if G2H are lost */ + ret = intel_gt_wait_for_idle(gt, HZ); + if (ret < 0) { + pr_err("GT failed to idle: %d\n", ret); + goto err; + } + +err: + for (i = 0; i < 3; ++i) + if (last[i]) + i915_request_put(last[i]); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + + return ret; +} + +int intel_guc_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(intel_guc_scrub_ctbs), + }; + struct intel_gt *gt = &i915->gt; + + if (intel_gt_is_wedged(gt)) + return 0; + + if (!intel_uc_uses_guc_submission(>->uc)) + return 0; + + return intel_gt_live_subtests(tests, gt); +} diff --git a/drivers/gpu/drm/i915/i915_buddy.c b/drivers/gpu/drm/i915/i915_buddy.c index 7b274c51cac0..6e2ad68f8f3f 100644 --- a/drivers/gpu/drm/i915/i915_buddy.c +++ b/drivers/gpu/drm/i915/i915_buddy.c @@ -4,6 +4,7 @@ */ #include +#include #include "i915_buddy.h" @@ -82,6 +83,7 @@ int i915_buddy_init(struct i915_buddy_mm *mm, u64 size, u64 chunk_size) size = round_down(size, chunk_size); mm->size = size; + mm->avail = size; mm->chunk_size = chunk_size; mm->max_order = ilog2(size) - ilog2(chunk_size); @@ -155,6 +157,8 @@ void i915_buddy_fini(struct i915_buddy_mm *mm) i915_block_free(mm, mm->roots[i]); } + GEM_WARN_ON(mm->avail != mm->size); + kfree(mm->roots); kfree(mm->free_list); } @@ -230,6 +234,7 @@ void i915_buddy_free(struct i915_buddy_mm *mm, struct i915_buddy_block *block) { GEM_BUG_ON(!i915_buddy_block_is_allocated(block)); + mm->avail += i915_buddy_block_size(mm, block); __i915_buddy_free(mm, block); } @@ -283,6 +288,7 @@ i915_buddy_alloc(struct i915_buddy_mm *mm, unsigned int order) } mark_allocated(block); + mm->avail -= i915_buddy_block_size(mm, block); kmemleak_update_trace(block); return block; @@ -368,6 +374,7 @@ int i915_buddy_alloc_range(struct i915_buddy_mm *mm, } mark_allocated(block); + mm->avail -= i915_buddy_block_size(mm, block); list_add_tail(&block->link, &allocated); continue; } @@ -402,6 +409,44 @@ err_free: return err; } +void i915_buddy_block_print(struct i915_buddy_mm *mm, + struct i915_buddy_block *block, + struct drm_printer *p) +{ + u64 start = i915_buddy_block_offset(block); + u64 size = i915_buddy_block_size(mm, block); + + drm_printf(p, "%#018llx-%#018llx: %llu\n", start, start + size, size); +} + +void i915_buddy_print(struct i915_buddy_mm *mm, struct drm_printer *p) +{ + int order; + + drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB\n", + mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20); + + for (order = mm->max_order; order >= 0; order--) { + struct i915_buddy_block *block; + u64 count = 0, free; + + list_for_each_entry(block, &mm->free_list[order], link) { + GEM_BUG_ON(!i915_buddy_block_is_free(block)); + count++; + } + + drm_printf(p, "order-%d ", order); + + free = count * (mm->chunk_size << order); + if (free < SZ_1M) + drm_printf(p, "free: %lluKiB", free >> 10); + else + drm_printf(p, "free: %lluMiB", free >> 20); + + drm_printf(p, ", pages: %llu\n", count); + } +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/i915_buddy.c" #endif diff --git a/drivers/gpu/drm/i915/i915_buddy.h b/drivers/gpu/drm/i915/i915_buddy.h index 3940d632f208..7077742112ac 100644 --- a/drivers/gpu/drm/i915/i915_buddy.h +++ b/drivers/gpu/drm/i915/i915_buddy.h @@ -10,6 +10,8 @@ #include #include +#include + struct i915_buddy_block { #define I915_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) #define I915_BUDDY_HEADER_STATE GENMASK_ULL(11, 10) @@ -69,6 +71,7 @@ struct i915_buddy_mm { /* Must be at least PAGE_SIZE */ u64 chunk_size; u64 size; + u64 avail; }; static inline u64 @@ -129,6 +132,11 @@ void i915_buddy_free(struct i915_buddy_mm *mm, struct i915_buddy_block *block); void i915_buddy_free_list(struct i915_buddy_mm *mm, struct list_head *objects); +void i915_buddy_print(struct i915_buddy_mm *mm, struct drm_printer *p); +void i915_buddy_block_print(struct i915_buddy_mm *mm, + struct i915_buddy_block *block, + struct drm_printer *p); + void i915_buddy_module_exit(void); int i915_buddy_module_init(void); diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 44969f5dde50..04351a851586 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -238,6 +238,7 @@ i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) static int i915_gem_object_info(struct seq_file *m, void *data) { struct drm_i915_private *i915 = node_to_i915(m->private); + struct drm_printer p = drm_seq_file_printer(m); struct intel_memory_region *mr; enum intel_region_id id; @@ -246,8 +247,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) atomic_read(&i915->mm.free_count), i915->mm.shrink_memory); for_each_memory_region(mr, i915, id) - seq_printf(m, "%s: total:%pa, available:%pa bytes\n", - mr->name, &mr->total, &mr->avail); + intel_memory_region_debug(mr, &p); return 0; } @@ -420,13 +420,11 @@ static int i915_frequency_info(struct seq_file *m, void *unused) int max_freq; rp_state_limits = intel_uncore_read(&dev_priv->uncore, GEN6_RP_STATE_LIMITS); - if (IS_GEN9_LP(dev_priv)) { - rp_state_cap = intel_uncore_read(&dev_priv->uncore, BXT_RP_STATE_CAP); + rp_state_cap = intel_rps_read_state_cap(rps); + if (IS_GEN9_LP(dev_priv)) gt_perf_status = intel_uncore_read(&dev_priv->uncore, BXT_GT_PERF_STATUS); - } else { - rp_state_cap = intel_uncore_read(&dev_priv->uncore, GEN6_RP_STATE_CAP); + else gt_perf_status = intel_uncore_read(&dev_priv->uncore, GEN6_GT_PERF_STATUS); - } /* RPSTAT1 is in the GT power well */ intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a8129153d1db..37c1ca266bcd 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1859,11 +1859,11 @@ i915_gem_vm_lookup(struct drm_i915_file_private *file_priv, u32 id) { struct i915_address_space *vm; - rcu_read_lock(); + xa_lock(&file_priv->vm_xa); vm = xa_load(&file_priv->vm_xa, id); - if (vm && !kref_get_unless_zero(&vm->ref)) - vm = NULL; - rcu_read_unlock(); + if (vm) + kref_get(&vm->ref); + xa_unlock(&file_priv->vm_xa); return vm; } diff --git a/drivers/gpu/drm/i915/i915_gem_ww.h b/drivers/gpu/drm/i915/i915_gem_ww.h index f6b1a796667b..86f0fe343de6 100644 --- a/drivers/gpu/drm/i915/i915_gem_ww.h +++ b/drivers/gpu/drm/i915/i915_gem_ww.h @@ -11,8 +11,7 @@ struct i915_gem_ww_ctx { struct ww_acquire_ctx ctx; struct list_head obj_list; struct drm_i915_gem_object *contended; - unsigned short intr; - unsigned short loop; + bool intr; }; void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr); @@ -20,31 +19,23 @@ void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx); int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx); void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj); -/* Internal functions used by the inlines! Don't use. */ +/* Internal function used by the inlines! Don't use. */ static inline int __i915_gem_ww_fini(struct i915_gem_ww_ctx *ww, int err) { - ww->loop = 0; if (err == -EDEADLK) { err = i915_gem_ww_ctx_backoff(ww); if (!err) - ww->loop = 1; + err = -EDEADLK; } - if (!ww->loop) + if (err != -EDEADLK) i915_gem_ww_ctx_fini(ww); return err; } -static inline void -__i915_gem_ww_init(struct i915_gem_ww_ctx *ww, bool intr) -{ - i915_gem_ww_ctx_init(ww, intr); - ww->loop = 1; -} - -#define for_i915_gem_ww(_ww, _err, _intr) \ - for (__i915_gem_ww_init(_ww, _intr); (_ww)->loop; \ - _err = __i915_gem_ww_fini(_ww, _err)) - +#define for_i915_gem_ww(_ww, _err, _intr) \ + for (i915_gem_ww_ctx_init(_ww, _intr), (_err) = -EDEADLK; \ + (_err) == -EDEADLK; \ + (_err) = __i915_gem_ww_fini(_ww, _err)) #endif diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 9cf6ac575de1..b9f66dbd46bb 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -431,6 +431,7 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, const struct sseu_dev_info *sseu = &ee->engine->gt->info.sseu; int slice; int subslice; + int iter; err_printf(m, " INSTDONE: 0x%08x\n", ee->instdone.instdone); @@ -444,19 +445,38 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, if (GRAPHICS_VER(m->i915) <= 6) return; - for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) - err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n", - slice, subslice, - ee->instdone.sampler[slice][subslice]); + if (GRAPHICS_VER_FULL(m->i915) >= IP_VER(12, 50)) { + for_each_instdone_gslice_dss_xehp(m->i915, sseu, iter, slice, subslice) + err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n", + slice, subslice, + ee->instdone.sampler[slice][subslice]); - for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) - err_printf(m, " ROW_INSTDONE[%d][%d]: 0x%08x\n", - slice, subslice, - ee->instdone.row[slice][subslice]); + for_each_instdone_gslice_dss_xehp(m->i915, sseu, iter, slice, subslice) + err_printf(m, " ROW_INSTDONE[%d][%d]: 0x%08x\n", + slice, subslice, + ee->instdone.row[slice][subslice]); + } else { + for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) + err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n", + slice, subslice, + ee->instdone.sampler[slice][subslice]); + + for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) + err_printf(m, " ROW_INSTDONE[%d][%d]: 0x%08x\n", + slice, subslice, + ee->instdone.row[slice][subslice]); + } if (GRAPHICS_VER(m->i915) < 12) return; + if (GRAPHICS_VER_FULL(m->i915) >= IP_VER(12, 55)) { + for_each_instdone_gslice_dss_xehp(m->i915, sseu, iter, slice, subslice) + err_printf(m, " GEOM_SVGUNIT_INSTDONE[%d][%d]: 0x%08x\n", + slice, subslice, + ee->instdone.geom_svg[slice][subslice]); + } + err_printf(m, " SC_INSTDONE_EXTRA: 0x%08x\n", ee->instdone.slice_common_extra[0]); err_printf(m, " SC_INSTDONE_EXTRA2: 0x%08x\n", diff --git a/drivers/gpu/drm/i915/i915_module.c b/drivers/gpu/drm/i915/i915_module.c index d8b4482c69d0..ab2295dd4500 100644 --- a/drivers/gpu/drm/i915/i915_module.c +++ b/drivers/gpu/drm/i915/i915_module.c @@ -67,8 +67,8 @@ static const struct { { .init = i915_mock_selftests }, { .init = i915_pmu_init, .exit = i915_pmu_exit }, - { .init = i915_register_pci_driver, - .exit = i915_unregister_pci_driver }, + { .init = i915_pci_register_driver, + .exit = i915_pci_unregister_driver }, { .init = i915_perf_sysctl_register, .exit = i915_perf_sysctl_unregister }, }; diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 52c04369559f..d4a6a9dcf182 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -894,7 +894,7 @@ static const struct intel_device_info rkl_info = { .has_snoop = 1, \ .is_dgfx = 1 -static const struct intel_device_info dg1_info __maybe_unused = { +static const struct intel_device_info dg1_info = { GEN12_FEATURES, DGFX_FEATURES, .graphics_rel = 10, @@ -912,7 +912,6 @@ static const struct intel_device_info adl_s_info = { GEN12_FEATURES, PLATFORM(INTEL_ALDERLAKE_S), .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), - .require_force_probe = 1, .display.has_hti = 1, .display.has_psr_hw_tracking = 0, .platform_engine_mask = @@ -1115,6 +1114,7 @@ static const struct pci_device_id pciidlist[] = { INTEL_RKL_IDS(&rkl_info), INTEL_ADLS_IDS(&adl_s_info), INTEL_ADLP_IDS(&adl_p_info), + INTEL_DG1_IDS(&dg1_info), {0, 0, 0} }; MODULE_DEVICE_TABLE(pci, pciidlist); @@ -1234,12 +1234,12 @@ static struct pci_driver i915_pci_driver = { .driver.pm = &i915_pm_ops, }; -int i915_register_pci_driver(void) +int i915_pci_register_driver(void) { return pci_register_driver(&i915_pci_driver); } -void i915_unregister_pci_driver(void) +void i915_pci_unregister_driver(void) { pci_unregister_driver(&i915_pci_driver); } diff --git a/drivers/gpu/drm/i915/i915_pci.h b/drivers/gpu/drm/i915/i915_pci.h index b386f319f52e..ee048c238174 100644 --- a/drivers/gpu/drm/i915/i915_pci.h +++ b/drivers/gpu/drm/i915/i915_pci.h @@ -1,8 +1,12 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2021 Intel Corporation */ -int i915_register_pci_driver(void); -void i915_unregister_pci_driver(void); +#ifndef __I915_PCI_H__ +#define __I915_PCI_H__ + +int i915_pci_register_driver(void); +void i915_pci_unregister_driver(void); + +#endif /* __I915_PCI_H__ */ diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index e49da36c62fb..5e2b909827f4 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -432,9 +432,6 @@ static int query_memregion_info(struct drm_i915_private *i915, u32 total_length; int ret, id, i; - if (!IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM)) - return -ENODEV; - if (query_item->flags != 0) return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c8de2b4c6a9b..251d6ddebcff 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2555,6 +2555,32 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define RING_HWS_PGA(base) _MMIO((base) + 0x80) #define RING_ID(base) _MMIO((base) + 0x8c) #define RING_HWS_PGA_GEN6(base) _MMIO((base) + 0x2080) + +#define RING_CMD_CCTL(base) _MMIO((base) + 0xc4) +/* + * CMD_CCTL read/write fields take a MOCS value and _not_ a table index. + * The lsb of each can be considered a separate enabling bit for encryption. + * 6:0 == default MOCS value for reads => 6:1 == table index for reads. + * 13:7 == default MOCS value for writes => 13:8 == table index for writes. + * 15:14 == Reserved => 31:30 are set to 0. + */ +#define CMD_CCTL_WRITE_OVERRIDE_MASK REG_GENMASK(13, 7) +#define CMD_CCTL_READ_OVERRIDE_MASK REG_GENMASK(6, 0) +#define CMD_CCTL_MOCS_MASK (CMD_CCTL_WRITE_OVERRIDE_MASK | \ + CMD_CCTL_READ_OVERRIDE_MASK) +#define CMD_CCTL_MOCS_OVERRIDE(write, read) \ + (REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 1) | \ + REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1)) + +#define BLIT_CCTL(base) _MMIO((base) + 0x204) +#define BLIT_CCTL_DST_MOCS_MASK REG_GENMASK(14, 8) +#define BLIT_CCTL_SRC_MOCS_MASK REG_GENMASK(6, 0) +#define BLIT_CCTL_MASK (BLIT_CCTL_DST_MOCS_MASK | \ + BLIT_CCTL_SRC_MOCS_MASK) +#define BLIT_CCTL_MOCS(dst, src) \ + (REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, (dst) << 1) | \ + REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, (src) << 1)) + #define RING_RESET_CTL(base) _MMIO((base) + 0xd0) #define RESET_CTL_CAT_ERROR REG_BIT(2) #define RESET_CTL_READY_TO_RESET REG_BIT(1) @@ -2690,6 +2716,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN12_SC_INSTDONE_EXTRA2 _MMIO(0x7108) #define GEN7_SAMPLER_INSTDONE _MMIO(0xe160) #define GEN7_ROW_INSTDONE _MMIO(0xe164) +#define XEHPG_INSTDONE_GEOM_SVG _MMIO(0x666c) #define MCFG_MCR_SELECTOR _MMIO(0xfd0) #define SF_MCR_SELECTOR _MMIO(0xfd8) #define GEN8_MCR_SELECTOR _MMIO(0xfdc) @@ -2824,6 +2851,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MI_MODE _MMIO(0x209c) # define VS_TIMER_DISPATCH (1 << 6) # define MI_FLUSH_ENABLE (1 << 12) +# define TGL_NESTED_BB_EN (1 << 12) # define ASYNC_FLIP_PERF_DISABLE (1 << 14) # define MODE_IDLE (1 << 9) # define STOP_RING (1 << 8) @@ -3154,7 +3182,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C) -#define GEN12_GT_DSS_ENABLE _MMIO(0x913C) +#define GEN12_GT_GEOMETRY_DSS_ENABLE _MMIO(0x913C) +#define GEN12_GT_COMPUTE_DSS_ENABLE _MMIO(0x9144) #define XEHP_EU_ENABLE _MMIO(0x9134) #define XEHP_EU_ENA_MASK 0xFF @@ -4117,6 +4146,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define RPN_CAP_MASK REG_GENMASK(23, 16) #define BXT_RP_STATE_CAP _MMIO(0x138170) #define GEN9_RP_STATE_LIMITS _MMIO(0x138148) +#define XEHPSDV_RP_STATE_CAP _MMIO(0x250014) /* * Logical Context regs diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 1bc1349ba3c2..7bd9ed20623e 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -218,6 +218,11 @@ struct i915_request { }; struct llist_head execute_cb; struct i915_sw_fence semaphore; + /** + * @submit_work: complete submit fence from an IRQ if needed for + * locking hierarchy reasons. + */ + struct irq_work submit_work; /* * A list of everyone we wait upon, and everyone who waits upon us. @@ -285,18 +290,23 @@ struct i915_request { struct hrtimer timer; } watchdog; - /* - * Requests may need to be stalled when using GuC submission waiting for - * certain GuC operations to complete. If that is the case, stalled - * requests are added to a per context list of stalled requests. The - * below list_head is the link in that list. + /** + * @guc_fence_link: Requests may need to be stalled when using GuC + * submission waiting for certain GuC operations to complete. If that is + * the case, stalled requests are added to a per context list of stalled + * requests. The below list_head is the link in that list. Protected by + * ce->guc_state.lock. */ struct list_head guc_fence_link; /** - * Priority level while the request is inflight. Differs from i915 - * scheduler priority. See comment above - * I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP for details. + * @guc_prio: Priority level while the request is in flight. Differs + * from i915 scheduler priority. See comment above + * I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP for details. Protected by + * ce->guc_active.lock. Two special values (GUC_PRIO_INIT and + * GUC_PRIO_FINI) outside the GuC priority range are used to indicate + * if the priority has not been initialized yet or if no more updates + * are possible because the request has completed. */ #define GUC_PRIO_INIT 0xff #define GUC_PRIO_FINI 0xfe diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 63fec1c3c132..8104981a6604 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -900,23 +900,19 @@ DECLARE_EVENT_CLASS(intel_context, __field(u32, guc_id) __field(int, pin_count) __field(u32, sched_state) - __field(u32, guc_sched_state_no_lock) __field(u8, guc_prio) ), TP_fast_assign( - __entry->guc_id = ce->guc_id; + __entry->guc_id = ce->guc_id.id; __entry->pin_count = atomic_read(&ce->pin_count); __entry->sched_state = ce->guc_state.sched_state; - __entry->guc_sched_state_no_lock = - atomic_read(&ce->guc_sched_state_no_lock); - __entry->guc_prio = ce->guc_prio; + __entry->guc_prio = ce->guc_state.prio; ), - TP_printk("guc_id=%d, pin_count=%d sched_state=0x%x,0x%x, guc_prio=%u", + TP_printk("guc_id=%d, pin_count=%d sched_state=0x%x, guc_prio=%u", __entry->guc_id, __entry->pin_count, __entry->sched_state, - __entry->guc_sched_state_no_lock, __entry->guc_prio) ); @@ -1243,7 +1239,7 @@ DECLARE_EVENT_CLASS(i915_context, TP_fast_assign( __entry->dev = ctx->i915->drm.primary->index; __entry->ctx = ctx; - __entry->vm = rcu_access_pointer(ctx->vm); + __entry->vm = ctx->vm; ), TP_printk("dev=%u, ctx=%p, ctx_vm=%p", diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c index 6877362f6b85..d59fbb019032 100644 --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c @@ -126,12 +126,30 @@ static void i915_ttm_buddy_man_free(struct ttm_resource_manager *man, kfree(bman_res); } +static void i915_ttm_buddy_man_debug(struct ttm_resource_manager *man, + struct drm_printer *printer) +{ + struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); + struct i915_buddy_block *block; + + mutex_lock(&bman->lock); + drm_printf(printer, "default_page_size: %lluKiB\n", + bman->default_page_size >> 10); + + i915_buddy_print(&bman->mm, printer); + + drm_printf(printer, "reserved:\n"); + list_for_each_entry(block, &bman->reserved, link) + i915_buddy_block_print(&bman->mm, block, printer); + mutex_unlock(&bman->lock); +} + static const struct ttm_resource_manager_func i915_ttm_buddy_manager_func = { .alloc = i915_ttm_buddy_man_alloc, .free = i915_ttm_buddy_man_free, + .debug = i915_ttm_buddy_man_debug, }; - /** * i915_ttm_buddy_man_init - Setup buddy allocator based ttm manager * @bdev: The ttm device diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index 779eb2fa90b6..e7f7e6627750 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -78,6 +78,18 @@ int intel_memory_region_reserve(struct intel_memory_region *mem, return i915_ttm_buddy_man_reserve(man, offset, size); } +void intel_memory_region_debug(struct intel_memory_region *mr, + struct drm_printer *printer) +{ + drm_printf(printer, "%s: ", mr->name); + + if (mr->region_private) + ttm_resource_manager_debug(mr->region_private, printer); + else + drm_printf(printer, "total:%pa, available:%pa bytes\n", + &mr->total, &mr->avail); +} + struct intel_memory_region * intel_memory_region_create(struct drm_i915_private *i915, resource_size_t start, diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 1f2b96efa69d..3feae3353d33 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -15,6 +15,7 @@ struct drm_i915_private; struct drm_i915_gem_object; +struct drm_printer; struct intel_memory_region; struct sg_table; struct ttm_resource; @@ -127,6 +128,9 @@ int intel_memory_region_reserve(struct intel_memory_region *mem, resource_size_t offset, resource_size_t size); +void intel_memory_region_debug(struct intel_memory_region *mr, + struct drm_printer *printer); + struct intel_memory_region * i915_gem_ttm_system_setup(struct drm_i915_private *i915, u16 type, u16 instance); diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 6b38bc2811c1..f9767054dbdf 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -64,7 +64,7 @@ static void mmio_debug_resume(struct intel_uncore_mmio_debug *mmio_debug) static const char * const forcewake_domain_names[] = { "render", - "blitter", + "gt", "media", "vdbox0", "vdbox1", @@ -945,105 +945,92 @@ static const struct intel_forcewake_range __vlv_fw_ranges[] = { #define __gen11_fwtable_reg_read_fw_domains(uncore, offset) \ find_fw_domain(uncore, offset) -#define __gen12_fwtable_reg_read_fw_domains(uncore, offset) \ - find_fw_domain(uncore, offset) - /* *Must* be sorted by offset! See intel_shadow_table_check(). */ -static const i915_reg_t gen8_shadowed_regs[] = { - RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ - GEN6_RPNSWREQ, /* 0xA008 */ - GEN6_RC_VIDEO_FREQ, /* 0xA00C */ - RING_TAIL(GEN6_BSD_RING_BASE), /* 0x12000 (base) */ - RING_TAIL(VEBOX_RING_BASE), /* 0x1a000 (base) */ - RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ +static const struct i915_range gen8_shadowed_regs[] = { + { .start = 0x2030, .end = 0x2030 }, + { .start = 0xA008, .end = 0xA00C }, + { .start = 0x12030, .end = 0x12030 }, + { .start = 0x1a030, .end = 0x1a030 }, + { .start = 0x22030, .end = 0x22030 }, /* TODO: Other registers are not yet used */ }; -static const i915_reg_t gen11_shadowed_regs[] = { - RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ - RING_EXECLIST_CONTROL(RENDER_RING_BASE), /* 0x2550 */ - GEN6_RPNSWREQ, /* 0xA008 */ - GEN6_RC_VIDEO_FREQ, /* 0xA00C */ - RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ - RING_EXECLIST_CONTROL(BLT_RING_BASE), /* 0x22550 */ - RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD_RING_BASE), /* 0x1C0550 */ - RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD2_RING_BASE), /* 0x1C4550 */ - RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ - RING_EXECLIST_CONTROL(GEN11_VEBOX_RING_BASE), /* 0x1C8550 */ - RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD3_RING_BASE), /* 0x1D0550 */ - RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD4_RING_BASE), /* 0x1D4550 */ - RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ - RING_EXECLIST_CONTROL(GEN11_VEBOX2_RING_BASE), /* 0x1D8550 */ - /* TODO: Other registers are not yet used */ +static const struct i915_range gen11_shadowed_regs[] = { + { .start = 0x2030, .end = 0x2030 }, + { .start = 0x2550, .end = 0x2550 }, + { .start = 0xA008, .end = 0xA00C }, + { .start = 0x22030, .end = 0x22030 }, + { .start = 0x22230, .end = 0x22230 }, + { .start = 0x22510, .end = 0x22550 }, + { .start = 0x1C0030, .end = 0x1C0030 }, + { .start = 0x1C0230, .end = 0x1C0230 }, + { .start = 0x1C0510, .end = 0x1C0550 }, + { .start = 0x1C4030, .end = 0x1C4030 }, + { .start = 0x1C4230, .end = 0x1C4230 }, + { .start = 0x1C4510, .end = 0x1C4550 }, + { .start = 0x1C8030, .end = 0x1C8030 }, + { .start = 0x1C8230, .end = 0x1C8230 }, + { .start = 0x1C8510, .end = 0x1C8550 }, + { .start = 0x1D0030, .end = 0x1D0030 }, + { .start = 0x1D0230, .end = 0x1D0230 }, + { .start = 0x1D0510, .end = 0x1D0550 }, + { .start = 0x1D4030, .end = 0x1D4030 }, + { .start = 0x1D4230, .end = 0x1D4230 }, + { .start = 0x1D4510, .end = 0x1D4550 }, + { .start = 0x1D8030, .end = 0x1D8030 }, + { .start = 0x1D8230, .end = 0x1D8230 }, + { .start = 0x1D8510, .end = 0x1D8550 }, }; -static const i915_reg_t gen12_shadowed_regs[] = { - RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ - RING_EXECLIST_CONTROL(RENDER_RING_BASE), /* 0x2550 */ - GEN6_RPNSWREQ, /* 0xA008 */ - GEN6_RC_VIDEO_FREQ, /* 0xA00C */ - RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ - RING_EXECLIST_CONTROL(BLT_RING_BASE), /* 0x22550 */ - RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD_RING_BASE), /* 0x1C0550 */ - RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD2_RING_BASE), /* 0x1C4550 */ - RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ - RING_EXECLIST_CONTROL(GEN11_VEBOX_RING_BASE), /* 0x1C8550 */ - RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD3_RING_BASE), /* 0x1D0550 */ - RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD4_RING_BASE), /* 0x1D4550 */ - RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ - RING_EXECLIST_CONTROL(GEN11_VEBOX2_RING_BASE), /* 0x1D8550 */ - /* TODO: Other registers are not yet used */ +static const struct i915_range gen12_shadowed_regs[] = { + { .start = 0x2030, .end = 0x2030 }, + { .start = 0x2510, .end = 0x2550 }, + { .start = 0xA008, .end = 0xA00C }, + { .start = 0xA188, .end = 0xA188 }, + { .start = 0xA278, .end = 0xA278 }, + { .start = 0xA540, .end = 0xA56C }, + { .start = 0xC4C8, .end = 0xC4C8 }, + { .start = 0xC4D4, .end = 0xC4D4 }, + { .start = 0xC600, .end = 0xC600 }, + { .start = 0x22030, .end = 0x22030 }, + { .start = 0x22510, .end = 0x22550 }, + { .start = 0x1C0030, .end = 0x1C0030 }, + { .start = 0x1C0510, .end = 0x1C0550 }, + { .start = 0x1C4030, .end = 0x1C4030 }, + { .start = 0x1C4510, .end = 0x1C4550 }, + { .start = 0x1C8030, .end = 0x1C8030 }, + { .start = 0x1C8510, .end = 0x1C8550 }, + { .start = 0x1D0030, .end = 0x1D0030 }, + { .start = 0x1D0510, .end = 0x1D0550 }, + { .start = 0x1D4030, .end = 0x1D4030 }, + { .start = 0x1D4510, .end = 0x1D4550 }, + { .start = 0x1D8030, .end = 0x1D8030 }, + { .start = 0x1D8510, .end = 0x1D8550 }, + + /* + * The rest of these ranges are specific to Xe_HP and beyond, but + * are reserved/unused ranges on earlier gen12 platforms, so they can + * be safely added to the gen12 table. + */ + { .start = 0x1E0030, .end = 0x1E0030 }, + { .start = 0x1E0510, .end = 0x1E0550 }, + { .start = 0x1E4030, .end = 0x1E4030 }, + { .start = 0x1E4510, .end = 0x1E4550 }, + { .start = 0x1E8030, .end = 0x1E8030 }, + { .start = 0x1E8510, .end = 0x1E8550 }, + { .start = 0x1F0030, .end = 0x1F0030 }, + { .start = 0x1F0510, .end = 0x1F0550 }, + { .start = 0x1F4030, .end = 0x1F4030 }, + { .start = 0x1F4510, .end = 0x1F4550 }, + { .start = 0x1F8030, .end = 0x1F8030 }, + { .start = 0x1F8510, .end = 0x1F8550 }, }; -static const i915_reg_t xehp_shadowed_regs[] = { - RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ - RING_EXECLIST_CONTROL(RENDER_RING_BASE), /* 0x2550 */ - GEN6_RPNSWREQ, /* 0xA008 */ - GEN6_RC_VIDEO_FREQ, /* 0xA00C */ - RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ - RING_EXECLIST_CONTROL(BLT_RING_BASE), /* 0x22550 */ - RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD_RING_BASE), /* 0x1C0550 */ - RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD2_RING_BASE), /* 0x1C4550 */ - RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ - RING_EXECLIST_CONTROL(GEN11_VEBOX_RING_BASE), /* 0x1C8550 */ - RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD3_RING_BASE), /* 0x1D0550 */ - RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD4_RING_BASE), /* 0x1D4550 */ - RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ - RING_EXECLIST_CONTROL(GEN11_VEBOX2_RING_BASE), /* 0x1D8550 */ - RING_TAIL(XEHP_BSD5_RING_BASE), /* 0x1E0000 (base) */ - RING_EXECLIST_CONTROL(XEHP_BSD5_RING_BASE), /* 0x1E0550 */ - RING_TAIL(XEHP_BSD6_RING_BASE), /* 0x1E4000 (base) */ - RING_EXECLIST_CONTROL(XEHP_BSD6_RING_BASE), /* 0x1E4550 */ - RING_TAIL(XEHP_VEBOX3_RING_BASE), /* 0x1E8000 (base) */ - RING_EXECLIST_CONTROL(XEHP_VEBOX3_RING_BASE), /* 0x1E8550 */ - RING_TAIL(XEHP_BSD7_RING_BASE), /* 0x1F0000 (base) */ - RING_EXECLIST_CONTROL(XEHP_BSD7_RING_BASE), /* 0x1F0550 */ - RING_TAIL(XEHP_BSD8_RING_BASE), /* 0x1F4000 (base) */ - RING_EXECLIST_CONTROL(XEHP_BSD8_RING_BASE), /* 0x1F4550 */ - RING_TAIL(XEHP_VEBOX4_RING_BASE), /* 0x1F8000 (base) */ - RING_EXECLIST_CONTROL(XEHP_VEBOX4_RING_BASE), /* 0x1F8550 */ - /* TODO: Other registers are not yet used */ -}; - -static int mmio_reg_cmp(u32 key, const i915_reg_t *reg) +static int mmio_range_cmp(u32 key, const struct i915_range *range) { - u32 offset = i915_mmio_reg_offset(*reg); - - if (key < offset) + if (key < range->start) return -1; - else if (key > offset) + else if (key > range->end) return 1; else return 0; @@ -1052,15 +1039,14 @@ static int mmio_reg_cmp(u32 key, const i915_reg_t *reg) #define __is_X_shadowed(x) \ static bool is_##x##_shadowed(u32 offset) \ { \ - const i915_reg_t *regs = x##_shadowed_regs; \ + const struct i915_range *regs = x##_shadowed_regs; \ return BSEARCH(offset, regs, ARRAY_SIZE(x##_shadowed_regs), \ - mmio_reg_cmp); \ + mmio_range_cmp); \ } __is_X_shadowed(gen8) __is_X_shadowed(gen11) __is_X_shadowed(gen12) -__is_X_shadowed(xehp) static enum forcewake_domains gen6_reg_write_fw_domains(struct intel_uncore *uncore, i915_reg_t reg) @@ -1124,15 +1110,6 @@ static const struct intel_forcewake_range __chv_fw_ranges[] = { __fwd; \ }) -#define __xehp_fwtable_reg_write_fw_domains(uncore, offset) \ -({ \ - enum forcewake_domains __fwd = 0; \ - const u32 __offset = (offset); \ - if (!is_xehp_shadowed(__offset)) \ - __fwd = find_fw_domain(uncore, __offset); \ - __fwd; \ -}) - /* *Must* be sorted by offset ranges! See intel_fw_table_check(). */ static const struct intel_forcewake_range __gen9_fw_ranges[] = { GEN_FW_RANGE(0x0, 0xaff, FORCEWAKE_GT), @@ -1644,7 +1621,6 @@ __gen_read(func, 16) \ __gen_read(func, 32) \ __gen_read(func, 64) -__gen_reg_read_funcs(gen12_fwtable); __gen_reg_read_funcs(gen11_fwtable); __gen_reg_read_funcs(fwtable); __gen_reg_read_funcs(gen6); @@ -1736,7 +1712,6 @@ __gen_write(func, 8) \ __gen_write(func, 16) \ __gen_write(func, 32) -__gen_reg_write_funcs(xehp_fwtable); __gen_reg_write_funcs(gen12_fwtable); __gen_reg_write_funcs(gen11_fwtable); __gen_reg_write_funcs(fwtable); @@ -2113,16 +2088,16 @@ static int uncore_forcewake_init(struct intel_uncore *uncore) if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __dg2_fw_ranges); - ASSIGN_WRITE_MMIO_VFUNCS(uncore, xehp_fwtable); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen12_fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __xehp_fw_ranges); - ASSIGN_WRITE_MMIO_VFUNCS(uncore, xehp_fwtable); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen12_fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); } else if (GRAPHICS_VER(i915) >= 12) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen12_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen12_fwtable); - ASSIGN_READ_MMIO_VFUNCS(uncore, gen12_fwtable); + ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); } else if (GRAPHICS_VER(i915) == 11) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen11_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen11_fwtable); diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 3c0b0a8b5250..531665b08039 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -119,6 +119,12 @@ struct intel_forcewake_range { enum forcewake_domains domains; }; +/* Other register ranges (e.g., shadow tables, MCR tables, etc.) */ +struct i915_range { + u32 start; + u32 end; +}; + struct intel_uncore { void __iomem *regs; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index f843a5040706..2d60a5a5b065 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1300,7 +1300,7 @@ static int exercise_mock(struct drm_i915_private *i915, if (!ctx) return -ENOMEM; - vm = i915_gem_context_get_vm_rcu(ctx); + vm = i915_gem_context_get_eb_vm(ctx); err = func(vm, 0, min(vm->total, limit), end_time); i915_vm_put(vm); @@ -1848,7 +1848,7 @@ static int igt_cs_tlb(void *arg) goto out_unlock; } - vm = i915_gem_context_get_vm_rcu(ctx); + vm = i915_gem_context_get_eb_vm(ctx); if (i915_is_ggtt(vm)) goto out_vm; diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index cfa5c4165a4f..3cf6758931f9 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -47,5 +47,6 @@ selftest(execlists, intel_execlists_live_selftests) selftest(ring_submission, intel_ring_submission_live_selftests) selftest(perf, i915_perf_live_selftests) selftest(slpc, intel_slpc_live_selftests) +selftest(guc, intel_guc_live_selftests) /* Here be dragons: keep last to run last! */ selftest(late_gt_pm, intel_gt_pm_late_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index dd0607254a95..1f10fe36619b 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -39,7 +39,7 @@ static bool assert_vma(struct i915_vma *vma, { bool ok = true; - if (vma->vm != rcu_access_pointer(ctx->vm)) { + if (vma->vm != ctx->vm) { pr_err("VMA created with wrong VM\n"); ok = false; } @@ -118,7 +118,7 @@ static int create_vmas(struct drm_i915_private *i915, struct i915_vma *vma; int err; - vm = i915_gem_context_get_vm_rcu(ctx); + vm = i915_gem_context_get_eb_vm(ctx); vma = checked_vma_instance(obj, vm, NULL); i915_vm_put(vm); if (IS_ERR(vma)) diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c index 4b328346b48a..310fb83c527e 100644 --- a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c +++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c @@ -14,6 +14,18 @@ #define REDUCED_PREEMPT 10 #define WAIT_FOR_RESET_TIME 10000 +struct intel_engine_cs *intel_selftest_find_any_engine(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, gt, id) + return engine; + + pr_err("No valid engine found!\n"); + return NULL; +} + int intel_selftest_modify_policy(struct intel_engine_cs *engine, struct intel_selftest_saved_policy *saved, u32 modify_type) diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h index 35c098601ac0..ae60bb507f45 100644 --- a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h +++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h @@ -10,6 +10,7 @@ struct i915_request; struct intel_engine_cs; +struct intel_gt; struct intel_selftest_saved_policy { u32 flags; @@ -23,6 +24,7 @@ enum selftest_scheduler_modify { SELFTEST_SCHEDULER_MODIFY_FAST_RESET, }; +struct intel_engine_cs *intel_selftest_find_any_engine(struct intel_gt *gt); int intel_selftest_modify_policy(struct intel_engine_cs *engine, struct intel_selftest_saved_policy *saved, enum selftest_scheduler_modify modify_type); diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 720b60853f8b..22ef2c87df1a 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -62,30 +62,39 @@ static int intel_fw_table_check(const struct intel_forcewake_range *ranges, static int intel_shadow_table_check(void) { struct { - const i915_reg_t *regs; + const struct i915_range *regs; unsigned int size; - } reg_lists[] = { + } range_lists[] = { { gen8_shadowed_regs, ARRAY_SIZE(gen8_shadowed_regs) }, { gen11_shadowed_regs, ARRAY_SIZE(gen11_shadowed_regs) }, { gen12_shadowed_regs, ARRAY_SIZE(gen12_shadowed_regs) }, - { xehp_shadowed_regs, ARRAY_SIZE(xehp_shadowed_regs) }, }; - const i915_reg_t *reg; + const struct i915_range *range; unsigned int i, j; s32 prev; - for (j = 0; j < ARRAY_SIZE(reg_lists); ++j) { - reg = reg_lists[j].regs; - for (i = 0, prev = -1; i < reg_lists[j].size; i++, reg++) { - u32 offset = i915_mmio_reg_offset(*reg); - - if (prev >= (s32)offset) { - pr_err("%s: entry[%d]:(%x) is before previous (%x)\n", - __func__, i, offset, prev); + for (j = 0; j < ARRAY_SIZE(range_lists); ++j) { + range = range_lists[j].regs; + for (i = 0, prev = -1; i < range_lists[j].size; i++, range++) { + if (range->end < range->start) { + pr_err("%s: range[%d]:(%06x-%06x) has end before start\n", + __func__, i, range->start, range->end); return -EINVAL; } - prev = offset; + if (prev >= (s32)range->start) { + pr_err("%s: range[%d]:(%06x-%06x) is before end of previous (%06x)\n", + __func__, i, range->start, range->end, prev); + return -EINVAL; + } + + if (range->start % 4) { + pr_err("%s: range[%d]:(%06x-%06x) has non-dword-aligned start\n", + __func__, i, range->start, range->end); + return -EINVAL; + } + + prev = range->end; } } diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index abf2d7a4fdf1..c1a6fff4135b 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -78,22 +78,21 @@ void ttm_mem_io_free(struct ttm_device *bdev, /** * ttm_move_memcpy - Helper to perform a memcpy ttm move operation. - * @bo: The struct ttm_buffer_object. - * @new_mem: The struct ttm_resource we're moving to (copy destination). - * @new_iter: A struct ttm_kmap_iter representing the destination resource. + * @clear: Whether to clear rather than copy. + * @num_pages: Number of pages of the operation. + * @dst_iter: A struct ttm_kmap_iter representing the destination resource. * @src_iter: A struct ttm_kmap_iter representing the source resource. * * This function is intended to be able to move out async under a * dma-fence if desired. */ -void ttm_move_memcpy(struct ttm_buffer_object *bo, +void ttm_move_memcpy(bool clear, u32 num_pages, struct ttm_kmap_iter *dst_iter, struct ttm_kmap_iter *src_iter) { const struct ttm_kmap_iter_ops *dst_ops = dst_iter->ops; const struct ttm_kmap_iter_ops *src_ops = src_iter->ops; - struct ttm_tt *ttm = bo->ttm; struct dma_buf_map src_map, dst_map; pgoff_t i; @@ -102,10 +101,7 @@ void ttm_move_memcpy(struct ttm_buffer_object *bo, return; /* Don't move nonexistent data. Clear destination instead. */ - if (src_ops->maps_tt && (!ttm || !ttm_tt_is_populated(ttm))) { - if (ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)) - return; - + if (clear) { for (i = 0; i < num_pages; ++i) { dst_ops->map_local(dst_iter, &dst_map, i); if (dst_map.is_iomem) @@ -148,6 +144,7 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, struct ttm_kmap_iter_linear_io io; } _dst_iter, _src_iter; struct ttm_kmap_iter *dst_iter, *src_iter; + bool clear; int ret = 0; if (ttm && ((ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) || @@ -171,7 +168,9 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, goto out_src_iter; } - ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); + clear = src_iter->ops->maps_tt && (!ttm || !ttm_tt_is_populated(ttm)); + if (!(clear && ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC))) + ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter); if (!src_iter->ops->maps_tt) ttm_kmap_iter_linear_io_fini(&_src_iter.io, bdev, src_mem); diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 68d6069572aa..5f087575194b 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -322,7 +322,7 @@ int ttm_bo_tt_bind(struct ttm_buffer_object *bo, struct ttm_resource *mem); */ void ttm_bo_tt_destroy(struct ttm_buffer_object *bo); -void ttm_move_memcpy(struct ttm_buffer_object *bo, +void ttm_move_memcpy(bool clear, u32 num_pages, struct ttm_kmap_iter *dst_iter, struct ttm_kmap_iter *src_iter);