From ce7e75c7ef1bf8ea3d947da8c674d2f40fd7d734 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Wed, 28 Jul 2021 12:21:00 -0700 Subject: [PATCH 01/85] drm/i915: Disable bonding on gen12+ platforms Disable bonding on gen12+ platforms aside from ones already supported by the i915 - TGL, RKL, and ADL-S. Signed-off-by: Matthew Brost Reviewed-by: John Harrison Acked-by: Daniel Vetter Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210728192100.132425-1-matthew.brost@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index cff72679ad7c..dbaeb924a437 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -442,6 +442,13 @@ set_proto_ctx_engines_bond(struct i915_user_extension __user *base, void *data) u16 idx, num_bonds; int err, n; + if (GRAPHICS_VER(i915) >= 12 && !IS_TIGERLAKE(i915) && + !IS_ROCKETLAKE(i915) && !IS_ALDERLAKE_S(i915)) { + drm_dbg(&i915->drm, + "Bonding on gen12+ aside from TGL, RKL, and ADL_S not supported\n"); + return -ENODEV; + } + if (get_user(idx, &ext->virtual_index)) return -EFAULT; From bc33e71f00a7491810cac9e1335ca97e889d5620 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 28 Jul 2021 22:41:13 -0700 Subject: [PATCH 02/85] drm/i915: correct name of GT forcewake domain in error messages For historical reasons, the GT forcewake domain used to be referred to as the "blitter" domain; that name is no longer accurate since the GT domain contains a lot of additional registers and functionality besides just the blitter. Although we renamed the domain in the driver in commit 55e3c170950f ("drm/i915: Rename FORCEWAKE_BLITTER to FORCEWAKE_GT"), we neglected to update the string that gets printed in driver error messages; let's do that now to avoid confusion. Signed-off-by: Matt Roper Reviewed-by: Caz Yokoyama Link: https://patchwork.freedesktop.org/patch/msgid/20210729054118.2458523-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 0c35acfcd6da..13d069823635 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -64,7 +64,7 @@ static void mmio_debug_resume(struct intel_uncore_mmio_debug *mmio_debug) static const char * const forcewake_domain_names[] = { "render", - "blitter", + "gt", "media", "vdbox0", "vdbox1", From 39afa4104bedf214e5779ef20655665723ad48cd Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 28 Jul 2021 22:41:14 -0700 Subject: [PATCH 03/85] drm/i915: Re-use gen11 forcewake read functions on gen12 The forcewake read logic is identical between gen11 and gen12, only the forcewake table data (which is tracked separately) differs; there's no need to generate a separate set of gen12 read functions when the gen11 functions will work just as well. We'll keep the separate write functions for now since the generated code directly references different shadow tables between the two platforms. Signed-off-by: Matt Roper Reviewed-by: Caz Yokoyama Link: https://patchwork.freedesktop.org/patch/msgid/20210729054118.2458523-3-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 13d069823635..9ed7ce71e520 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -945,9 +945,6 @@ static const struct intel_forcewake_range __vlv_fw_ranges[] = { #define __gen11_fwtable_reg_read_fw_domains(uncore, offset) \ find_fw_domain(uncore, offset) -#define __gen12_fwtable_reg_read_fw_domains(uncore, offset) \ - find_fw_domain(uncore, offset) - /* *Must* be sorted by offset! See intel_shadow_table_check(). */ static const i915_reg_t gen8_shadowed_regs[] = { RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ @@ -1644,7 +1641,6 @@ __gen_read(func, 16) \ __gen_read(func, 32) \ __gen_read(func, 64) -__gen_reg_read_funcs(gen12_fwtable); __gen_reg_read_funcs(gen11_fwtable); __gen_reg_read_funcs(fwtable); __gen_reg_read_funcs(gen6); @@ -2122,7 +2118,7 @@ static int uncore_forcewake_init(struct intel_uncore *uncore) } else if (GRAPHICS_VER(i915) >= 12) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen12_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen12_fwtable); - ASSIGN_READ_MMIO_VFUNCS(uncore, gen12_fwtable); + ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); } else if (GRAPHICS_VER(i915) == 11) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen11_fw_ranges); ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen11_fwtable); From f9d56cd64ef3186d6ce072751f7f44dcd189f6bc Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 29 Jul 2021 08:21:58 -0700 Subject: [PATCH 04/85] drm/i915: Make shadow tables range-based Rather than defining our shadow tables as a list of individual registers, provide them as a list of register ranges; we'll have some ranges of multiple registers being added soon (and we already have a couple adjacent registers that we can squash into a single range now). This change also defines the table with hex literal values rather than symbolic register names; since that's how the tables are defined in the bspec, this change will make it easier to review the tables overall. v2: - Force signed comparison on range overlap sanity check Signed-off-by: Matt Roper Reviewed-by: Caz Yokoyama Link: https://patchwork.freedesktop.org/patch/msgid/20210729152158.2646246-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 13 +- drivers/gpu/drm/i915/intel_uncore.c | 160 +++++++++--------- drivers/gpu/drm/i915/intel_uncore.h | 6 + drivers/gpu/drm/i915/selftests/intel_uncore.c | 34 ++-- 4 files changed, 109 insertions(+), 104 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index aae609d7d85d..e9b8af9f08ea 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -2067,12 +2067,7 @@ void intel_engine_apply_workarounds(struct intel_engine_cs *engine) wa_list_apply(engine->gt, &engine->wa_list); } -struct mcr_range { - u32 start; - u32 end; -}; - -static const struct mcr_range mcr_ranges_gen8[] = { +static const struct i915_range mcr_ranges_gen8[] = { { .start = 0x5500, .end = 0x55ff }, { .start = 0x7000, .end = 0x7fff }, { .start = 0x9400, .end = 0x97ff }, @@ -2081,7 +2076,7 @@ static const struct mcr_range mcr_ranges_gen8[] = { {}, }; -static const struct mcr_range mcr_ranges_gen12[] = { +static const struct i915_range mcr_ranges_gen12[] = { { .start = 0x8150, .end = 0x815f }, { .start = 0x9520, .end = 0x955f }, { .start = 0xb100, .end = 0xb3ff }, @@ -2090,7 +2085,7 @@ static const struct mcr_range mcr_ranges_gen12[] = { {}, }; -static const struct mcr_range mcr_ranges_xehp[] = { +static const struct i915_range mcr_ranges_xehp[] = { { .start = 0x4000, .end = 0x4aff }, { .start = 0x5200, .end = 0x52ff }, { .start = 0x5400, .end = 0x7fff }, @@ -2109,7 +2104,7 @@ static const struct mcr_range mcr_ranges_xehp[] = { static bool mcr_range(struct drm_i915_private *i915, u32 offset) { - const struct mcr_range *mcr_ranges; + const struct i915_range *mcr_ranges; int i; if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 9ed7ce71e520..52601b960248 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -946,101 +946,95 @@ static const struct intel_forcewake_range __vlv_fw_ranges[] = { find_fw_domain(uncore, offset) /* *Must* be sorted by offset! See intel_shadow_table_check(). */ -static const i915_reg_t gen8_shadowed_regs[] = { - RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ - GEN6_RPNSWREQ, /* 0xA008 */ - GEN6_RC_VIDEO_FREQ, /* 0xA00C */ - RING_TAIL(GEN6_BSD_RING_BASE), /* 0x12000 (base) */ - RING_TAIL(VEBOX_RING_BASE), /* 0x1a000 (base) */ - RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ +static const struct i915_range gen8_shadowed_regs[] = { + { .start = 0x2030, .end = 0x2030 }, + { .start = 0xA008, .end = 0xA00C }, + { .start = 0x12030, .end = 0x12030 }, + { .start = 0x1a030, .end = 0x1a030 }, + { .start = 0x22030, .end = 0x22030 }, /* TODO: Other registers are not yet used */ }; -static const i915_reg_t gen11_shadowed_regs[] = { - RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ - RING_EXECLIST_CONTROL(RENDER_RING_BASE), /* 0x2550 */ - GEN6_RPNSWREQ, /* 0xA008 */ - GEN6_RC_VIDEO_FREQ, /* 0xA00C */ - RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ - RING_EXECLIST_CONTROL(BLT_RING_BASE), /* 0x22550 */ - RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD_RING_BASE), /* 0x1C0550 */ - RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD2_RING_BASE), /* 0x1C4550 */ - RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ - RING_EXECLIST_CONTROL(GEN11_VEBOX_RING_BASE), /* 0x1C8550 */ - RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD3_RING_BASE), /* 0x1D0550 */ - RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD4_RING_BASE), /* 0x1D4550 */ - RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ - RING_EXECLIST_CONTROL(GEN11_VEBOX2_RING_BASE), /* 0x1D8550 */ +static const struct i915_range gen11_shadowed_regs[] = { + { .start = 0x2030, .end = 0x2030 }, + { .start = 0x2550, .end = 0x2550 }, + { .start = 0xA008, .end = 0xA00C }, + { .start = 0x22030, .end = 0x22030 }, + { .start = 0x22550, .end = 0x22550 }, + { .start = 0x1C0030, .end = 0x1C0030 }, + { .start = 0x1C0550, .end = 0x1C0550 }, + { .start = 0x1C4030, .end = 0x1C4030 }, + { .start = 0x1C4550, .end = 0x1C4550 }, + { .start = 0x1C8030, .end = 0x1C8030 }, + { .start = 0x1C8550, .end = 0x1C8550 }, + { .start = 0x1D0030, .end = 0x1D0030 }, + { .start = 0x1D0550, .end = 0x1D0550 }, + { .start = 0x1D4030, .end = 0x1D4030 }, + { .start = 0x1D4550, .end = 0x1D4550 }, + { .start = 0x1D8030, .end = 0x1D8030 }, + { .start = 0x1D8550, .end = 0x1D8550 }, /* TODO: Other registers are not yet used */ }; -static const i915_reg_t gen12_shadowed_regs[] = { - RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ - RING_EXECLIST_CONTROL(RENDER_RING_BASE), /* 0x2550 */ - GEN6_RPNSWREQ, /* 0xA008 */ - GEN6_RC_VIDEO_FREQ, /* 0xA00C */ - RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ - RING_EXECLIST_CONTROL(BLT_RING_BASE), /* 0x22550 */ - RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD_RING_BASE), /* 0x1C0550 */ - RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD2_RING_BASE), /* 0x1C4550 */ - RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ - RING_EXECLIST_CONTROL(GEN11_VEBOX_RING_BASE), /* 0x1C8550 */ - RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD3_RING_BASE), /* 0x1D0550 */ - RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD4_RING_BASE), /* 0x1D4550 */ - RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ - RING_EXECLIST_CONTROL(GEN11_VEBOX2_RING_BASE), /* 0x1D8550 */ +static const struct i915_range gen12_shadowed_regs[] = { + { .start = 0x2030, .end = 0x2030 }, + { .start = 0x2550, .end = 0x2550 }, + { .start = 0xA008, .end = 0xA00C }, + { .start = 0x22030, .end = 0x22030 }, + { .start = 0x22550, .end = 0x22550 }, + { .start = 0x1C0030, .end = 0x1C0030 }, + { .start = 0x1C0550, .end = 0x1C0550 }, + { .start = 0x1C4030, .end = 0x1C4030 }, + { .start = 0x1C4550, .end = 0x1C4550 }, + { .start = 0x1C8030, .end = 0x1C8030 }, + { .start = 0x1C8550, .end = 0x1C8550 }, + { .start = 0x1D0030, .end = 0x1D0030 }, + { .start = 0x1D0550, .end = 0x1D0550 }, + { .start = 0x1D4030, .end = 0x1D4030 }, + { .start = 0x1D4550, .end = 0x1D4550 }, + { .start = 0x1D8030, .end = 0x1D8030 }, + { .start = 0x1D8550, .end = 0x1D8550 }, /* TODO: Other registers are not yet used */ }; -static const i915_reg_t xehp_shadowed_regs[] = { - RING_TAIL(RENDER_RING_BASE), /* 0x2000 (base) */ - RING_EXECLIST_CONTROL(RENDER_RING_BASE), /* 0x2550 */ - GEN6_RPNSWREQ, /* 0xA008 */ - GEN6_RC_VIDEO_FREQ, /* 0xA00C */ - RING_TAIL(BLT_RING_BASE), /* 0x22000 (base) */ - RING_EXECLIST_CONTROL(BLT_RING_BASE), /* 0x22550 */ - RING_TAIL(GEN11_BSD_RING_BASE), /* 0x1C0000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD_RING_BASE), /* 0x1C0550 */ - RING_TAIL(GEN11_BSD2_RING_BASE), /* 0x1C4000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD2_RING_BASE), /* 0x1C4550 */ - RING_TAIL(GEN11_VEBOX_RING_BASE), /* 0x1C8000 (base) */ - RING_EXECLIST_CONTROL(GEN11_VEBOX_RING_BASE), /* 0x1C8550 */ - RING_TAIL(GEN11_BSD3_RING_BASE), /* 0x1D0000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD3_RING_BASE), /* 0x1D0550 */ - RING_TAIL(GEN11_BSD4_RING_BASE), /* 0x1D4000 (base) */ - RING_EXECLIST_CONTROL(GEN11_BSD4_RING_BASE), /* 0x1D4550 */ - RING_TAIL(GEN11_VEBOX2_RING_BASE), /* 0x1D8000 (base) */ - RING_EXECLIST_CONTROL(GEN11_VEBOX2_RING_BASE), /* 0x1D8550 */ - RING_TAIL(XEHP_BSD5_RING_BASE), /* 0x1E0000 (base) */ - RING_EXECLIST_CONTROL(XEHP_BSD5_RING_BASE), /* 0x1E0550 */ - RING_TAIL(XEHP_BSD6_RING_BASE), /* 0x1E4000 (base) */ - RING_EXECLIST_CONTROL(XEHP_BSD6_RING_BASE), /* 0x1E4550 */ - RING_TAIL(XEHP_VEBOX3_RING_BASE), /* 0x1E8000 (base) */ - RING_EXECLIST_CONTROL(XEHP_VEBOX3_RING_BASE), /* 0x1E8550 */ - RING_TAIL(XEHP_BSD7_RING_BASE), /* 0x1F0000 (base) */ - RING_EXECLIST_CONTROL(XEHP_BSD7_RING_BASE), /* 0x1F0550 */ - RING_TAIL(XEHP_BSD8_RING_BASE), /* 0x1F4000 (base) */ - RING_EXECLIST_CONTROL(XEHP_BSD8_RING_BASE), /* 0x1F4550 */ - RING_TAIL(XEHP_VEBOX4_RING_BASE), /* 0x1F8000 (base) */ - RING_EXECLIST_CONTROL(XEHP_VEBOX4_RING_BASE), /* 0x1F8550 */ +static const struct i915_range xehp_shadowed_regs[] = { + { .start = 0x2000, .end = 0x2030 }, + { .start = 0x2550, .end = 0x2550 }, + { .start = 0xA008, .end = 0xA00C }, + { .start = 0x22030, .end = 0x22030 }, + { .start = 0x22550, .end = 0x22550 }, + { .start = 0x1C0030, .end = 0x1C0030 }, + { .start = 0x1C0550, .end = 0x1C0550 }, + { .start = 0x1C4030, .end = 0x1C4030 }, + { .start = 0x1C4550, .end = 0x1C4550 }, + { .start = 0x1C8030, .end = 0x1C8030 }, + { .start = 0x1C8550, .end = 0x1C8550 }, + { .start = 0x1D0030, .end = 0x1D0030 }, + { .start = 0x1D0550, .end = 0x1D0550 }, + { .start = 0x1D4030, .end = 0x1D4030 }, + { .start = 0x1D4550, .end = 0x1D4550 }, + { .start = 0x1D8030, .end = 0x1D8030 }, + { .start = 0x1D8550, .end = 0x1D8550 }, + { .start = 0x1E0030, .end = 0x1E0030 }, + { .start = 0x1E0550, .end = 0x1E0550 }, + { .start = 0x1E4030, .end = 0x1E4030 }, + { .start = 0x1E4550, .end = 0x1E4550 }, + { .start = 0x1E8030, .end = 0x1E8030 }, + { .start = 0x1E8550, .end = 0x1E8550 }, + { .start = 0x1F0030, .end = 0x1F0030 }, + { .start = 0x1F0550, .end = 0x1F0550 }, + { .start = 0x1F4030, .end = 0x1F4030 }, + { .start = 0x1F4550, .end = 0x1F4550 }, + { .start = 0x1F8030, .end = 0x1F8030 }, + { .start = 0x1F8550, .end = 0x1F8550 }, /* TODO: Other registers are not yet used */ }; -static int mmio_reg_cmp(u32 key, const i915_reg_t *reg) +static int mmio_range_cmp(u32 key, const struct i915_range *range) { - u32 offset = i915_mmio_reg_offset(*reg); - - if (key < offset) + if (key < range->start) return -1; - else if (key > offset) + else if (key > range->end) return 1; else return 0; @@ -1049,9 +1043,9 @@ static int mmio_reg_cmp(u32 key, const i915_reg_t *reg) #define __is_X_shadowed(x) \ static bool is_##x##_shadowed(u32 offset) \ { \ - const i915_reg_t *regs = x##_shadowed_regs; \ + const struct i915_range *regs = x##_shadowed_regs; \ return BSEARCH(offset, regs, ARRAY_SIZE(x##_shadowed_regs), \ - mmio_reg_cmp); \ + mmio_range_cmp); \ } __is_X_shadowed(gen8) diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 3c0b0a8b5250..531665b08039 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -119,6 +119,12 @@ struct intel_forcewake_range { enum forcewake_domains domains; }; +/* Other register ranges (e.g., shadow tables, MCR tables, etc.) */ +struct i915_range { + u32 start; + u32 end; +}; + struct intel_uncore { void __iomem *regs; diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 720b60853f8b..d6a9c11afa23 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -62,30 +62,40 @@ static int intel_fw_table_check(const struct intel_forcewake_range *ranges, static int intel_shadow_table_check(void) { struct { - const i915_reg_t *regs; + const struct i915_range *regs; unsigned int size; - } reg_lists[] = { + } range_lists[] = { { gen8_shadowed_regs, ARRAY_SIZE(gen8_shadowed_regs) }, { gen11_shadowed_regs, ARRAY_SIZE(gen11_shadowed_regs) }, { gen12_shadowed_regs, ARRAY_SIZE(gen12_shadowed_regs) }, { xehp_shadowed_regs, ARRAY_SIZE(xehp_shadowed_regs) }, }; - const i915_reg_t *reg; + const struct i915_range *range; unsigned int i, j; s32 prev; - for (j = 0; j < ARRAY_SIZE(reg_lists); ++j) { - reg = reg_lists[j].regs; - for (i = 0, prev = -1; i < reg_lists[j].size; i++, reg++) { - u32 offset = i915_mmio_reg_offset(*reg); - - if (prev >= (s32)offset) { - pr_err("%s: entry[%d]:(%x) is before previous (%x)\n", - __func__, i, offset, prev); + for (j = 0; j < ARRAY_SIZE(range_lists); ++j) { + range = range_lists[j].regs; + for (i = 0, prev = -1; i < range_lists[j].size; i++, range++) { + if (range->end < range->start) { + pr_err("%s: range[%d]:(%06x-%06x) has end before start\n", + __func__, i, range->start, range->end); return -EINVAL; } - prev = offset; + if (prev >= (s32)range->start) { + pr_err("%s: range[%d]:(%06x-%06x) is before end of previous (%06x)\n", + __func__, i, range->start, range->end, prev); + return -EINVAL; + } + + if (range->start % 4) { + pr_err("%s: range[%d]:(%06x-%06x) has non-dword-aligned start\n", + __func__, i, range->start, range->end); + return -EINVAL; + } + + prev = range->end; } } From 0bb50de156d8280e53884adf1d5a04d6108f90e7 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 28 Jul 2021 22:41:16 -0700 Subject: [PATCH 05/85] drm/i915/gen11: Update shadowed register table The bspec lists many shadowed registers (i.e., registers for which we don't need to grab forcewake when writing) that we weren't tracking in the driver. Although we may not actually use all of these registers right now, it's best to just match the bspec list exactly. Note that the bspec also lists registers that are shadowed for various HW-internal accesses; we can ignore those and just list the ones that are shadowed for accesses from the IA/CPU. Bspec: 18333 Signed-off-by: Matt Roper Reviewed-by: Caz Yokoyama Link: https://patchwork.freedesktop.org/patch/msgid/20210729054118.2458523-5-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 52601b960248..6b5b029148b2 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -960,20 +960,26 @@ static const struct i915_range gen11_shadowed_regs[] = { { .start = 0x2550, .end = 0x2550 }, { .start = 0xA008, .end = 0xA00C }, { .start = 0x22030, .end = 0x22030 }, - { .start = 0x22550, .end = 0x22550 }, + { .start = 0x22230, .end = 0x22230 }, + { .start = 0x22510, .end = 0x22550 }, { .start = 0x1C0030, .end = 0x1C0030 }, - { .start = 0x1C0550, .end = 0x1C0550 }, + { .start = 0x1C0230, .end = 0x1C0230 }, + { .start = 0x1C0510, .end = 0x1C0550 }, { .start = 0x1C4030, .end = 0x1C4030 }, - { .start = 0x1C4550, .end = 0x1C4550 }, + { .start = 0x1C4230, .end = 0x1C4230 }, + { .start = 0x1C4510, .end = 0x1C4550 }, { .start = 0x1C8030, .end = 0x1C8030 }, - { .start = 0x1C8550, .end = 0x1C8550 }, + { .start = 0x1C8230, .end = 0x1C8230 }, + { .start = 0x1C8510, .end = 0x1C8550 }, { .start = 0x1D0030, .end = 0x1D0030 }, - { .start = 0x1D0550, .end = 0x1D0550 }, + { .start = 0x1D0230, .end = 0x1D0230 }, + { .start = 0x1D0510, .end = 0x1D0550 }, { .start = 0x1D4030, .end = 0x1D4030 }, - { .start = 0x1D4550, .end = 0x1D4550 }, + { .start = 0x1D4230, .end = 0x1D4230 }, + { .start = 0x1D4510, .end = 0x1D4550 }, { .start = 0x1D8030, .end = 0x1D8030 }, - { .start = 0x1D8550, .end = 0x1D8550 }, - /* TODO: Other registers are not yet used */ + { .start = 0x1D8230, .end = 0x1D8230 }, + { .start = 0x1D8510, .end = 0x1D8550 }, }; static const struct i915_range gen12_shadowed_regs[] = { From 5798a769d6f5be656638c5e6e0cd5c4f155a2fb5 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 28 Jul 2021 22:41:17 -0700 Subject: [PATCH 06/85] drm/i915/gen12: Update shadowed register table The bspec lists many shadowed registers (i.e., registers for which we don't need to grab forcewake when writing) that we weren't tracking in the driver. Although we may not actually use all of these registers right now, it's best to just match the bspec list exactly. Note that the bspec also lists registers that are shadowed for various HW-internal accesses; we can ignore those and just list the ones that are shadowed for accesses from the IA/CPU. Bspec: 52077 Signed-off-by: Matt Roper Reviewed-by: Caz Yokoyama Link: https://patchwork.freedesktop.org/patch/msgid/20210729054118.2458523-6-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 6b5b029148b2..0b2dbcc14802 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -984,23 +984,28 @@ static const struct i915_range gen11_shadowed_regs[] = { static const struct i915_range gen12_shadowed_regs[] = { { .start = 0x2030, .end = 0x2030 }, - { .start = 0x2550, .end = 0x2550 }, + { .start = 0x2510, .end = 0x2550 }, { .start = 0xA008, .end = 0xA00C }, + { .start = 0xA188, .end = 0xA188 }, + { .start = 0xA278, .end = 0xA278 }, + { .start = 0xA540, .end = 0xA56C }, + { .start = 0xC4C8, .end = 0xC4C8 }, + { .start = 0xC4D4, .end = 0xC4D4 }, + { .start = 0xC600, .end = 0xC600 }, { .start = 0x22030, .end = 0x22030 }, - { .start = 0x22550, .end = 0x22550 }, + { .start = 0x22510, .end = 0x22550 }, { .start = 0x1C0030, .end = 0x1C0030 }, - { .start = 0x1C0550, .end = 0x1C0550 }, + { .start = 0x1C0510, .end = 0x1C0550 }, { .start = 0x1C4030, .end = 0x1C4030 }, - { .start = 0x1C4550, .end = 0x1C4550 }, + { .start = 0x1C4510, .end = 0x1C4550 }, { .start = 0x1C8030, .end = 0x1C8030 }, - { .start = 0x1C8550, .end = 0x1C8550 }, + { .start = 0x1C8510, .end = 0x1C8550 }, { .start = 0x1D0030, .end = 0x1D0030 }, - { .start = 0x1D0550, .end = 0x1D0550 }, + { .start = 0x1D0510, .end = 0x1D0550 }, { .start = 0x1D4030, .end = 0x1D4030 }, - { .start = 0x1D4550, .end = 0x1D4550 }, + { .start = 0x1D4510, .end = 0x1D4550 }, { .start = 0x1D8030, .end = 0x1D8030 }, - { .start = 0x1D8550, .end = 0x1D8550 }, - /* TODO: Other registers are not yet used */ + { .start = 0x1D8510, .end = 0x1D8550 }, }; static const struct i915_range xehp_shadowed_regs[] = { From 5c5c40e28c52a36bb5ac26817275d5a0281ab819 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 28 Jul 2021 22:41:18 -0700 Subject: [PATCH 07/85] drm/i915/xehp: Xe_HP shadowed registers are a strict superset of gen12 The list of shadowed registers on XeHP is identical to the set for earlier gen12 platforms, with additional ranges added for the new VCS and VECS engines. Since those register ranges were reserved on earlier gen12 platforms, it's safe to consolidate to a single gen12 table rather than tracking Xe_HP separately. Bspec: 52077 Signed-off-by: Matt Roper Reviewed-by: Caz Yokoyama Link: https://patchwork.freedesktop.org/patch/msgid/20210729054118.2458523-7-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 52 +++++-------------- drivers/gpu/drm/i915/selftests/intel_uncore.c | 1 - 2 files changed, 13 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 0b2dbcc14802..de4ef9bd3b51 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1006,39 +1006,24 @@ static const struct i915_range gen12_shadowed_regs[] = { { .start = 0x1D4510, .end = 0x1D4550 }, { .start = 0x1D8030, .end = 0x1D8030 }, { .start = 0x1D8510, .end = 0x1D8550 }, -}; -static const struct i915_range xehp_shadowed_regs[] = { - { .start = 0x2000, .end = 0x2030 }, - { .start = 0x2550, .end = 0x2550 }, - { .start = 0xA008, .end = 0xA00C }, - { .start = 0x22030, .end = 0x22030 }, - { .start = 0x22550, .end = 0x22550 }, - { .start = 0x1C0030, .end = 0x1C0030 }, - { .start = 0x1C0550, .end = 0x1C0550 }, - { .start = 0x1C4030, .end = 0x1C4030 }, - { .start = 0x1C4550, .end = 0x1C4550 }, - { .start = 0x1C8030, .end = 0x1C8030 }, - { .start = 0x1C8550, .end = 0x1C8550 }, - { .start = 0x1D0030, .end = 0x1D0030 }, - { .start = 0x1D0550, .end = 0x1D0550 }, - { .start = 0x1D4030, .end = 0x1D4030 }, - { .start = 0x1D4550, .end = 0x1D4550 }, - { .start = 0x1D8030, .end = 0x1D8030 }, - { .start = 0x1D8550, .end = 0x1D8550 }, + /* + * The rest of these ranges are specific to Xe_HP and beyond, but + * are reserved/unused ranges on earlier gen12 platforms, so they can + * be safely added to the gen12 table. + */ { .start = 0x1E0030, .end = 0x1E0030 }, - { .start = 0x1E0550, .end = 0x1E0550 }, + { .start = 0x1E0510, .end = 0x1E0550 }, { .start = 0x1E4030, .end = 0x1E4030 }, - { .start = 0x1E4550, .end = 0x1E4550 }, + { .start = 0x1E4510, .end = 0x1E4550 }, { .start = 0x1E8030, .end = 0x1E8030 }, - { .start = 0x1E8550, .end = 0x1E8550 }, + { .start = 0x1E8510, .end = 0x1E8550 }, { .start = 0x1F0030, .end = 0x1F0030 }, - { .start = 0x1F0550, .end = 0x1F0550 }, + { .start = 0x1F0510, .end = 0x1F0550 }, { .start = 0x1F4030, .end = 0x1F4030 }, - { .start = 0x1F4550, .end = 0x1F4550 }, + { .start = 0x1F4510, .end = 0x1F4550 }, { .start = 0x1F8030, .end = 0x1F8030 }, - { .start = 0x1F8550, .end = 0x1F8550 }, - /* TODO: Other registers are not yet used */ + { .start = 0x1F8510, .end = 0x1F8550 }, }; static int mmio_range_cmp(u32 key, const struct i915_range *range) @@ -1062,7 +1047,6 @@ static bool is_##x##_shadowed(u32 offset) \ __is_X_shadowed(gen8) __is_X_shadowed(gen11) __is_X_shadowed(gen12) -__is_X_shadowed(xehp) static enum forcewake_domains gen6_reg_write_fw_domains(struct intel_uncore *uncore, i915_reg_t reg) @@ -1126,15 +1110,6 @@ static const struct intel_forcewake_range __chv_fw_ranges[] = { __fwd; \ }) -#define __xehp_fwtable_reg_write_fw_domains(uncore, offset) \ -({ \ - enum forcewake_domains __fwd = 0; \ - const u32 __offset = (offset); \ - if (!is_xehp_shadowed(__offset)) \ - __fwd = find_fw_domain(uncore, __offset); \ - __fwd; \ -}) - /* *Must* be sorted by offset ranges! See intel_fw_table_check(). */ static const struct intel_forcewake_range __gen9_fw_ranges[] = { GEN_FW_RANGE(0x0, 0xaff, FORCEWAKE_GT), @@ -1737,7 +1712,6 @@ __gen_write(func, 8) \ __gen_write(func, 16) \ __gen_write(func, 32) -__gen_reg_write_funcs(xehp_fwtable); __gen_reg_write_funcs(gen12_fwtable); __gen_reg_write_funcs(gen11_fwtable); __gen_reg_write_funcs(fwtable); @@ -2114,11 +2088,11 @@ static int uncore_forcewake_init(struct intel_uncore *uncore) if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __dg2_fw_ranges); - ASSIGN_WRITE_MMIO_VFUNCS(uncore, xehp_fwtable); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen12_fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); } else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { ASSIGN_FW_DOMAINS_TABLE(uncore, __xehp_fw_ranges); - ASSIGN_WRITE_MMIO_VFUNCS(uncore, xehp_fwtable); + ASSIGN_WRITE_MMIO_VFUNCS(uncore, gen12_fwtable); ASSIGN_READ_MMIO_VFUNCS(uncore, gen11_fwtable); } else if (GRAPHICS_VER(i915) >= 12) { ASSIGN_FW_DOMAINS_TABLE(uncore, __gen12_fw_ranges); diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index d6a9c11afa23..22ef2c87df1a 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -68,7 +68,6 @@ static int intel_shadow_table_check(void) { gen8_shadowed_regs, ARRAY_SIZE(gen8_shadowed_regs) }, { gen11_shadowed_regs, ARRAY_SIZE(gen11_shadowed_regs) }, { gen12_shadowed_regs, ARRAY_SIZE(gen12_shadowed_regs) }, - { xehp_shadowed_regs, ARRAY_SIZE(xehp_shadowed_regs) }, }; const struct i915_range *range; unsigned int i, j; From dae2d28832968751f7731336b560a4a84a197b76 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 10 Aug 2021 16:27:48 +0200 Subject: [PATCH 08/85] drm/doc/rfc: drop lmem uapi section We still have quite a bit more work to do with overall reworking of the ttm-based dg1 code, but the uapi stuff is now finalized with the latest pull. So remove that. This also fixes kerneldoc build warnings because we've included the same headers in two places, resulting in sphinx complaining about duplicated symbols. This regression has been created when we moved the uapi definitions to the real include/uapi/ folder in 727ecd99a4c9 ("drm/doc/rfc: drop the i915_gem_lmem.h header") v2: Fix a few references that I missed, the htmldocs build took forever. Acked-by: Jason Ekstrand Acked-by: Maarten Lankhorst Tested-by Stephen Rothwell (v1) References: https://lore.kernel.org/dri-devel/20210603193242.1ce99344@canb.auug.org.au/ Reported-by: Stephen Rothwell Cc: Stephen Rothwell Fixes: 727ecd99a4c9 ("drm/doc/rfc: drop the i915_gem_lmem.h header") Cc: Matthew Auld Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210810142748.1983271-1-daniel.vetter@ffwll.ch --- Documentation/gpu/rfc/i915_gem_lmem.rst | 109 ------------------------ 1 file changed, 109 deletions(-) diff --git a/Documentation/gpu/rfc/i915_gem_lmem.rst b/Documentation/gpu/rfc/i915_gem_lmem.rst index 675ba8620d66..b421a3c1806e 100644 --- a/Documentation/gpu/rfc/i915_gem_lmem.rst +++ b/Documentation/gpu/rfc/i915_gem_lmem.rst @@ -18,114 +18,5 @@ real, with all the uAPI bits is: * Route shmem backend over to TTM SYSTEM for discrete * TTM purgeable object support * Move i915 buddy allocator over to TTM - * MMAP ioctl mode(see `I915 MMAP`_) - * SET/GET ioctl caching(see `I915 SET/GET CACHING`_) * Send RFC(with mesa-dev on cc) for final sign off on the uAPI * Add pciid for DG1 and turn on uAPI for real - -New object placement and region query uAPI -========================================== -Starting from DG1 we need to give userspace the ability to allocate buffers from -device local-memory. Currently the driver supports gem_create, which can place -buffers in system memory via shmem, and the usual assortment of other -interfaces, like dumb buffers and userptr. - -To support this new capability, while also providing a uAPI which will work -beyond just DG1, we propose to offer three new bits of uAPI: - -DRM_I915_QUERY_MEMORY_REGIONS ------------------------------ -New query ID which allows userspace to discover the list of supported memory -regions(like system-memory and local-memory) for a given device. We identify -each region with a class and instance pair, which should be unique. The class -here would be DEVICE or SYSTEM, and the instance would be zero, on platforms -like DG1. - -Side note: The class/instance design is borrowed from our existing engine uAPI, -where we describe every physical engine in terms of its class, and the -particular instance, since we can have more than one per class. - -In the future we also want to expose more information which can further -describe the capabilities of a region. - -.. kernel-doc:: include/uapi/drm/i915_drm.h - :functions: drm_i915_gem_memory_class drm_i915_gem_memory_class_instance drm_i915_memory_region_info drm_i915_query_memory_regions - -GEM_CREATE_EXT --------------- -New ioctl which is basically just gem_create but now allows userspace to provide -a chain of possible extensions. Note that if we don't provide any extensions and -set flags=0 then we get the exact same behaviour as gem_create. - -Side note: We also need to support PXP[1] in the near future, which is also -applicable to integrated platforms, and adds its own gem_create_ext extension, -which basically lets userspace mark a buffer as "protected". - -.. kernel-doc:: include/uapi/drm/i915_drm.h - :functions: drm_i915_gem_create_ext - -I915_GEM_CREATE_EXT_MEMORY_REGIONS ----------------------------------- -Implemented as an extension for gem_create_ext, we would now allow userspace to -optionally provide an immutable list of preferred placements at creation time, -in priority order, for a given buffer object. For the placements we expect -them each to use the class/instance encoding, as per the output of the regions -query. Having the list in priority order will be useful in the future when -placing an object, say during eviction. - -.. kernel-doc:: include/uapi/drm/i915_drm.h - :functions: drm_i915_gem_create_ext_memory_regions - -One fair criticism here is that this seems a little over-engineered[2]. If we -just consider DG1 then yes, a simple gem_create.flags or something is totally -all that's needed to tell the kernel to allocate the buffer in local-memory or -whatever. However looking to the future we need uAPI which can also support -upcoming Xe HP multi-tile architecture in a sane way, where there can be -multiple local-memory instances for a given device, and so using both class and -instance in our uAPI to describe regions is desirable, although specifically -for DG1 it's uninteresting, since we only have a single local-memory instance. - -Existing uAPI issues -==================== -Some potential issues we still need to resolve. - -I915 MMAP ---------- -In i915 there are multiple ways to MMAP GEM object, including mapping the same -object using different mapping types(WC vs WB), i.e multiple active mmaps per -object. TTM expects one MMAP at most for the lifetime of the object. If it -turns out that we have to backpedal here, there might be some potential -userspace fallout. - -I915 SET/GET CACHING --------------------- -In i915 we have set/get_caching ioctl. TTM doesn't let us to change this, but -DG1 doesn't support non-snooped pcie transactions, so we can just always -allocate as WB for smem-only buffers. If/when our hw gains support for -non-snooped pcie transactions then we must fix this mode at allocation time as -a new GEM extension. - -This is related to the mmap problem, because in general (meaning, when we're -not running on intel cpus) the cpu mmap must not, ever, be inconsistent with -allocation mode. - -Possible idea is to let the kernel picks the mmap mode for userspace from the -following table: - -smem-only: WB. Userspace does not need to call clflush. - -smem+lmem: We only ever allow a single mode, so simply allocate this as uncached -memory, and always give userspace a WC mapping. GPU still does snooped access -here(assuming we can't turn it off like on DG1), which is a bit inefficient. - -lmem only: always WC - -This means on discrete you only get a single mmap mode, all others must be -rejected. That's probably going to be a new default mode or something like -that. - -Links -===== -[1] https://patchwork.freedesktop.org/series/86798/ - -[2] https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5599#note_553791 From fa9899dad3ed84a8b6433467670d4cacd9b873bc Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 5 Aug 2021 09:36:40 -0700 Subject: [PATCH 09/85] drm/i915/xehp: Loop over all gslices for INSTDONE processing We no longer have traditional slices on Xe_HP platforms, but the INSTDONE registers are replicated according to gslice representation which is similar. We can mostly re-use the existing instdone code with just a few modifications: * Create an alternate instdone loop macro that will iterate over the flat DSS space, but still provide the gslice/dss steering values for compatibility with the legacy code. * We should allocate INSTDONE storage space according to the maximum number of gslices rather than the maximum number of legacy slices to ensure we have enough storage space to hold all of the values. XeHP design has 8 gslices, whereas older platforms never had more than 3 slices. Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20210805163647.801064-3-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 48 +++++++++++--------- drivers/gpu/drm/i915/gt/intel_engine_types.h | 12 ++++- drivers/gpu/drm/i915/gt/intel_sseu.h | 7 +++ drivers/gpu/drm/i915/i915_gpu_error.c | 30 ++++++++---- 4 files changed, 65 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 0d9105a31d84..58ed67894b3d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1163,16 +1163,16 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, u32 mmio_base = engine->mmio_base; int slice; int subslice; + int iter; memset(instdone, 0, sizeof(*instdone)); - switch (GRAPHICS_VER(i915)) { - default: + if (GRAPHICS_VER(i915) >= 8) { instdone->instdone = intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); if (engine->id != RCS0) - break; + return; instdone->slice_common = intel_uncore_read(uncore, GEN7_SC_INSTDONE); @@ -1182,21 +1182,32 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, instdone->slice_common_extra[1] = intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA2); } - for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { - instdone->sampler[slice][subslice] = - read_subslice_reg(engine, slice, subslice, - GEN7_SAMPLER_INSTDONE); - instdone->row[slice][subslice] = - read_subslice_reg(engine, slice, subslice, - GEN7_ROW_INSTDONE); + + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { + for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) { + instdone->sampler[slice][subslice] = + read_subslice_reg(engine, slice, subslice, + GEN7_SAMPLER_INSTDONE); + instdone->row[slice][subslice] = + read_subslice_reg(engine, slice, subslice, + GEN7_ROW_INSTDONE); + } + } else { + for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { + instdone->sampler[slice][subslice] = + read_subslice_reg(engine, slice, subslice, + GEN7_SAMPLER_INSTDONE); + instdone->row[slice][subslice] = + read_subslice_reg(engine, slice, subslice, + GEN7_ROW_INSTDONE); + } } - break; - case 7: + } else if (GRAPHICS_VER(i915) >= 7) { instdone->instdone = intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); if (engine->id != RCS0) - break; + return; instdone->slice_common = intel_uncore_read(uncore, GEN7_SC_INSTDONE); @@ -1204,22 +1215,15 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, intel_uncore_read(uncore, GEN7_SAMPLER_INSTDONE); instdone->row[0][0] = intel_uncore_read(uncore, GEN7_ROW_INSTDONE); - - break; - case 6: - case 5: - case 4: + } else if (GRAPHICS_VER(i915) >= 4) { instdone->instdone = intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); if (engine->id == RCS0) /* HACK: Using the wrong struct member */ instdone->slice_common = intel_uncore_read(uncore, GEN4_INSTDONE1); - break; - case 3: - case 2: + } else { instdone->instdone = intel_uncore_read(uncore, GEN2_INSTDONE); - break; } } diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index ed91bcff20eb..0b4846b01626 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -67,8 +67,8 @@ struct intel_instdone { /* The following exist only in the RCS engine */ u32 slice_common; u32 slice_common_extra[2]; - u32 sampler[I915_MAX_SLICES][I915_MAX_SUBSLICES]; - u32 row[I915_MAX_SLICES][I915_MAX_SUBSLICES]; + u32 sampler[GEN_MAX_GSLICES][I915_MAX_SUBSLICES]; + u32 row[GEN_MAX_GSLICES][I915_MAX_SUBSLICES]; }; /* @@ -578,4 +578,12 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine) for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \ (instdone_has_subslice(dev_priv_, sseu_, slice_, \ subslice_))) + +#define for_each_instdone_gslice_dss_xehp(dev_priv_, sseu_, iter_, gslice_, dss_) \ + for ((iter_) = 0, (gslice_) = 0, (dss_) = 0; \ + (iter_) < GEN_MAX_SUBSLICES; \ + (iter_)++, (gslice_) = (iter_) / GEN_DSS_PER_GSLICE, \ + (dss_) = (iter_) % GEN_DSS_PER_GSLICE) \ + for_each_if(intel_sseu_has_subslice((sseu_), 0, (iter_))) + #endif /* __INTEL_ENGINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 22fef98887c0..0270acdcc157 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -26,6 +26,9 @@ struct drm_printer; #define GEN_DSS_PER_CSLICE 8 #define GEN_DSS_PER_MSLICE 8 +#define GEN_MAX_GSLICES (GEN_MAX_SUBSLICES / GEN_DSS_PER_GSLICE) +#define GEN_MAX_CSLICES (GEN_MAX_SUBSLICES / GEN_DSS_PER_CSLICE) + struct sseu_dev_info { u8 slice_mask; u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; @@ -78,6 +81,10 @@ intel_sseu_has_subslice(const struct sseu_dev_info *sseu, int slice, u8 mask; int ss_idx = subslice / BITS_PER_BYTE; + if (slice >= sseu->max_slices || + subslice >= sseu->max_subslices) + return false; + GEM_BUG_ON(ss_idx >= sseu->ss_stride); mask = sseu->subslice_mask[slice * sseu->ss_stride + ss_idx]; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 0f08bcfbe964..8230bc3ac8a9 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -444,15 +444,29 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, if (GRAPHICS_VER(m->i915) <= 6) return; - for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) - err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n", - slice, subslice, - ee->instdone.sampler[slice][subslice]); + if (GRAPHICS_VER_FULL(m->i915) >= IP_VER(12, 50)) { + int iter; - for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) - err_printf(m, " ROW_INSTDONE[%d][%d]: 0x%08x\n", - slice, subslice, - ee->instdone.row[slice][subslice]); + for_each_instdone_gslice_dss_xehp(m->i915, sseu, iter, slice, subslice) + err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n", + slice, subslice, + ee->instdone.sampler[slice][subslice]); + + for_each_instdone_gslice_dss_xehp(m->i915, sseu, iter, slice, subslice) + err_printf(m, " ROW_INSTDONE[%d][%d]: 0x%08x\n", + slice, subslice, + ee->instdone.row[slice][subslice]); + } else { + for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) + err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n", + slice, subslice, + ee->instdone.sampler[slice][subslice]); + + for_each_instdone_slice_subslice(m->i915, sseu, slice, subslice) + err_printf(m, " ROW_INSTDONE[%d][%d]: 0x%08x\n", + slice, subslice, + ee->instdone.row[slice][subslice]); + } if (GRAPHICS_VER(m->i915) < 12) return; From 89f2e7ab4dd93d8785619ce58838391b9b07feb7 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 5 Aug 2021 09:36:41 -0700 Subject: [PATCH 10/85] drm/i915/dg2: Report INSTDONE_GEOM values in error state Xe_HPG adds some additional INSTDONE_GEOM debug registers; the Mesa team has indicated that having these reported in the error state would be useful for debugging GPU hangs. These registers are replicated per-DSS with gslice steering. Cc: Lionel Landwerlin Signed-off-by: Matt Roper Acked-by: Lionel Landwerlin Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20210805163647.801064-4-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 7 +++++++ drivers/gpu/drm/i915/gt/intel_engine_types.h | 3 +++ drivers/gpu/drm/i915/i915_gpu_error.c | 10 ++++++++-- drivers/gpu/drm/i915/i915_reg.h | 1 + 4 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 58ed67894b3d..332efea696a5 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1202,6 +1202,13 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, GEN7_ROW_INSTDONE); } } + + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { + for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) + instdone->geom_svg[slice][subslice] = + read_subslice_reg(engine, slice, subslice, + XEHPG_INSTDONE_GEOM_SVG); + } } else if (GRAPHICS_VER(i915) >= 7) { instdone->instdone = intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 0b4846b01626..bfbfe53c23dd 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -69,6 +69,9 @@ struct intel_instdone { u32 slice_common_extra[2]; u32 sampler[GEN_MAX_GSLICES][I915_MAX_SUBSLICES]; u32 row[GEN_MAX_GSLICES][I915_MAX_SUBSLICES]; + + /* Added in XeHPG */ + u32 geom_svg[GEN_MAX_GSLICES][I915_MAX_SUBSLICES]; }; /* diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 8230bc3ac8a9..91d5da7b0a2b 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -431,6 +431,7 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, const struct sseu_dev_info *sseu = &ee->engine->gt->info.sseu; int slice; int subslice; + int iter; err_printf(m, " INSTDONE: 0x%08x\n", ee->instdone.instdone); @@ -445,8 +446,6 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, return; if (GRAPHICS_VER_FULL(m->i915) >= IP_VER(12, 50)) { - int iter; - for_each_instdone_gslice_dss_xehp(m->i915, sseu, iter, slice, subslice) err_printf(m, " SAMPLER_INSTDONE[%d][%d]: 0x%08x\n", slice, subslice, @@ -471,6 +470,13 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, if (GRAPHICS_VER(m->i915) < 12) return; + if (GRAPHICS_VER_FULL(m->i915) >= IP_VER(12, 55)) { + for_each_instdone_gslice_dss_xehp(m->i915, sseu, iter, slice, subslice) + err_printf(m, " GEOM_SVGUNIT_INSTDONE[%d][%d]: 0x%08x\n", + slice, subslice, + ee->instdone.geom_svg[slice][subslice]); + } + err_printf(m, " SC_INSTDONE_EXTRA: 0x%08x\n", ee->instdone.slice_common_extra[0]); err_printf(m, " SC_INSTDONE_EXTRA2: 0x%08x\n", diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2113925084b0..9884c1156b95 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2695,6 +2695,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN12_SC_INSTDONE_EXTRA2 _MMIO(0x7108) #define GEN7_SAMPLER_INSTDONE _MMIO(0xe160) #define GEN7_ROW_INSTDONE _MMIO(0xe164) +#define XEHPG_INSTDONE_GEOM_SVG _MMIO(0x666c) #define MCFG_MCR_SELECTOR _MMIO(0xfd0) #define SF_MCR_SELECTOR _MMIO(0xfd8) #define GEN8_MCR_SELECTOR _MMIO(0xfdc) From d16de9a25b5cc458d0c8c978970f8edf9cf710d0 Mon Sep 17 00:00:00 2001 From: Stuart Summers Date: Fri, 6 Aug 2021 10:29:01 -0700 Subject: [PATCH 11/85] drm/i915/xehpsdv: Add compute DSS type Starting in XeHP, the concept of slice has been removed in favor of DSS (Dual-Subslice) masks for various workload types. These workloads have been divided into those enabled for geometry and those enabled for compute. i915 currently maintains a single set of S/SS/EU masks for the device. The goal of this patch set is to minimize the amount of impact to prior generations while still giving the user maximum flexibility. v2: - Generalize a comment about uapi access to geometry/compute masks; the proposed uapi has changed since the comment was first written, and will show up in a future series once the userspace code is published. (Lucas) v3: - Eliminate unnecessary has_compute_dss flag. (Lucas) - Drop unwanted comment change in uapi header. (Lucas) Bspec: 33117, 33118, 20376 Cc: Daniele Ceraolo Spurio Cc: Matt Roper Cc: Lucas De Marchi Signed-off-by: Stuart Summers Signed-off-by: Steve Hampson Signed-off-by: Matt Roper Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20210806172901.1049133-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_sseu.c | 60 +++++++++++++++++++++------- drivers/gpu/drm/i915/gt/intel_sseu.h | 4 +- drivers/gpu/drm/i915/i915_reg.h | 3 +- 3 files changed, 50 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c index bbd272943c3f..b0e09b58005e 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c @@ -46,11 +46,11 @@ u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice) } void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, - u32 ss_mask) + u8 *subslice_mask, u32 ss_mask) { int offset = slice * sseu->ss_stride; - memcpy(&sseu->subslice_mask[offset], &ss_mask, sseu->ss_stride); + memcpy(&subslice_mask[offset], &ss_mask, sseu->ss_stride); } unsigned int @@ -100,14 +100,24 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu) return total; } -static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, - u8 s_en, u32 ss_en, u16 eu_en) +static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en) +{ + u32 ss_mask; + + ss_mask = ss_en >> (s * sseu->max_subslices); + ss_mask &= GENMASK(sseu->max_subslices - 1, 0); + + return ss_mask; +} + +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en, + u32 g_ss_en, u32 c_ss_en, u16 eu_en) { int s, ss; - /* ss_en represents entire subslice mask across all slices */ + /* g_ss_en/c_ss_en represent entire subslice mask across all slices */ GEM_BUG_ON(sseu->max_slices * sseu->max_subslices > - sizeof(ss_en) * BITS_PER_BYTE); + sizeof(g_ss_en) * BITS_PER_BYTE); for (s = 0; s < sseu->max_slices; s++) { if ((s_en & BIT(s)) == 0) @@ -115,7 +125,22 @@ static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, sseu->slice_mask |= BIT(s); - intel_sseu_set_subslices(sseu, s, ss_en); + /* + * XeHP introduces the concept of compute vs geometry DSS. To + * reduce variation between GENs around subslice usage, store a + * mask for both the geometry and compute enabled masks since + * userspace will need to be able to query these masks + * independently. Also compute a total enabled subslice count + * for the purposes of selecting subslices to use in a + * particular GEM context. + */ + intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask, + get_ss_stride_mask(sseu, s, c_ss_en)); + intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask, + get_ss_stride_mask(sseu, s, g_ss_en)); + intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, + get_ss_stride_mask(sseu, s, + g_ss_en | c_ss_en)); for (ss = 0; ss < sseu->max_subslices; ss++) if (intel_sseu_has_subslice(sseu, s, ss)) @@ -129,7 +154,7 @@ static void gen12_sseu_info_init(struct intel_gt *gt) { struct sseu_dev_info *sseu = >->info.sseu; struct intel_uncore *uncore = gt->uncore; - u32 dss_en; + u32 g_dss_en, c_dss_en = 0; u16 eu_en = 0; u8 eu_en_fuse; u8 s_en; @@ -160,7 +185,9 @@ static void gen12_sseu_info_init(struct intel_gt *gt) s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; - dss_en = intel_uncore_read(uncore, GEN12_GT_DSS_ENABLE); + g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE); + if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) + c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE); /* one bit per pair of EUs */ if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50)) @@ -173,7 +200,7 @@ static void gen12_sseu_info_init(struct intel_gt *gt) if (eu_en_fuse & BIT(eu)) eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1); - gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en); + gen11_compute_sseu_info(sseu, s_en, g_dss_en, c_dss_en, eu_en); /* TGL only supports slice-level power gating */ sseu->has_slice_pg = 1; @@ -199,7 +226,7 @@ static void gen11_sseu_info_init(struct intel_gt *gt) eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); - gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en); + gen11_compute_sseu_info(sseu, s_en, ss_en, 0, eu_en); /* ICL has no power gating restrictions. */ sseu->has_slice_pg = 1; @@ -240,7 +267,7 @@ static void cherryview_sseu_info_init(struct intel_gt *gt) sseu_set_eus(sseu, 0, 1, ~disabled_mask); } - intel_sseu_set_subslices(sseu, 0, subslice_mask); + intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, subslice_mask); sseu->eu_total = compute_eu_total(sseu); @@ -296,7 +323,8 @@ static void gen9_sseu_info_init(struct intel_gt *gt) /* skip disabled slice */ continue; - intel_sseu_set_subslices(sseu, s, subslice_mask); + intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, + subslice_mask); eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s)); for (ss = 0; ss < sseu->max_subslices; ss++) { @@ -408,7 +436,8 @@ static void bdw_sseu_info_init(struct intel_gt *gt) /* skip disabled slice */ continue; - intel_sseu_set_subslices(sseu, s, subslice_mask); + intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, + subslice_mask); for (ss = 0; ss < sseu->max_subslices; ss++) { u8 eu_disabled_mask; @@ -506,7 +535,8 @@ static void hsw_sseu_info_init(struct intel_gt *gt) sseu->eu_per_subslice); for (s = 0; s < sseu->max_slices; s++) { - intel_sseu_set_subslices(sseu, s, subslice_mask); + intel_sseu_set_subslices(sseu, s, sseu->subslice_mask, + subslice_mask); for (ss = 0; ss < sseu->max_subslices; ss++) { sseu_set_eus(sseu, s, ss, diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h index 0270acdcc157..60882a74741e 100644 --- a/drivers/gpu/drm/i915/gt/intel_sseu.h +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h @@ -32,6 +32,8 @@ struct drm_printer; struct sseu_dev_info { u8 slice_mask; u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; + u8 geometry_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; + u8 compute_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE]; u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE]; u16 eu_total; u8 eu_per_subslice; @@ -104,7 +106,7 @@ intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice); u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice); void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice, - u32 ss_mask); + u8 *subslice_mask, u32 ss_mask); void intel_sseu_info_init(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 9884c1156b95..c8db6e8ef1ad 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3160,7 +3160,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C) -#define GEN12_GT_DSS_ENABLE _MMIO(0x913C) +#define GEN12_GT_GEOMETRY_DSS_ENABLE _MMIO(0x913C) +#define GEN12_GT_COMPUTE_DSS_ENABLE _MMIO(0x9144) #define XEHP_EU_ENABLE _MMIO(0x9134) #define XEHP_EU_ENA_MASK 0xFF From b97090575ed27f8a23cc8f8ace642d5a8ea59206 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 10 Aug 2021 15:05:23 +0200 Subject: [PATCH 12/85] drm/i915: Use locked access to ctx->engines in set_priority This essentially reverts commit 89ff76bf9b3b0b86e6bbe344bd6378d8661303fc Author: Chris Wilson Date: Thu Apr 2 13:42:18 2020 +0100 drm/i915/gem: Utilize rcu iteration of context engines Note that the other use of __context_engines_await have disappeard in the following commits: ccbc1b97948a ("drm/i915/gem: Don't allow changing the VM on running contexts (v4)") c7a71fc8ee04 ("drm/i915: Drop getparam support for I915_CONTEXT_PARAM_ENGINES") 4a766ae40ec8 ("drm/i915: Drop the CONTEXT_CLONE API (v2)") None of these have any business to optimize their engine lookup with rcu, unless extremely convincing benchmark data and a solid analysis why we can't make that workload (whatever it is that does) faster with a proper design fix. Also since there's only one caller of context_apply_all left and it's really just a loop, inline it and then inline the lopp body too. This is how all other callers that take the engine lock loop over engines, it's much simpler. Reviewed-by: Jason Ekstrand Signed-off-by: Daniel Vetter Cc: Chris Wilson Cc: Mika Kuoppala Cc: Daniel Vetter Cc: Jason Ekstrand Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20210810130523.1972031-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 72 ++++----------------- 1 file changed, 14 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index dbaeb924a437..fd169cf2f75a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1284,49 +1284,6 @@ static int __context_set_persistence(struct i915_gem_context *ctx, bool state) return 0; } -static inline struct i915_gem_engines * -__context_engines_await(const struct i915_gem_context *ctx, - bool *user_engines) -{ - struct i915_gem_engines *engines; - - rcu_read_lock(); - do { - engines = rcu_dereference(ctx->engines); - GEM_BUG_ON(!engines); - - if (user_engines) - *user_engines = i915_gem_context_user_engines(ctx); - - /* successful await => strong mb */ - if (unlikely(!i915_sw_fence_await(&engines->fence))) - continue; - - if (likely(engines == rcu_access_pointer(ctx->engines))) - break; - - i915_sw_fence_complete(&engines->fence); - } while (1); - rcu_read_unlock(); - - return engines; -} - -static void -context_apply_all(struct i915_gem_context *ctx, - void (*fn)(struct intel_context *ce, void *data), - void *data) -{ - struct i915_gem_engines_iter it; - struct i915_gem_engines *e; - struct intel_context *ce; - - e = __context_engines_await(ctx, NULL); - for_each_gem_engine(ce, e, it) - fn(ce, data); - i915_sw_fence_complete(&e->fence); -} - static struct i915_gem_context * i915_gem_create_context(struct drm_i915_private *i915, const struct i915_gem_proto_context *pc) @@ -1776,23 +1733,11 @@ set_persistence(struct i915_gem_context *ctx, return __context_set_persistence(ctx, args->value); } -static void __apply_priority(struct intel_context *ce, void *arg) -{ - struct i915_gem_context *ctx = arg; - - if (!intel_engine_has_timeslices(ce->engine)) - return; - - if (ctx->sched.priority >= I915_PRIORITY_NORMAL && - intel_engine_has_semaphores(ce->engine)) - intel_context_set_use_semaphores(ce); - else - intel_context_clear_use_semaphores(ce); -} - static int set_priority(struct i915_gem_context *ctx, const struct drm_i915_gem_context_param *args) { + struct i915_gem_engines_iter it; + struct intel_context *ce; int err; err = validate_priority(ctx->i915, args); @@ -1800,7 +1745,18 @@ static int set_priority(struct i915_gem_context *ctx, return err; ctx->sched.priority = args->value; - context_apply_all(ctx, __apply_priority, ctx); + + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + if (!intel_engine_has_timeslices(ce->engine)) + continue; + + if (ctx->sched.priority >= I915_PRIORITY_NORMAL && + intel_engine_has_semaphores(ce->engine)) + intel_context_set_use_semaphores(ce); + else + intel_context_clear_use_semaphores(ce); + } + i915_gem_context_unlock_engines(ctx); return 0; } From efd330b97855013c8b58185683ddfb75deab5fa9 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 5 Aug 2021 09:36:43 -0700 Subject: [PATCH 13/85] drm/i915/xehpsdv: factor out function to read RP_STATE_CAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of maintaining the same if ladder in 3 different places, add a function to read RP_STATE_CAP. Signed-off-by: Lucas De Marchi Signed-off-by: Matt Roper Reviewed-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20210805163647.801064-6-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/debugfs_gt_pm.c | 8 +++----- drivers/gpu/drm/i915/gt/intel_rps.c | 17 ++++++++++++----- drivers/gpu/drm/i915/gt/intel_rps.h | 1 + drivers/gpu/drm/i915/i915_debugfs.c | 8 +++----- 4 files changed, 19 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c index d6f5836396f8..f6733f279890 100644 --- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c @@ -309,13 +309,11 @@ static int frequency_show(struct seq_file *m, void *unused) int max_freq; rp_state_limits = intel_uncore_read(uncore, GEN6_RP_STATE_LIMITS); - if (IS_GEN9_LP(i915)) { - rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP); + rp_state_cap = intel_rps_read_state_cap(rps); + if (IS_GEN9_LP(i915)) gt_perf_status = intel_uncore_read(uncore, BXT_GT_PERF_STATUS); - } else { - rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP); + else gt_perf_status = intel_uncore_read(uncore, GEN6_GT_PERF_STATUS); - } /* RPSTAT1 is in the GT power well */ intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index d812b27835f8..a3e69eba376f 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -996,20 +996,16 @@ int intel_rps_set(struct intel_rps *rps, u8 val) static void gen6_rps_init(struct intel_rps *rps) { struct drm_i915_private *i915 = rps_to_i915(rps); - struct intel_uncore *uncore = rps_to_uncore(rps); + u32 rp_state_cap = intel_rps_read_state_cap(rps); /* All of these values are in units of 50MHz */ /* static values from HW: RP0 > RP1 > RPn (min_freq) */ if (IS_GEN9_LP(i915)) { - u32 rp_state_cap = intel_uncore_read(uncore, BXT_RP_STATE_CAP); - rps->rp0_freq = (rp_state_cap >> 16) & 0xff; rps->rp1_freq = (rp_state_cap >> 8) & 0xff; rps->min_freq = (rp_state_cap >> 0) & 0xff; } else { - u32 rp_state_cap = intel_uncore_read(uncore, GEN6_RP_STATE_CAP); - rps->rp0_freq = (rp_state_cap >> 0) & 0xff; rps->rp1_freq = (rp_state_cap >> 8) & 0xff; rps->min_freq = (rp_state_cap >> 16) & 0xff; @@ -2140,6 +2136,17 @@ int intel_rps_set_min_frequency(struct intel_rps *rps, u32 val) return set_min_freq(rps, val); } +u32 intel_rps_read_state_cap(struct intel_rps *rps) +{ + struct drm_i915_private *i915 = rps_to_i915(rps); + struct intel_uncore *uncore = rps_to_uncore(rps); + + if (IS_GEN9_LP(i915)) + return intel_uncore_read(uncore, BXT_RP_STATE_CAP); + else + return intel_uncore_read(uncore, GEN6_RP_STATE_CAP); +} + /* External interface for intel_ips.ko */ static struct drm_i915_private __rcu *ips_mchdev; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h b/drivers/gpu/drm/i915/gt/intel_rps.h index 4213bcce1667..11960d64ca82 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.h +++ b/drivers/gpu/drm/i915/gt/intel_rps.h @@ -41,6 +41,7 @@ u32 intel_rps_get_rp1_frequency(struct intel_rps *rps); u32 intel_rps_get_rpn_frequency(struct intel_rps *rps); u32 intel_rps_read_punit_req(struct intel_rps *rps); u32 intel_rps_read_punit_req_frequency(struct intel_rps *rps); +u32 intel_rps_read_state_cap(struct intel_rps *rps); void gen5_rps_irq_handler(struct intel_rps *rps); void gen6_rps_irq_handler(struct intel_rps *rps, u32 pm_iir); diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index cc745751ac53..6c83da3956b9 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -420,13 +420,11 @@ static int i915_frequency_info(struct seq_file *m, void *unused) int max_freq; rp_state_limits = intel_uncore_read(&dev_priv->uncore, GEN6_RP_STATE_LIMITS); - if (IS_GEN9_LP(dev_priv)) { - rp_state_cap = intel_uncore_read(&dev_priv->uncore, BXT_RP_STATE_CAP); + rp_state_cap = intel_rps_read_state_cap(rps); + if (IS_GEN9_LP(dev_priv)) gt_perf_status = intel_uncore_read(&dev_priv->uncore, BXT_GT_PERF_STATUS); - } else { - rp_state_cap = intel_uncore_read(&dev_priv->uncore, GEN6_RP_STATE_CAP); + else gt_perf_status = intel_uncore_read(&dev_priv->uncore, GEN6_GT_PERF_STATUS); - } /* RPSTAT1 is in the GT power well */ intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); From ad482232e3cc6d65eaeb19ce2412887458b19559 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 5 Aug 2021 09:36:44 -0700 Subject: [PATCH 14/85] drm/i915/xehpsdv: Read correct RP_STATE_CAP register The RP_STATE_CAP register is no longer part of the MCHBAR on XEHPSDV; this register is now a per-tile register at GTTMMADDR offset 0x250014. Cc: Rodrigo Vivi Signed-off-by: Matt Roper Signed-off-by: Lucas De Marchi Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210805163647.801064-7-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_rps.c | 4 +++- drivers/gpu/drm/i915/i915_reg.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index a3e69eba376f..3489f5f0cac1 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -2141,7 +2141,9 @@ u32 intel_rps_read_state_cap(struct intel_rps *rps) struct drm_i915_private *i915 = rps_to_i915(rps); struct intel_uncore *uncore = rps_to_uncore(rps); - if (IS_GEN9_LP(i915)) + if (IS_XEHPSDV(i915)) + return intel_uncore_read(uncore, XEHPSDV_RP_STATE_CAP); + else if (IS_GEN9_LP(i915)) return intel_uncore_read(uncore, BXT_RP_STATE_CAP); else return intel_uncore_read(uncore, GEN6_RP_STATE_CAP); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c8db6e8ef1ad..f79f02ee12db 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -4124,6 +4124,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define RPN_CAP_MASK REG_GENMASK(23, 16) #define BXT_RP_STATE_CAP _MMIO(0x138170) #define GEN9_RP_STATE_LIMITS _MMIO(0x138148) +#define XEHPSDV_RP_STATE_CAP _MMIO(0x250014) /* * Logical Context regs From d5ef86b38e4c2a65d5c1d64d8d0f3fcf58aa0884 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 12 Aug 2021 14:44:52 +0200 Subject: [PATCH 15/85] drm/i915: Add pci ids and uapi for DG1 DG1 has support for local memory, which requires the usage of the lmem placement extension for creating bo's, and memregion queries to obtain the size. Because of this, those parts of the uapi are no longer guarded behind FAKE_LMEM. According to the pull request referenced below, mesa should be mostly ready for DG1. VK_EXT_memory_budget is not hooked up yet, but we should definitely just enable the uapi parts by default. Signed-off-by: Maarten Lankhorst References: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11584 Cc: Jordan Justen Cc: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210812124452.622233-2-maarten.lankhorst@linux.intel.com Acked-by: Daniel Vetter Acked-by: Jason Ekstrand --- drivers/gpu/drm/i915/gem/i915_gem_create.c | 3 --- drivers/gpu/drm/i915/i915_pci.c | 1 + drivers/gpu/drm/i915/i915_query.c | 3 --- 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c index 23fee13a3384..1d341b8c47c0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c @@ -347,9 +347,6 @@ static int ext_set_placements(struct i915_user_extension __user *base, { struct drm_i915_gem_create_ext_memory_regions ext; - if (!IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM)) - return -ENODEV; - if (copy_from_user(&ext, base, sizeof(ext))) return -EFAULT; diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 32358f90b920..607459251b49 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1096,6 +1096,7 @@ static const struct pci_device_id pciidlist[] = { INTEL_RKL_IDS(&rkl_info), INTEL_ADLS_IDS(&adl_s_info), INTEL_ADLP_IDS(&adl_p_info), + INTEL_DG1_IDS(&dg1_info), {0, 0, 0} }; MODULE_DEVICE_TABLE(pci, pciidlist); diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c index e49da36c62fb..5e2b909827f4 100644 --- a/drivers/gpu/drm/i915/i915_query.c +++ b/drivers/gpu/drm/i915/i915_query.c @@ -432,9 +432,6 @@ static int query_memregion_info(struct drm_i915_private *i915, u32 total_length; int ret, id, i; - if (!IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM)) - return -ENODEV; - if (query_item->flags != 0) return -EINVAL; From 90fd2194a0cc52eb7a61dfa6412a0e498c58c688 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 17 Aug 2021 16:33:57 -0700 Subject: [PATCH 16/85] drm/i915: Use designated initializers for init/exit table The kernel builds with -Werror=designated-init, and __designated_init is used by CONFIG_GCC_PLUGIN_RANDSTRUCT for automatically selected (all function pointer) structures. Include the field names in the init/exit table. Avoids warnings like: drivers/gpu/drm/i915/i915_module.c:59:4: error: positional initialization of field in 'struct' declared with 'designated_init' attribute [-Werror=designated-init] Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: David Airlie Cc: intel-gfx@lists.freedesktop.org Cc: dri-devel@lists.freedesktop.org Fixes: a04ea6ae7c67 ("drm/i915: Use a table for i915_init/exit (v2)") Signed-off-by: Kees Cook Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210817233357.2379455-1-keescook@chromium.org --- drivers/gpu/drm/i915/i915_module.c | 37 +++++++++++++++++++----------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_module.c b/drivers/gpu/drm/i915/i915_module.c index c578ea8f56a0..d8b4482c69d0 100644 --- a/drivers/gpu/drm/i915/i915_module.c +++ b/drivers/gpu/drm/i915/i915_module.c @@ -47,19 +47,30 @@ static const struct { int (*init)(void); void (*exit)(void); } init_funcs[] = { - { i915_check_nomodeset, NULL }, - { i915_active_module_init, i915_active_module_exit }, - { i915_buddy_module_init, i915_buddy_module_exit }, - { i915_context_module_init, i915_context_module_exit }, - { i915_gem_context_module_init, i915_gem_context_module_exit }, - { i915_objects_module_init, i915_objects_module_exit }, - { i915_request_module_init, i915_request_module_exit }, - { i915_scheduler_module_init, i915_scheduler_module_exit }, - { i915_vma_module_init, i915_vma_module_exit }, - { i915_mock_selftests, NULL }, - { i915_pmu_init, i915_pmu_exit }, - { i915_register_pci_driver, i915_unregister_pci_driver }, - { i915_perf_sysctl_register, i915_perf_sysctl_unregister }, + { .init = i915_check_nomodeset }, + { .init = i915_active_module_init, + .exit = i915_active_module_exit }, + { .init = i915_buddy_module_init, + .exit = i915_buddy_module_exit }, + { .init = i915_context_module_init, + .exit = i915_context_module_exit }, + { .init = i915_gem_context_module_init, + .exit = i915_gem_context_module_exit }, + { .init = i915_objects_module_init, + .exit = i915_objects_module_exit }, + { .init = i915_request_module_init, + .exit = i915_request_module_exit }, + { .init = i915_scheduler_module_init, + .exit = i915_scheduler_module_exit }, + { .init = i915_vma_module_init, + .exit = i915_vma_module_exit }, + { .init = i915_mock_selftests }, + { .init = i915_pmu_init, + .exit = i915_pmu_exit }, + { .init = i915_register_pci_driver, + .exit = i915_unregister_pci_driver }, + { .init = i915_perf_sysctl_register, + .exit = i915_perf_sysctl_unregister }, }; static int init_progress; From 9e9dfd080201ec6236df7151fb7127fe9c594996 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Thu, 5 Aug 2021 09:36:46 -0700 Subject: [PATCH 17/85] drm/i915/dg2: Maintain backward-compatible nested batch behavior For tgl+, the per-context setting of MI_MODE[12] determines whether the bits of a nested MI_BATCH_BUFFER_START instruction should be interpreted in the traditional manner or whether they should instead use a new tgl+ meaning that breaks backward compatibility, but allows nesting into 3rd-level batchbuffers. For previous platforms, the hardware default for this register bit is to maintain backward-compatible behavior unless a context intentionally opts into the new behavior; however Xe_HPG flips the hardware default behavior. From a SW perspective, we want to maintain the backward-compatible behavior for userspace, so we'll apply a fake workaround to set it back to the legacy behavior on platforms where the hardware default is to break compatibility. At the moment there is no Linux userspace that utilizes third-level batchbuffers, so this will avoid userspace from needing to make any changes. using the legacy meaning is the correct thing to do. If/when we have userspace consumers that want to utilize third-level batch nesting, we can provide a context parameter to allow them to opt-in. Bspec: 45974, 45718 Cc: John Harrison Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210805163647.801064-9-matthew.d.roper@intel.com Reviewed-by: Anusha Srivatsa --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 39 +++++++++++++++++++-- drivers/gpu/drm/i915/i915_reg.h | 1 + 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index e9b8af9f08ea..688ed04edbf6 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -644,6 +644,37 @@ static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine, DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE); } +static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + /* + * This is a "fake" workaround defined by software to ensure we + * maintain reliable, backward-compatible behavior for userspace with + * regards to how nested MI_BATCH_BUFFER_START commands are handled. + * + * The per-context setting of MI_MODE[12] determines whether the bits + * of a nested MI_BATCH_BUFFER_START instruction should be interpreted + * in the traditional manner or whether they should instead use a new + * tgl+ meaning that breaks backward compatibility, but allows nesting + * into 3rd-level batchbuffers. When this new capability was first + * added in TGL, it remained off by default unless a context + * intentionally opted in to the new behavior. However Xe_HPG now + * flips this on by default and requires that we explicitly opt out if + * we don't want the new behavior. + * + * From a SW perspective, we want to maintain the backward-compatible + * behavior for userspace, so we'll apply a fake workaround to set it + * back to the legacy behavior on platforms where the hardware default + * is to break compatibility. At the moment there is no Linux + * userspace that utilizes third-level batchbuffers, so this will avoid + * userspace from needing to make any changes. using the legacy + * meaning is the correct thing to do. If/when we have userspace + * consumers that want to utilize third-level batch nesting, we can + * provide a context parameter to allow them to opt-in. + */ + wa_masked_dis(wal, RING_MI_MODE(engine->mmio_base), TGL_NESTED_BB_EN); +} + static void __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, struct i915_wa_list *wal, @@ -651,11 +682,15 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, { struct drm_i915_private *i915 = engine->i915; + wa_init_start(wal, name, engine->name); + + /* Applies to all engines */ + if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) + fakewa_disable_nestedbb_mode(engine, wal); + if (engine->class != RENDER_CLASS) return; - wa_init_start(wal, name, engine->name); - if (IS_DG1(i915)) dg1_ctx_workarounds_init(engine, wal); else if (GRAPHICS_VER(i915) == 12) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f79f02ee12db..b289f5cefa59 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2830,6 +2830,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MI_MODE _MMIO(0x209c) # define VS_TIMER_DISPATCH (1 << 6) # define MI_FLUSH_ENABLE (1 << 12) +# define TGL_NESTED_BB_EN (1 << 12) # define ASYNC_FLIP_PERF_DISABLE (1 << 14) # define MODE_IDLE (1 << 9) # define STOP_RING (1 << 8) From faf890985e30d5e88cc3a7c50c1bcad32f89ab7c Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 30 Jul 2021 12:53:42 -0700 Subject: [PATCH 18/85] drm/i915: Fix syncmap memory leak A small race exists between intel_gt_retire_requests_timeout and intel_timeline_exit which could result in the syncmap not getting free'd. Rather than work to hard to seal this race, simply cleanup the syncmap on fini. unreferenced object 0xffff88813bc53b18 (size 96): comm "gem_close_race", pid 5410, jiffies 4294917818 (age 1105.600s) hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 00 00 00 00 0a 00 00 00 ................ 00 00 00 00 00 00 00 00 6b 6b 6b 6b 06 00 00 00 ........kkkk.... backtrace: [<00000000120b863a>] __sync_alloc_leaf+0x1e/0x40 [i915] [<00000000042f6959>] __sync_set+0x1bb/0x240 [i915] [<0000000090f0e90f>] i915_request_await_dma_fence+0x1c7/0x400 [i915] [<0000000056a48219>] i915_request_await_object+0x222/0x360 [i915] [<00000000aaac4ee3>] i915_gem_do_execbuffer+0x1bd0/0x2250 [i915] [<000000003c9d830f>] i915_gem_execbuffer2_ioctl+0x405/0xce0 [i915] [<00000000fd7a8e68>] drm_ioctl_kernel+0xb0/0xf0 [drm] [<00000000e721ee87>] drm_ioctl+0x305/0x3c0 [drm] [<000000008b0d8986>] __x64_sys_ioctl+0x71/0xb0 [<0000000076c362a4>] do_syscall_64+0x33/0x80 [<00000000eb7a4831>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Signed-off-by: Matthew Brost Fixes: 531958f6f357 ("drm/i915/gt: Track timeline activeness in enter/exit") Cc: Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210730195342.110234-1-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_timeline.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index c4a126c8caef..1257f4f11e66 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -127,6 +127,15 @@ static void intel_timeline_fini(struct rcu_head *rcu) i915_vma_put(timeline->hwsp_ggtt); i915_active_fini(&timeline->active); + + /* + * A small race exists between intel_gt_retire_requests_timeout and + * intel_timeline_exit which could result in the syncmap not getting + * free'd. Rather than work to hard to seal this race, simply cleanup + * the syncmap on fini. + */ + i915_syncmap_free(&timeline->sync); + kfree(timeline); } From c9b6e94963bc3a53110f1c9cd7e5e4ae571413cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Mon, 16 Aug 2021 19:14:44 +0200 Subject: [PATCH 19/85] drm/i915: Ditch the i915_gem_ww_ctx loop member MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's only used by the for_i915_gem_ww() macro and we can use the (typically) on-stack _err variable in its place. v2: - Don't clear the _err variable when entering the loop (Matthew Auld, Maarten Lankhorst). - Use parentheses around the _err macro argument. - Fix up comment. Cc: Matthew Auld Suggested-by: Maarten Lankhorst Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210816171444.105469-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_ww.h | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_ww.h b/drivers/gpu/drm/i915/i915_gem_ww.h index f6b1a796667b..86f0fe343de6 100644 --- a/drivers/gpu/drm/i915/i915_gem_ww.h +++ b/drivers/gpu/drm/i915/i915_gem_ww.h @@ -11,8 +11,7 @@ struct i915_gem_ww_ctx { struct ww_acquire_ctx ctx; struct list_head obj_list; struct drm_i915_gem_object *contended; - unsigned short intr; - unsigned short loop; + bool intr; }; void i915_gem_ww_ctx_init(struct i915_gem_ww_ctx *ctx, bool intr); @@ -20,31 +19,23 @@ void i915_gem_ww_ctx_fini(struct i915_gem_ww_ctx *ctx); int __must_check i915_gem_ww_ctx_backoff(struct i915_gem_ww_ctx *ctx); void i915_gem_ww_unlock_single(struct drm_i915_gem_object *obj); -/* Internal functions used by the inlines! Don't use. */ +/* Internal function used by the inlines! Don't use. */ static inline int __i915_gem_ww_fini(struct i915_gem_ww_ctx *ww, int err) { - ww->loop = 0; if (err == -EDEADLK) { err = i915_gem_ww_ctx_backoff(ww); if (!err) - ww->loop = 1; + err = -EDEADLK; } - if (!ww->loop) + if (err != -EDEADLK) i915_gem_ww_ctx_fini(ww); return err; } -static inline void -__i915_gem_ww_init(struct i915_gem_ww_ctx *ww, bool intr) -{ - i915_gem_ww_ctx_init(ww, intr); - ww->loop = 1; -} - -#define for_i915_gem_ww(_ww, _err, _intr) \ - for (__i915_gem_ww_init(_ww, _intr); (_ww)->loop; \ - _err = __i915_gem_ww_fini(_ww, _err)) - +#define for_i915_gem_ww(_ww, _err, _intr) \ + for (i915_gem_ww_ctx_init(_ww, _intr), (_err) = -EDEADLK; \ + (_err) == -EDEADLK; \ + (_err) = __i915_gem_ww_fini(_ww, _err)) #endif From 5359b745146aa596026addffd7843e1735db7bbd Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 19 Aug 2021 10:34:18 +0100 Subject: [PATCH 20/85] drm/i915/buddy: add some pretty printing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the debug hook for the buddy resource manager. For this we want to print out the status of the memory manager, including how much memory is still allocatable, what page sizes we have etc. This will be triggered when TTM is unable to fulfil an allocation request for device local-memory. v2(Thomas): - s/MB/MiB - s/KB/KiB Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20210819093419.295636-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_buddy.c | 45 +++++++++++++++++++ drivers/gpu/drm/i915/i915_buddy.h | 8 ++++ drivers/gpu/drm/i915/i915_ttm_buddy_manager.c | 20 ++++++++- 3 files changed, 72 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_buddy.c b/drivers/gpu/drm/i915/i915_buddy.c index 7b274c51cac0..6e2ad68f8f3f 100644 --- a/drivers/gpu/drm/i915/i915_buddy.c +++ b/drivers/gpu/drm/i915/i915_buddy.c @@ -4,6 +4,7 @@ */ #include +#include #include "i915_buddy.h" @@ -82,6 +83,7 @@ int i915_buddy_init(struct i915_buddy_mm *mm, u64 size, u64 chunk_size) size = round_down(size, chunk_size); mm->size = size; + mm->avail = size; mm->chunk_size = chunk_size; mm->max_order = ilog2(size) - ilog2(chunk_size); @@ -155,6 +157,8 @@ void i915_buddy_fini(struct i915_buddy_mm *mm) i915_block_free(mm, mm->roots[i]); } + GEM_WARN_ON(mm->avail != mm->size); + kfree(mm->roots); kfree(mm->free_list); } @@ -230,6 +234,7 @@ void i915_buddy_free(struct i915_buddy_mm *mm, struct i915_buddy_block *block) { GEM_BUG_ON(!i915_buddy_block_is_allocated(block)); + mm->avail += i915_buddy_block_size(mm, block); __i915_buddy_free(mm, block); } @@ -283,6 +288,7 @@ i915_buddy_alloc(struct i915_buddy_mm *mm, unsigned int order) } mark_allocated(block); + mm->avail -= i915_buddy_block_size(mm, block); kmemleak_update_trace(block); return block; @@ -368,6 +374,7 @@ int i915_buddy_alloc_range(struct i915_buddy_mm *mm, } mark_allocated(block); + mm->avail -= i915_buddy_block_size(mm, block); list_add_tail(&block->link, &allocated); continue; } @@ -402,6 +409,44 @@ err_free: return err; } +void i915_buddy_block_print(struct i915_buddy_mm *mm, + struct i915_buddy_block *block, + struct drm_printer *p) +{ + u64 start = i915_buddy_block_offset(block); + u64 size = i915_buddy_block_size(mm, block); + + drm_printf(p, "%#018llx-%#018llx: %llu\n", start, start + size, size); +} + +void i915_buddy_print(struct i915_buddy_mm *mm, struct drm_printer *p) +{ + int order; + + drm_printf(p, "chunk_size: %lluKiB, total: %lluMiB, free: %lluMiB\n", + mm->chunk_size >> 10, mm->size >> 20, mm->avail >> 20); + + for (order = mm->max_order; order >= 0; order--) { + struct i915_buddy_block *block; + u64 count = 0, free; + + list_for_each_entry(block, &mm->free_list[order], link) { + GEM_BUG_ON(!i915_buddy_block_is_free(block)); + count++; + } + + drm_printf(p, "order-%d ", order); + + free = count * (mm->chunk_size << order); + if (free < SZ_1M) + drm_printf(p, "free: %lluKiB", free >> 10); + else + drm_printf(p, "free: %lluMiB", free >> 20); + + drm_printf(p, ", pages: %llu\n", count); + } +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/i915_buddy.c" #endif diff --git a/drivers/gpu/drm/i915/i915_buddy.h b/drivers/gpu/drm/i915/i915_buddy.h index 3940d632f208..7077742112ac 100644 --- a/drivers/gpu/drm/i915/i915_buddy.h +++ b/drivers/gpu/drm/i915/i915_buddy.h @@ -10,6 +10,8 @@ #include #include +#include + struct i915_buddy_block { #define I915_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) #define I915_BUDDY_HEADER_STATE GENMASK_ULL(11, 10) @@ -69,6 +71,7 @@ struct i915_buddy_mm { /* Must be at least PAGE_SIZE */ u64 chunk_size; u64 size; + u64 avail; }; static inline u64 @@ -129,6 +132,11 @@ void i915_buddy_free(struct i915_buddy_mm *mm, struct i915_buddy_block *block); void i915_buddy_free_list(struct i915_buddy_mm *mm, struct list_head *objects); +void i915_buddy_print(struct i915_buddy_mm *mm, struct drm_printer *p); +void i915_buddy_block_print(struct i915_buddy_mm *mm, + struct i915_buddy_block *block, + struct drm_printer *p); + void i915_buddy_module_exit(void); int i915_buddy_module_init(void); diff --git a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c index 6877362f6b85..d59fbb019032 100644 --- a/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c +++ b/drivers/gpu/drm/i915/i915_ttm_buddy_manager.c @@ -126,12 +126,30 @@ static void i915_ttm_buddy_man_free(struct ttm_resource_manager *man, kfree(bman_res); } +static void i915_ttm_buddy_man_debug(struct ttm_resource_manager *man, + struct drm_printer *printer) +{ + struct i915_ttm_buddy_manager *bman = to_buddy_manager(man); + struct i915_buddy_block *block; + + mutex_lock(&bman->lock); + drm_printf(printer, "default_page_size: %lluKiB\n", + bman->default_page_size >> 10); + + i915_buddy_print(&bman->mm, printer); + + drm_printf(printer, "reserved:\n"); + list_for_each_entry(block, &bman->reserved, link) + i915_buddy_block_print(&bman->mm, block, printer); + mutex_unlock(&bman->lock); +} + static const struct ttm_resource_manager_func i915_ttm_buddy_manager_func = { .alloc = i915_ttm_buddy_man_alloc, .free = i915_ttm_buddy_man_free, + .debug = i915_ttm_buddy_man_debug, }; - /** * i915_ttm_buddy_man_init - Setup buddy allocator based ttm manager * @bdev: The ttm device From 8c3363c67b885fe40f50a8010a0768c4fd1e4b95 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 19 Aug 2021 10:34:19 +0100 Subject: [PATCH 21/85] drm/i915/debugfs: hook up ttm_resource_manager_debug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This should give a more complete view of the various bits of internal resource manager state, for device local-memory. v2(Thomas): - Move the region printing into a nice helper Signed-off-by: Matthew Auld Cc: Thomas Hellström Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20210819093419.295636-2-matthew.auld@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 4 ++-- drivers/gpu/drm/i915/intel_memory_region.c | 12 ++++++++++++ drivers/gpu/drm/i915/intel_memory_region.h | 4 ++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 6c83da3956b9..2829df0aa65c 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -238,6 +238,7 @@ i915_debugfs_describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) static int i915_gem_object_info(struct seq_file *m, void *data) { struct drm_i915_private *i915 = node_to_i915(m->private); + struct drm_printer p = drm_seq_file_printer(m); struct intel_memory_region *mr; enum intel_region_id id; @@ -246,8 +247,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) atomic_read(&i915->mm.free_count), i915->mm.shrink_memory); for_each_memory_region(mr, i915, id) - seq_printf(m, "%s: total:%pa, available:%pa bytes\n", - mr->name, &mr->total, &mr->avail); + intel_memory_region_debug(mr, &p); return 0; } diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index 779eb2fa90b6..e7f7e6627750 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -78,6 +78,18 @@ int intel_memory_region_reserve(struct intel_memory_region *mem, return i915_ttm_buddy_man_reserve(man, offset, size); } +void intel_memory_region_debug(struct intel_memory_region *mr, + struct drm_printer *printer) +{ + drm_printf(printer, "%s: ", mr->name); + + if (mr->region_private) + ttm_resource_manager_debug(mr->region_private, printer); + else + drm_printf(printer, "total:%pa, available:%pa bytes\n", + &mr->total, &mr->avail); +} + struct intel_memory_region * intel_memory_region_create(struct drm_i915_private *i915, resource_size_t start, diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 1f2b96efa69d..3feae3353d33 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -15,6 +15,7 @@ struct drm_i915_private; struct drm_i915_gem_object; +struct drm_printer; struct intel_memory_region; struct sg_table; struct ttm_resource; @@ -127,6 +128,9 @@ int intel_memory_region_reserve(struct intel_memory_region *mem, resource_size_t offset, resource_size_t size); +void intel_memory_region_debug(struct intel_memory_region *mr, + struct drm_printer *printer); + struct intel_memory_region * i915_gem_ttm_system_setup(struct drm_i915_private *i915, u16 type, u16 instance); From ff12ce2c9cb1cd09017151424db66de803984abc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 13 Aug 2021 14:36:00 +0300 Subject: [PATCH 22/85] drm/i915/gt: Potential error pointer dereference in pinned_context() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the intel_engine_create_pinned_context() function returns an error pointer, then dereferencing "ce" will Oops. Use "vm" instead of "ce->vm". Fixes: cf586021642d ("drm/i915/gt: Pipelined page migration") Signed-off-by: Dan Carpenter Reviewed-by: Thomas Hellström Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210813113600.GC30697@kili --- drivers/gpu/drm/i915/gt/intel_migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_migrate.c b/drivers/gpu/drm/i915/gt/intel_migrate.c index d0a7c934fd3b..1dac21aa7e5c 100644 --- a/drivers/gpu/drm/i915/gt/intel_migrate.c +++ b/drivers/gpu/drm/i915/gt/intel_migrate.c @@ -177,7 +177,7 @@ static struct intel_context *pinned_context(struct intel_gt *gt) ce = intel_engine_create_pinned_context(engine, vm, SZ_512K, I915_GEM_HWS_MIGRATE, &key, "migrate"); - i915_vm_put(ce->vm); + i915_vm_put(vm); return ce; } From 81a14bedae5ba88e2e2c6a53fd8f62dddabf51d2 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Thu, 19 Aug 2021 14:03:49 -0700 Subject: [PATCH 23/85] drm/i915/dg1: remove __maybe_unused leftover This was added in commit 05e265841f7e ("drm/i915/dg1: add initial DG-1 definitions") so we could continue to add support for DG1 without risk to expose a broken UAPI. Now that we added DG1 to the PCI ID list i915 may bind to, remove the leftover. Fixes: d5ef86b38e4c ("drm/i915: Add pci ids and uapi for DG1") Signed-off-by: Lucas De Marchi Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20210819210349.95103-1-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 607459251b49..3fb55e2404b6 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -909,7 +909,7 @@ static const struct intel_device_info rkl_info = { .has_snoop = 1, \ .is_dgfx = 1 -static const struct intel_device_info dg1_info __maybe_unused = { +static const struct intel_device_info dg1_info = { GEN12_FEATURES, DGFX_FEATURES, PLATFORM(INTEL_DG1), From ac5a2dff428ab59b5c5bbb4b28311141aa461c07 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 13 Aug 2021 10:11:58 -0700 Subject: [PATCH 24/85] drm/i915/selftest: Fix use of err in igt_reset_{fail, nop}_engine() Clang warns: In file included from drivers/gpu/drm/i915/gt/intel_reset.c:1514: drivers/gpu/drm/i915/gt/selftest_hangcheck.c:465:62: warning: variable 'err' is uninitialized when used here [-Wuninitialized] pr_err("[%s] Create context failed: %d!\n", engine->name, err); ^~~ ... drivers/gpu/drm/i915/gt/selftest_hangcheck.c:580:62: warning: variable 'err' is uninitialized when used here [-Wuninitialized] pr_err("[%s] Create context failed: %d!\n", engine->name, err); ^~~ ... 2 warnings generated. This appears to be a copy and paste issue. Use ce directly using the %pe specifier to pretty print the error code so that err is not used uninitialized in these functions. Fixes: 3a7b72665ea5 ("drm/i915/selftest: Bump selftest timeouts for hangcheck") Reported-by: Dan Carpenter Signed-off-by: Nathan Chancellor Reviewed-by: Joonas Lahtinen Signed-off-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20210813171158.2665823-1-nathan@kernel.org --- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 08f011f893b2..2c1ed32ca5ac 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -462,7 +462,7 @@ static int igt_reset_nop_engine(void *arg) ce = intel_context_create(engine); if (IS_ERR(ce)) { - pr_err("[%s] Create context failed: %d!\n", engine->name, err); + pr_err("[%s] Create context failed: %pe!\n", engine->name, ce); return PTR_ERR(ce); } @@ -577,7 +577,7 @@ static int igt_reset_fail_engine(void *arg) ce = intel_context_create(engine); if (IS_ERR(ce)) { - pr_err("[%s] Create context failed: %d!\n", engine->name, err); + pr_err("[%s] Create context failed: %pe!\n", engine->name, ce); return PTR_ERR(ce); } From 6321a722374bf23b09095cf3077c59cf5e6d3a78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 12 Jul 2021 19:18:15 +0300 Subject: [PATCH 25/85] drm/i915: s/0/NULL/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use NULL where appropriate. drivers/gpu/drm/i915/gt/intel_ring_submission.c:1210:24: warning: Using plain integer as NULL pointer Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210712161815.24776-2-ville.syrjala@linux.intel.com Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 2958e2fae380..3c65efcb7bed 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -1265,7 +1265,7 @@ static struct i915_vma *gen7_ctx_vma(struct intel_engine_cs *engine) int size, err; if (GRAPHICS_VER(engine->i915) != 7 || engine->class != RENDER_CLASS) - return 0; + return NULL; err = gen7_ctx_switch_bb_setup(engine, NULL /* probe size */); if (err < 0) From 5e076529e2652244ec20a86d8f99ba634a16c4f4 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Jul 2021 20:17:03 -0700 Subject: [PATCH 26/85] drm/i915/selftests: Increase timeout in i915_gem_contexts selftests Like in the case of several other selftests, generating lots of requests in a loop takes a bit longer with GuC submission. Increase a timeout in i915_gem_contexts selftest to take this into account. Signed-off-by: Matthew Brost Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210727031703.40395-2-matthew.brost@intel.com --- drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 8eb5050f8cb3..4d2758718d21 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -94,7 +94,7 @@ static int live_nop_switch(void *arg) rq = i915_request_get(this); i915_request_add(this); } - if (i915_request_wait(rq, 0, HZ / 5) < 0) { + if (i915_request_wait(rq, 0, HZ) < 0) { pr_err("Failed to populated %d contexts\n", nctx); intel_gt_set_wedged(&i915->gt); i915_request_put(rq); From ae4b0eacaffe6b69ace47b224909bf757767d40b Mon Sep 17 00:00:00 2001 From: Akeem G Abodunrin Date: Thu, 5 Aug 2021 09:36:45 -0700 Subject: [PATCH 27/85] drm/i915/dg2: Add new LRI reg offsets New LRI register offsets were introduced for DG2, this patch adds those extra registers, and create new register table for setting offsets to compare with HW generated context image - especially for gt_lrc test. Also updates general purpose register with scratch offset for DG2, in order to use it for live_lrc_fixed selftest. Cc: Chris P Wilson Cc: Prathap Kumar Valsan Signed-off-by: Akeem G Abodunrin Signed-off-by: Matt Roper Reviewed-by: Caz Yokoyama Link: https://patchwork.freedesktop.org/patch/msgid/20210805163647.801064-8-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_lrc.c | 85 ++++++++++++++++++++++++++++- 1 file changed, 83 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index bb4af4977920..6ba8daea2f56 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -226,6 +226,40 @@ static const u8 gen12_xcs_offsets[] = { END }; +static const u8 dg2_xcs_offsets[] = { + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + REG(0x120), + REG(0x124), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + END +}; + static const u8 gen8_rcs_offsets[] = { NOP(1), LRI(14, POSTED), @@ -525,6 +559,49 @@ static const u8 xehp_rcs_offsets[] = { END }; +static const u8 dg2_rcs_offsets[] = { + NOP(1), + LRI(15, POSTED), + REG16(0x244), + REG(0x034), + REG(0x030), + REG(0x038), + REG(0x03c), + REG(0x168), + REG(0x140), + REG(0x110), + REG(0x1c0), + REG(0x1c4), + REG(0x1c8), + REG(0x180), + REG16(0x2b4), + REG(0x120), + REG(0x124), + + NOP(1), + LRI(9, POSTED), + REG16(0x3a8), + REG16(0x28c), + REG16(0x288), + REG16(0x284), + REG16(0x280), + REG16(0x27c), + REG16(0x278), + REG16(0x274), + REG16(0x270), + + LRI(3, POSTED), + REG(0x1b0), + REG16(0x5a8), + REG16(0x5ac), + + NOP(6), + LRI(1, 0), + REG(0x0c8), + + END +}; + #undef END #undef REG16 #undef REG @@ -543,7 +620,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) !intel_engine_has_relative_mmio(engine)); if (engine->class == RENDER_CLASS) { - if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) + return dg2_rcs_offsets; + else if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 50)) return xehp_rcs_offsets; else if (GRAPHICS_VER(engine->i915) >= 12) return gen12_rcs_offsets; @@ -554,7 +633,9 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) else return gen8_rcs_offsets; } else { - if (GRAPHICS_VER(engine->i915) >= 12) + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) + return dg2_xcs_offsets; + else if (GRAPHICS_VER(engine->i915) >= 12) return gen12_xcs_offsets; else if (GRAPHICS_VER(engine->i915) >= 9) return gen9_xcs_offsets; From d8ac30fd479cf0f0b37c7f06b06b50c18f57c548 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 13 Aug 2021 16:43:30 +0200 Subject: [PATCH 28/85] drm/i915/ttm: Reorganize the ttm move code somewhat MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to make the code a bit more readable and to facilitate async memcpy moves, reorganize the move code a little. Determine at an early stage whether to copy or to clear. v2: - Don't set up the memcpy iterators unless we are actually going to memcpy. Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://lore.kernel.org/r/20210813144331.372957-2-thomas.hellstrom@linux.intel.com Link: https://patchwork.freedesktop.org/patch/msgid/20210813144331.372957-2-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 77 ++++++++++++++----------- 1 file changed, 44 insertions(+), 33 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index 771eb2963123..d07de18529ab 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -431,6 +431,7 @@ i915_ttm_resource_get_st(struct drm_i915_gem_object *obj, } static int i915_ttm_accel_move(struct ttm_buffer_object *bo, + bool clear, struct ttm_resource *dst_mem, struct sg_table *dst_st) { @@ -449,13 +450,10 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo, return -EINVAL; dst_level = i915_ttm_cache_level(i915, dst_mem, ttm); - if (!ttm || !ttm_tt_is_populated(ttm)) { + if (clear) { if (bo->type == ttm_bo_type_kernel) return -EINVAL; - if (ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)) - return 0; - intel_engine_pm_get(i915->gt.migrate.context->engine); ret = intel_context_migrate_clear(i915->gt.migrate.context, NULL, dst_st->sgl, dst_level, @@ -489,6 +487,41 @@ static int i915_ttm_accel_move(struct ttm_buffer_object *bo, return ret; } +static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear, + struct ttm_resource *dst_mem, + struct sg_table *dst_st) +{ + int ret; + + ret = i915_ttm_accel_move(bo, clear, dst_mem, dst_st); + if (ret) { + struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); + struct intel_memory_region *dst_reg, *src_reg; + union { + struct ttm_kmap_iter_tt tt; + struct ttm_kmap_iter_iomap io; + } _dst_iter, _src_iter; + struct ttm_kmap_iter *dst_iter, *src_iter; + + dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type); + src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type); + GEM_BUG_ON(!dst_reg || !src_reg); + + dst_iter = !cpu_maps_iomem(dst_mem) ? + ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm) : + ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap, + dst_st, dst_reg->region.start); + + src_iter = !cpu_maps_iomem(bo->resource) ? + ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) : + ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap, + obj->ttm.cached_io_st, + src_reg->region.start); + + ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); + } +} + static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, struct ttm_operation_ctx *ctx, struct ttm_resource *dst_mem, @@ -497,19 +530,11 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); struct ttm_resource_manager *dst_man = ttm_manager_type(bo->bdev, dst_mem->mem_type); - struct intel_memory_region *dst_reg, *src_reg; - union { - struct ttm_kmap_iter_tt tt; - struct ttm_kmap_iter_iomap io; - } _dst_iter, _src_iter; - struct ttm_kmap_iter *dst_iter, *src_iter; + struct ttm_tt *ttm = bo->ttm; struct sg_table *dst_st; + bool clear; int ret; - dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type); - src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type); - GEM_BUG_ON(!dst_reg || !src_reg); - /* Sync for now. We could do the actual copy async. */ ret = ttm_bo_wait_ctx(bo, ctx); if (ret) @@ -526,9 +551,8 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, } /* Populate ttm with pages if needed. Typically system memory. */ - if (bo->ttm && (dst_man->use_tt || - (bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED))) { - ret = ttm_tt_populate(bo->bdev, bo->ttm, ctx); + if (ttm && (dst_man->use_tt || (ttm->page_flags & TTM_PAGE_FLAG_SWAPPED))) { + ret = ttm_tt_populate(bo->bdev, ttm, ctx); if (ret) return ret; } @@ -537,23 +561,10 @@ static int i915_ttm_move(struct ttm_buffer_object *bo, bool evict, if (IS_ERR(dst_st)) return PTR_ERR(dst_st); - ret = i915_ttm_accel_move(bo, dst_mem, dst_st); - if (ret) { - /* If we start mapping GGTT, we can no longer use man::use_tt here. */ - dst_iter = !cpu_maps_iomem(dst_mem) ? - ttm_kmap_iter_tt_init(&_dst_iter.tt, bo->ttm) : - ttm_kmap_iter_iomap_init(&_dst_iter.io, &dst_reg->iomap, - dst_st, dst_reg->region.start); + clear = !cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm)); + if (!(clear && ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC))) + __i915_ttm_move(bo, clear, dst_mem, dst_st); - src_iter = !cpu_maps_iomem(bo->resource) ? - ttm_kmap_iter_tt_init(&_src_iter.tt, bo->ttm) : - ttm_kmap_iter_iomap_init(&_src_iter.io, &src_reg->iomap, - obj->ttm.cached_io_st, - src_reg->region.start); - - ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); - } - /* Below dst_mem becomes bo->resource. */ ttm_bo_move_sync_cleanup(bo, dst_mem); i915_ttm_adjust_domains_after_move(obj); i915_ttm_free_cached_io_st(obj); From 669076334bfa7915e6856cf49c6408a2ec07df7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Fri, 13 Aug 2021 16:43:31 +0200 Subject: [PATCH 29/85] drm/ttm, drm/i915: Update ttm_move_memcpy for async use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The buffer object argument to ttm_move_memcpy was only used to determine whether the destination memory should be cleared only or whether we should copy data. Replace it with a "clear" bool, and update the callers. The intention here is to be able to use ttm_move_memcpy() async under a dma-fence as a fallback if an accelerated blit fails in a security- critical path where data might leak if the blit is not properly performed. For that purpose the bo is an unsuitable argument since its relevant members might already have changed at call time. Finally, update the ttm_move_memcpy kerneldoc that seems to have ended up with a stale version. Cc: Christian König Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Reviewed-by: Christian König Link: https://lore.kernel.org/r/20210813144331.372957-3-thomas.hellstrom@linux.intel.com Link: https://patchwork.freedesktop.org/patch/msgid/20210813144331.372957-3-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 2 +- drivers/gpu/drm/ttm/ttm_bo_util.c | 20 ++++++++++---------- include/drm/ttm/ttm_bo_driver.h | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index d07de18529ab..6995c66cbe21 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -518,7 +518,7 @@ static void __i915_ttm_move(struct ttm_buffer_object *bo, bool clear, obj->ttm.cached_io_st, src_reg->region.start); - ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); + ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter); } } diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 2f57f824e6db..e3747f069674 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -75,22 +75,21 @@ void ttm_mem_io_free(struct ttm_device *bdev, /** * ttm_move_memcpy - Helper to perform a memcpy ttm move operation. - * @bo: The struct ttm_buffer_object. - * @new_mem: The struct ttm_resource we're moving to (copy destination). - * @new_iter: A struct ttm_kmap_iter representing the destination resource. + * @clear: Whether to clear rather than copy. + * @num_pages: Number of pages of the operation. + * @dst_iter: A struct ttm_kmap_iter representing the destination resource. * @src_iter: A struct ttm_kmap_iter representing the source resource. * * This function is intended to be able to move out async under a * dma-fence if desired. */ -void ttm_move_memcpy(struct ttm_buffer_object *bo, +void ttm_move_memcpy(bool clear, u32 num_pages, struct ttm_kmap_iter *dst_iter, struct ttm_kmap_iter *src_iter) { const struct ttm_kmap_iter_ops *dst_ops = dst_iter->ops; const struct ttm_kmap_iter_ops *src_ops = src_iter->ops; - struct ttm_tt *ttm = bo->ttm; struct dma_buf_map src_map, dst_map; pgoff_t i; @@ -99,10 +98,7 @@ void ttm_move_memcpy(struct ttm_buffer_object *bo, return; /* Don't move nonexistent data. Clear destination instead. */ - if (src_ops->maps_tt && (!ttm || !ttm_tt_is_populated(ttm))) { - if (ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC)) - return; - + if (clear) { for (i = 0; i < num_pages; ++i) { dst_ops->map_local(dst_iter, &dst_map, i); if (dst_map.is_iomem) @@ -146,6 +142,7 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, struct ttm_kmap_iter_linear_io io; } _dst_iter, _src_iter; struct ttm_kmap_iter *dst_iter, *src_iter; + bool clear; int ret = 0; if (ttm && ((ttm->page_flags & TTM_PAGE_FLAG_SWAPPED) || @@ -169,7 +166,10 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, goto out_src_iter; } - ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter); + clear = src_iter->ops->maps_tt && (!ttm || !ttm_tt_is_populated(ttm)); + if (!(clear && ttm && !(ttm->page_flags & TTM_PAGE_FLAG_ZERO_ALLOC))) + ttm_move_memcpy(clear, dst_mem->num_pages, dst_iter, src_iter); + src_copy = *src_mem; ttm_bo_move_sync_cleanup(bo, dst_mem); diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 68d6069572aa..5f087575194b 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -322,7 +322,7 @@ int ttm_bo_tt_bind(struct ttm_buffer_object *bo, struct ttm_resource *mem); */ void ttm_bo_tt_destroy(struct ttm_buffer_object *bo); -void ttm_move_memcpy(struct ttm_buffer_object *bo, +void ttm_move_memcpy(bool clear, u32 num_pages, struct ttm_kmap_iter *dst_iter, struct ttm_kmap_iter *src_iter); From f3ede209d44d71636890a78fa89c5b1c83340320 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 25 Aug 2021 18:06:23 +0300 Subject: [PATCH 30/85] drm/i915/pci: rename functions to have i915_pci prefix Follow the usual naming conventions. While at it, fix i915_pci.h SPDX license comment format and add header include guards. Cc: Daniel Vetter Reviewed-by: Rodrigo Vivi Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210825150623.28980-1-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_module.c | 4 ++-- drivers/gpu/drm/i915/i915_pci.c | 4 ++-- drivers/gpu/drm/i915/i915_pci.h | 12 ++++++++---- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_module.c b/drivers/gpu/drm/i915/i915_module.c index d8b4482c69d0..ab2295dd4500 100644 --- a/drivers/gpu/drm/i915/i915_module.c +++ b/drivers/gpu/drm/i915/i915_module.c @@ -67,8 +67,8 @@ static const struct { { .init = i915_mock_selftests }, { .init = i915_pmu_init, .exit = i915_pmu_exit }, - { .init = i915_register_pci_driver, - .exit = i915_unregister_pci_driver }, + { .init = i915_pci_register_driver, + .exit = i915_pci_unregister_driver }, { .init = i915_perf_sysctl_register, .exit = i915_perf_sysctl_unregister }, }; diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 3fb55e2404b6..f0e2f5e67bda 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1216,12 +1216,12 @@ static struct pci_driver i915_pci_driver = { .driver.pm = &i915_pm_ops, }; -int i915_register_pci_driver(void) +int i915_pci_register_driver(void) { return pci_register_driver(&i915_pci_driver); } -void i915_unregister_pci_driver(void) +void i915_pci_unregister_driver(void) { pci_unregister_driver(&i915_pci_driver); } diff --git a/drivers/gpu/drm/i915/i915_pci.h b/drivers/gpu/drm/i915/i915_pci.h index b386f319f52e..ee048c238174 100644 --- a/drivers/gpu/drm/i915/i915_pci.h +++ b/drivers/gpu/drm/i915/i915_pci.h @@ -1,8 +1,12 @@ +/* SPDX-License-Identifier: MIT */ /* - * SPDX-License-Identifier: MIT - * * Copyright © 2021 Intel Corporation */ -int i915_register_pci_driver(void); -void i915_unregister_pci_driver(void); +#ifndef __I915_PCI_H__ +#define __I915_PCI_H__ + +int i915_pci_register_driver(void); +void i915_pci_unregister_driver(void); + +#endif /* __I915_PCI_H__ */ From ba3d8257f2d94ab227af880e3e40868c80ad8d93 Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 25 Aug 2021 20:35:59 -0700 Subject: [PATCH 31/85] drm/i915: Ensure wa_init_finish() is called for ctx workaround list A recent restructuring of our context workaround list initialization added an early return for non-render engines; this caused us to potentially miss the wa_init_finish() call at the end of the function. The mistake is pretty harmless --- the only impact is that non-render engines on graphics version 12.50+ platforms we don't trim down the workaround list to reclaim some memory, and we don't print the usual "Initialized 1 context workaround" message in dmesg. Let's change the early return to a jump down to the wa_init_finish() call at the bottom of the function. Reported-by: Tvrtko Ursulin Fixes: 9e9dfd080201 ("drm/i915/dg2: Maintain backward-compatible nested batch behavior") Signed-off-by: Matt Roper Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20210826033559.1209020-1-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 688ed04edbf6..94e1937f8d29 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -689,7 +689,7 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, fakewa_disable_nestedbb_mode(engine, wal); if (engine->class != RENDER_CLASS) - return; + goto done; if (IS_DG1(i915)) dg1_ctx_workarounds_init(engine, wal); @@ -720,6 +720,7 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, else MISSING_CASE(GRAPHICS_VER(i915)); +done: wa_init_finish(wal); } From f123efebe4361b9b16975fcc3dbc0a6331fa6a14 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 20 Aug 2021 17:49:32 +0200 Subject: [PATCH 32/85] drm/i915: Actually delete gpu reloc selftests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 8e02cceb1f1f ("drm/i915: delete gpu reloc code") I deleted the gpu relocation code and the selftest include and enabling, but accidentally forgot about the selftest source code. Fix this oversight. Reviewed-by: Rodrigo Vivi Signed-off-by: Daniel Vetter Cc: Jon Bloomfield Cc: Chris Wilson Cc: Maarten Lankhorst Cc: Daniel Vetter Cc: Joonas Lahtinen Cc: "Thomas Hellström" Cc: Matthew Auld Cc: Lionel Landwerlin Cc: Dave Airlie Cc: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210820154932.296628-1-daniel.vetter@ffwll.ch --- .../i915/gem/selftests/i915_gem_execbuffer.c | 190 ------------------ 1 file changed, 190 deletions(-) delete mode 100644 drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c deleted file mode 100644 index 16162fc2782d..000000000000 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c +++ /dev/null @@ -1,190 +0,0 @@ -// SPDX-License-Identifier: MIT -/* - * Copyright © 2020 Intel Corporation - */ - -#include "i915_selftest.h" - -#include "gt/intel_engine_pm.h" -#include "selftests/igt_flush_test.h" - -static u64 read_reloc(const u32 *map, int x, const u64 mask) -{ - u64 reloc; - - memcpy(&reloc, &map[x], sizeof(reloc)); - return reloc & mask; -} - -static int __igt_gpu_reloc(struct i915_execbuffer *eb, - struct drm_i915_gem_object *obj) -{ - const unsigned int offsets[] = { 8, 3, 0 }; - const u64 mask = - GENMASK_ULL(eb->reloc_cache.use_64bit_reloc ? 63 : 31, 0); - const u32 *map = page_mask_bits(obj->mm.mapping); - struct i915_request *rq; - struct i915_vma *vma; - int err; - int i; - - vma = i915_vma_instance(obj, eb->context->vm, NULL); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - err = i915_gem_object_lock(obj, &eb->ww); - if (err) - return err; - - err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, PIN_USER | PIN_HIGH); - if (err) - return err; - - /* 8-Byte aligned */ - err = __reloc_entry_gpu(eb, vma, offsets[0] * sizeof(u32), 0); - if (err <= 0) - goto reloc_err; - - /* !8-Byte aligned */ - err = __reloc_entry_gpu(eb, vma, offsets[1] * sizeof(u32), 1); - if (err <= 0) - goto reloc_err; - - /* Skip to the end of the cmd page */ - i = PAGE_SIZE / sizeof(u32) - 1; - i -= eb->reloc_cache.rq_size; - memset32(eb->reloc_cache.rq_cmd + eb->reloc_cache.rq_size, - MI_NOOP, i); - eb->reloc_cache.rq_size += i; - - /* Force next batch */ - err = __reloc_entry_gpu(eb, vma, offsets[2] * sizeof(u32), 2); - if (err <= 0) - goto reloc_err; - - GEM_BUG_ON(!eb->reloc_cache.rq); - rq = i915_request_get(eb->reloc_cache.rq); - reloc_gpu_flush(eb, &eb->reloc_cache); - GEM_BUG_ON(eb->reloc_cache.rq); - - err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, HZ / 2); - if (err) { - intel_gt_set_wedged(eb->engine->gt); - goto put_rq; - } - - if (!i915_request_completed(rq)) { - pr_err("%s: did not wait for relocations!\n", eb->engine->name); - err = -EINVAL; - goto put_rq; - } - - for (i = 0; i < ARRAY_SIZE(offsets); i++) { - u64 reloc = read_reloc(map, offsets[i], mask); - - if (reloc != i) { - pr_err("%s[%d]: map[%d] %llx != %x\n", - eb->engine->name, i, offsets[i], reloc, i); - err = -EINVAL; - } - } - if (err) - igt_hexdump(map, 4096); - -put_rq: - i915_request_put(rq); -unpin_vma: - i915_vma_unpin(vma); - return err; - -reloc_err: - if (!err) - err = -EIO; - goto unpin_vma; -} - -static int igt_gpu_reloc(void *arg) -{ - struct i915_execbuffer eb; - struct drm_i915_gem_object *scratch; - int err = 0; - u32 *map; - - eb.i915 = arg; - - scratch = i915_gem_object_create_internal(eb.i915, 4096); - if (IS_ERR(scratch)) - return PTR_ERR(scratch); - - map = i915_gem_object_pin_map_unlocked(scratch, I915_MAP_WC); - if (IS_ERR(map)) { - err = PTR_ERR(map); - goto err_scratch; - } - - intel_gt_pm_get(&eb.i915->gt); - - for_each_uabi_engine(eb.engine, eb.i915) { - if (intel_engine_requires_cmd_parser(eb.engine) || - intel_engine_using_cmd_parser(eb.engine)) - continue; - - reloc_cache_init(&eb.reloc_cache, eb.i915); - memset(map, POISON_INUSE, 4096); - - intel_engine_pm_get(eb.engine); - eb.context = intel_context_create(eb.engine); - if (IS_ERR(eb.context)) { - err = PTR_ERR(eb.context); - goto err_pm; - } - eb.reloc_pool = NULL; - eb.reloc_context = NULL; - - i915_gem_ww_ctx_init(&eb.ww, false); -retry: - err = intel_context_pin_ww(eb.context, &eb.ww); - if (!err) { - err = __igt_gpu_reloc(&eb, scratch); - - intel_context_unpin(eb.context); - } - if (err == -EDEADLK) { - err = i915_gem_ww_ctx_backoff(&eb.ww); - if (!err) - goto retry; - } - i915_gem_ww_ctx_fini(&eb.ww); - - if (eb.reloc_pool) - intel_gt_buffer_pool_put(eb.reloc_pool); - if (eb.reloc_context) - intel_context_put(eb.reloc_context); - - intel_context_put(eb.context); -err_pm: - intel_engine_pm_put(eb.engine); - if (err) - break; - } - - if (igt_flush_test(eb.i915)) - err = -EIO; - - intel_gt_pm_put(&eb.i915->gt); -err_scratch: - i915_gem_object_put(scratch); - return err; -} - -int i915_gem_execbuffer_live_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(igt_gpu_reloc), - }; - - if (intel_gt_is_wedged(&i915->gt)) - return 0; - - return i915_live_subtests(tests, i915); -} From 5db1856781e45c9610f7652a19cc656b984235e7 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Mon, 23 Aug 2021 09:31:37 -0700 Subject: [PATCH 33/85] drm/i915/guc: drop guc_communication_enabled The function is only used from within GEM_BUG_ON(), which is causing warnings with Wunneeded-internal-declaration in some builds. Since the function is a simple wrapper around a CT function, we can just call the CT function directly instead. Fixes: 1fb12c587152 ("drm/i915/guc: skip disabling CTBs before sanitizing the GuC") Reported-by: kernel test robot Signed-off-by: Daniele Ceraolo Spurio Cc: Matthew Brost Cc: John Harrison Reviewed-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210823163137.19770-1-daniele.ceraolospurio@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index b104fb7607eb..86c318516e14 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -172,11 +172,6 @@ void intel_uc_driver_remove(struct intel_uc *uc) __uc_free_load_err_log(uc); } -static inline bool guc_communication_enabled(struct intel_guc *guc) -{ - return intel_guc_ct_enabled(&guc->ct); -} - /* * Events triggered while CT buffers are disabled are logged in the SCRATCH_15 * register using the same bits used in the CT message payload. Since our @@ -210,7 +205,7 @@ static void guc_get_mmio_msg(struct intel_guc *guc) static void guc_handle_mmio_msg(struct intel_guc *guc) { /* we need communication to be enabled to reply to GuC */ - GEM_BUG_ON(!guc_communication_enabled(guc)); + GEM_BUG_ON(!intel_guc_ct_enabled(&guc->ct)); spin_lock_irq(&guc->irq_lock); if (guc->mmio_msg) { @@ -226,7 +221,7 @@ static int guc_enable_communication(struct intel_guc *guc) struct drm_i915_private *i915 = gt->i915; int ret; - GEM_BUG_ON(guc_communication_enabled(guc)); + GEM_BUG_ON(intel_guc_ct_enabled(&guc->ct)); ret = i915_inject_probe_error(i915, -ENXIO); if (ret) @@ -662,7 +657,7 @@ static int __uc_resume(struct intel_uc *uc, bool enable_communication) return 0; /* Make sure we enable communication if and only if it's disabled */ - GEM_BUG_ON(enable_communication == guc_communication_enabled(guc)); + GEM_BUG_ON(enable_communication == intel_guc_ct_enabled(&guc->ct)); if (enable_communication) guc_enable_communication(guc); From 450cede7f3804ca7f8b3da210ebefa61c0958f22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= Date: Tue, 31 Aug 2021 14:29:31 +0200 Subject: [PATCH 34/85] drm/i915/gem: Fix the mman selftest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using the I915_MMAP_TYPE_FIXED mmap type requires the TTM backend, so for that mmap type, use __i915_gem_object_create_user() instead of i915_gem_object_create_internal(), as we really want to tests objects mmap-able by user-space. This also means that the out-of-space error happens at object creation and returns -ENXIO rather than -ENOSPC, so fix the code up to expect that on out-of-offset-space errors. Finally only use I915_MMAP_TYPE_FIXED for LMEM and SMEM for now if testing on LMEM-capable devices. For stolen LMEM, we still take the same path as for integrated, as that haven't been moved over to TTM yet, and user-space should not be able to create out of stolen LMEM anyway. v2: - Check the presence of the obj->ops->mmap_offset callback rather than hardcoding the supported mmap regions in can_mmap() (Maarten Lankhorst) Fixes: 7961c5b60f23 ("drm/i915: Add TTM offset argument to mmap.") Cc: Maarten Lankhorst Signed-off-by: Thomas Hellström Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20210831122931.157536-1-thomas.hellstrom@linux.intel.com --- .../drm/i915/gem/selftests/i915_gem_mman.c | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index b20f5621f62b..a2c34e5a1c54 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -581,6 +581,20 @@ static enum i915_mmap_type default_mapping(struct drm_i915_private *i915) return I915_MMAP_TYPE_GTT; } +static struct drm_i915_gem_object * +create_sys_or_internal(struct drm_i915_private *i915, + unsigned long size) +{ + if (HAS_LMEM(i915)) { + struct intel_memory_region *sys_region = + i915->mm.regions[INTEL_REGION_SMEM]; + + return __i915_gem_object_create_user(i915, size, &sys_region, 1); + } + + return i915_gem_object_create_internal(i915, size); +} + static bool assert_mmap_offset(struct drm_i915_private *i915, unsigned long size, int expected) @@ -589,7 +603,7 @@ static bool assert_mmap_offset(struct drm_i915_private *i915, u64 offset; int ret; - obj = i915_gem_object_create_internal(i915, size); + obj = create_sys_or_internal(i915, size); if (IS_ERR(obj)) return expected && expected == PTR_ERR(obj); @@ -633,6 +647,7 @@ static int igt_mmap_offset_exhaustion(void *arg) struct drm_mm_node *hole, *next; int loop, err = 0; u64 offset; + int enospc = HAS_LMEM(i915) ? -ENXIO : -ENOSPC; /* Disable background reaper */ disable_retire_worker(i915); @@ -683,14 +698,14 @@ static int igt_mmap_offset_exhaustion(void *arg) } /* Too large */ - if (!assert_mmap_offset(i915, 2 * PAGE_SIZE, -ENOSPC)) { + if (!assert_mmap_offset(i915, 2 * PAGE_SIZE, enospc)) { pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n"); err = -EINVAL; goto out; } /* Fill the hole, further allocation attempts should then fail */ - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + obj = create_sys_or_internal(i915, PAGE_SIZE); if (IS_ERR(obj)) { err = PTR_ERR(obj); pr_err("Unable to create object for reclaimed hole\n"); @@ -703,7 +718,7 @@ static int igt_mmap_offset_exhaustion(void *arg) goto err_obj; } - if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) { + if (!assert_mmap_offset(i915, PAGE_SIZE, enospc)) { pr_err("Unexpectedly succeeded in inserting object into no holes!\n"); err = -EINVAL; goto err_obj; @@ -839,10 +854,9 @@ static int wc_check(struct drm_i915_gem_object *obj) static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type) { - struct drm_i915_private *i915 = to_i915(obj->base.dev); bool no_map; - if (HAS_LMEM(i915)) + if (obj->ops->mmap_offset) return type == I915_MMAP_TYPE_FIXED; else if (type == I915_MMAP_TYPE_FIXED) return false; From b62aa57e3c78d749a1932b636c8fa4e1ef655f4d Mon Sep 17 00:00:00 2001 From: Ayaz A Siddiqui Date: Fri, 3 Sep 2021 14:51:49 +0530 Subject: [PATCH 35/85] drm/i915/gt: Add support of mocs propagation Now there are lots of Command and registers that require mocs index programming. So propagating mocs_index from mocs to gt so that it can be used directly without having platform-specific checks. V2: Changed 'i915_mocs_index_gt' to anonymous structure. Cc: CQ Tang Reviewed-by: Matt Roper Signed-off-by: Ayaz A Siddiqui Signed-off-by: Ramalingam C Link: https://patchwork.freedesktop.org/patch/msgid/20210903092153.535736-2-ayaz.siddiqui@intel.com --- drivers/gpu/drm/i915/gt/intel_gt.c | 2 ++ drivers/gpu/drm/i915/gt/intel_gt_types.h | 4 ++++ drivers/gpu/drm/i915/gt/intel_mocs.c | 13 +++++++++++++ drivers/gpu/drm/i915/gt/intel_mocs.h | 1 + 4 files changed, 20 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 62d40c986642..2aeaae036a6f 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -682,6 +682,8 @@ int intel_gt_init(struct intel_gt *gt) goto err_pm; } + set_mocs_index(gt); + err = intel_engines_init(gt); if (err) goto err_engines; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index a81e21bf1bd1..6fdcde64c180 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -192,6 +192,10 @@ struct intel_gt { unsigned long mslice_mask; } info; + + struct { + u8 uc_index; + } mocs; }; enum intel_gt_scratch_field { diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 582c4423b95d..7ccac15d9a33 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -22,6 +22,7 @@ struct drm_i915_mocs_table { unsigned int size; unsigned int n_entries; const struct drm_i915_mocs_entry *table; + u8 uc_index; }; /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ @@ -340,14 +341,18 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, { unsigned int flags; + memset(table, 0, sizeof(struct drm_i915_mocs_table)); + if (IS_DG1(i915)) { table->size = ARRAY_SIZE(dg1_mocs_table); table->table = dg1_mocs_table; + table->uc_index = 1; table->n_entries = GEN9_NUM_MOCS_ENTRIES; } else if (GRAPHICS_VER(i915) >= 12) { table->size = ARRAY_SIZE(tgl_mocs_table); table->table = tgl_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->uc_index = 3; } else if (GRAPHICS_VER(i915) == 11) { table->size = ARRAY_SIZE(icl_mocs_table); table->table = icl_mocs_table; @@ -504,6 +509,14 @@ static u32 global_mocs_offset(void) return i915_mmio_reg_offset(GEN12_GLOBAL_MOCS(0)); } +void set_mocs_index(struct intel_gt *gt) +{ + struct drm_i915_mocs_table table; + + get_mocs_settings(gt->i915, &table); + gt->mocs.uc_index = table.uc_index; +} + void intel_mocs_init(struct intel_gt *gt) { struct drm_i915_mocs_table table; diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.h b/drivers/gpu/drm/i915/gt/intel_mocs.h index d83274f5163b..8a09d64b115f 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.h +++ b/drivers/gpu/drm/i915/gt/intel_mocs.h @@ -36,5 +36,6 @@ struct intel_gt; void intel_mocs_init(struct intel_gt *gt); void intel_mocs_init_engine(struct intel_engine_cs *engine); +void set_mocs_index(struct intel_gt *gt); #endif From d79a1d71318014066b6e1c78e5457a105d67f2ea Mon Sep 17 00:00:00 2001 From: Ayaz A Siddiqui Date: Fri, 3 Sep 2021 14:51:50 +0530 Subject: [PATCH 36/85] drm/i915/gt: Set CMD_CCTL to UC for Gen12 Onward Cache-control registers for Command Stream(CMD_CCTL) are used to set catchability for memory writes and reads outputted by Command Streamers on Gen12 onward platforms. These registers need to point un-cached(UC) MOCS index. Reviewed-by: Matt Roper Signed-off-by: Ayaz A Siddiqui Signed-off-by: Ramalingam C Link: https://patchwork.freedesktop.org/patch/msgid/20210903092153.535736-3-ayaz.siddiqui@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 27 +++++++++++++++++++++ drivers/gpu/drm/i915/i915_reg.h | 17 +++++++++++++ 2 files changed, 44 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 94e1937f8d29..ef7255a44b9a 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1640,6 +1640,31 @@ void intel_engine_apply_whitelist(struct intel_engine_cs *engine) i915_mmio_reg_offset(RING_NOPID(base))); } +/* + * engine_fake_wa_init(), a place holder to program the registers + * which are not part of an official workaround defined by the + * hardware team. + * Adding programming of those register inside workaround will + * allow utilizing wa framework to proper application and verification. + */ +static void +engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) +{ + u8 mocs; + + /* + * RING_CMD_CCTL are need to be programed to un-cached + * for memory writes and reads outputted by Command + * Streamers on Gen12 onward platforms. + */ + if (GRAPHICS_VER(engine->i915) >= 12) { + mocs = engine->gt->mocs.uc_index; + wa_masked_field_set(wal, + RING_CMD_CCTL(engine->mmio_base), + CMD_CCTL_MOCS_MASK, + CMD_CCTL_MOCS_OVERRIDE(mocs, mocs)); + } +} static void rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { @@ -2080,6 +2105,8 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal if (I915_SELFTEST_ONLY(GRAPHICS_VER(engine->i915) < 4)) return; + engine_fake_wa_init(engine, wal); + if (engine->class == RENDER_CLASS) rcs_engine_wa_init(engine, wal); else diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index b289f5cefa59..0f44fcf8d6a4 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2560,6 +2560,23 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define RING_HWS_PGA(base) _MMIO((base) + 0x80) #define RING_ID(base) _MMIO((base) + 0x8c) #define RING_HWS_PGA_GEN6(base) _MMIO((base) + 0x2080) + +#define RING_CMD_CCTL(base) _MMIO((base) + 0xc4) +/* + * CMD_CCTL read/write fields take a MOCS value and _not_ a table index. + * The lsb of each can be considered a separate enabling bit for encryption. + * 6:0 == default MOCS value for reads => 6:1 == table index for reads. + * 13:7 == default MOCS value for writes => 13:8 == table index for writes. + * 15:14 == Reserved => 31:30 are set to 0. + */ +#define CMD_CCTL_WRITE_OVERRIDE_MASK REG_GENMASK(13, 7) +#define CMD_CCTL_READ_OVERRIDE_MASK REG_GENMASK(6, 0) +#define CMD_CCTL_MOCS_MASK (CMD_CCTL_WRITE_OVERRIDE_MASK | \ + CMD_CCTL_READ_OVERRIDE_MASK) +#define CMD_CCTL_MOCS_OVERRIDE(write, read) \ + (REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 1) | \ + REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1)) + #define RING_RESET_CTL(base) _MMIO((base) + 0xd0) #define RESET_CTL_CAT_ERROR REG_BIT(2) #define RESET_CTL_READY_TO_RESET REG_BIT(1) From c6b248489dc3f780ee91e187a1431825d6f298fd Mon Sep 17 00:00:00 2001 From: Ayaz A Siddiqui Date: Fri, 3 Sep 2021 14:51:51 +0530 Subject: [PATCH 37/85] drm/i915/gt: Set BLIT_CCTL reg to un-cached Blitter commands which do not have MOCS fields rely on cacheability of BlitterCacheControlRegister which was mapped to index 0 by default.Once we changed the MOCS value of index 0 to L3 WB, tests like gem_linear_blits started failing due to a change in cacheability from UC to WB. Program and place the BlitterCacheControlRegister in build_aux_regs(). Reviewed-by: Matt Roper Signed-off-by: Ayaz A Siddiqui Signed-off-by: Ramalingam C Link: https://patchwork.freedesktop.org/patch/msgid/20210903092153.535736-4-ayaz.siddiqui@intel.com --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 43 ++++++++++++++++++++- drivers/gpu/drm/i915/i915_reg.h | 9 +++++ 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index ef7255a44b9a..c314d4917b6b 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -675,6 +675,41 @@ static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine, wa_masked_dis(wal, RING_MI_MODE(engine->mmio_base), TGL_NESTED_BB_EN); } +static void gen12_ctx_gt_mocs_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + u8 mocs; + + /* + * Some blitter commands do not have a field for MOCS, those + * commands will use MOCS index pointed by BLIT_CCTL. + * BLIT_CCTL registers are needed to be programmed to un-cached. + */ + if (engine->class == COPY_ENGINE_CLASS) { + mocs = engine->gt->mocs.uc_index; + wa_write_clr_set(wal, + BLIT_CCTL(engine->mmio_base), + BLIT_CCTL_MASK, + BLIT_CCTL_MOCS(mocs, mocs)); + } +} + +/* + * gen12_ctx_gt_fake_wa_init() aren't programmingan official workaround + * defined by the hardware team, but it programming general context registers. + * Adding those context register programming in context workaround + * allow us to use the wa framework for proper application and validation. + */ +static void +gen12_ctx_gt_fake_wa_init(struct intel_engine_cs *engine, + struct i915_wa_list *wal) +{ + if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55)) + fakewa_disable_nestedbb_mode(engine, wal); + + gen12_ctx_gt_mocs_init(engine, wal); +} + static void __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, struct i915_wa_list *wal, @@ -685,8 +720,12 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine, wa_init_start(wal, name, engine->name); /* Applies to all engines */ - if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) - fakewa_disable_nestedbb_mode(engine, wal); + /* + * Fake workarounds are not the actual workaround but + * programming of context registers using workaround framework. + */ + if (GRAPHICS_VER(i915) >= 12) + gen12_ctx_gt_fake_wa_init(engine, wal); if (engine->class != RENDER_CLASS) goto done; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 0f44fcf8d6a4..00b17bc32afb 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2577,6 +2577,15 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) (REG_FIELD_PREP(CMD_CCTL_WRITE_OVERRIDE_MASK, (write) << 1) | \ REG_FIELD_PREP(CMD_CCTL_READ_OVERRIDE_MASK, (read) << 1)) +#define BLIT_CCTL(base) _MMIO((base) + 0x204) +#define BLIT_CCTL_DST_MOCS_MASK REG_GENMASK(14, 8) +#define BLIT_CCTL_SRC_MOCS_MASK REG_GENMASK(6, 0) +#define BLIT_CCTL_MASK (BLIT_CCTL_DST_MOCS_MASK | \ + BLIT_CCTL_SRC_MOCS_MASK) +#define BLIT_CCTL_MOCS(dst, src) \ + (REG_FIELD_PREP(BLIT_CCTL_DST_MOCS_MASK, (dst) << 1) | \ + REG_FIELD_PREP(BLIT_CCTL_SRC_MOCS_MASK, (src) << 1)) + #define RING_RESET_CTL(base) _MMIO((base) + 0xd0) #define RESET_CTL_CAT_ERROR REG_BIT(2) #define RESET_CTL_READY_TO_RESET REG_BIT(1) From cfbe5291a1890b688e6f3accbe2b0e1cf3c601fb Mon Sep 17 00:00:00 2001 From: Ayaz A Siddiqui Date: Fri, 3 Sep 2021 14:51:52 +0530 Subject: [PATCH 38/85] drm/i915/gt: Initialize unused MOCS entries with device specific values Historically we've initialized all undefined/reserved entries in a platform's MOCS table to the contents of table entry #1 (i.e., I915_MOCS_PTE). Going forward, we can't assume that table entry #1 will always contain suitable values to use for undefined/reserved table indices. We'll allow a platform-specific table index to be selected at table initialization time in these cases. This new mechanism to select L3 WB entry will be applicable for all the Gen12+ platforms except TGL and RKL. Since TGL and RLK are already in production so their mocs settings are intact to avoid ABI break. Reviewed-by: Matt Roper Signed-off-by: Ayaz A Siddiqui Signed-off-by: Ramalingam C Link: https://patchwork.freedesktop.org/patch/msgid/20210903092153.535736-5-ayaz.siddiqui@intel.com --- drivers/gpu/drm/i915/gt/intel_mocs.c | 46 ++++++++++++++++------------ 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 7ccac15d9a33..552bfd1c113b 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -23,6 +23,7 @@ struct drm_i915_mocs_table { unsigned int n_entries; const struct drm_i915_mocs_entry *table; u8 uc_index; + u8 unused_entries_index; }; /* Defines for the tables (XXX_MOCS_0 - XXX_MOCS_63) */ @@ -89,18 +90,25 @@ struct drm_i915_mocs_table { * * Entries not part of the following tables are undefined as far as * userspace is concerned and shouldn't be relied upon. For Gen < 12 - * they will be initialized to PTE. Gen >= 12 onwards don't have a setting for - * PTE and will be initialized to an invalid value. + * they will be initialized to PTE. Gen >= 12 don't have a setting for + * PTE and those platforms except TGL/RKL will be initialized L3 WB to + * catch accidental use of reserved and unused mocs indexes. * * The last few entries are reserved by the hardware. For ICL+ they * should be initialized according to bspec and never used, for older * platforms they should never be written to. * - * NOTE: These tables are part of bspec and defined as part of hardware + * NOTE1: These tables are part of bspec and defined as part of hardware * interface for ICL+. For older platforms, they are part of kernel * ABI. It is expected that, for specific hardware platform, existing * entries will remain constant and the table will only be updated by * adding new entries, filling unused positions. + * + * NOTE2: For GEN >= 12 except TGL and RKL, reserved and unspecified MOCS + * indices have been set to L3 WB. These reserved entries should never + * be used, they may be changed to low performant variants with better + * coherency in the future if more entries are needed. + * For TGL/RKL, all the unspecified MOCS indexes are mapped to L3 UC. */ #define GEN9_MOCS_ENTRIES \ MOCS_ENTRY(I915_MOCS_UNCACHED, \ @@ -283,17 +291,9 @@ static const struct drm_i915_mocs_entry icl_mocs_table[] = { }; static const struct drm_i915_mocs_entry dg1_mocs_table[] = { - /* Error */ - MOCS_ENTRY(0, 0, L3_0_DIRECT), /* UC */ MOCS_ENTRY(1, 0, L3_1_UC), - - /* Reserved */ - MOCS_ENTRY(2, 0, L3_0_DIRECT), - MOCS_ENTRY(3, 0, L3_0_DIRECT), - MOCS_ENTRY(4, 0, L3_0_DIRECT), - /* WB - L3 */ MOCS_ENTRY(5, 0, L3_3_WB), /* WB - L3 50% */ @@ -343,16 +343,22 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, memset(table, 0, sizeof(struct drm_i915_mocs_table)); + table->unused_entries_index = I915_MOCS_PTE; if (IS_DG1(i915)) { table->size = ARRAY_SIZE(dg1_mocs_table); table->table = dg1_mocs_table; table->uc_index = 1; table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->uc_index = 1; + table->unused_entries_index = 5; } else if (GRAPHICS_VER(i915) >= 12) { table->size = ARRAY_SIZE(tgl_mocs_table); table->table = tgl_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->uc_index = 3; + /* For TGL/RKL, Can't be changed now for ABI reasons */ + if (!IS_TIGERLAKE(i915) && !IS_ROCKETLAKE(i915)) + table->unused_entries_index = 2; } else if (GRAPHICS_VER(i915) == 11) { table->size = ARRAY_SIZE(icl_mocs_table); table->table = icl_mocs_table; @@ -398,16 +404,16 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, } /* - * Get control_value from MOCS entry taking into account when it's not used: - * I915_MOCS_PTE's value is returned in this case. + * Get control_value from MOCS entry taking into account when it's not used + * then if unused_entries_index is non-zero then its value will be returned + * otherwise I915_MOCS_PTE's value is returned in this case. */ static u32 get_entry_control(const struct drm_i915_mocs_table *table, unsigned int index) { if (index < table->size && table->table[index].used) return table->table[index].control_value; - - return table->table[I915_MOCS_PTE].control_value; + return table->table[table->unused_entries_index].control_value; } #define for_each_mocs(mocs, t, i) \ @@ -422,6 +428,8 @@ static void __init_mocs_table(struct intel_uncore *uncore, unsigned int i; u32 mocs; + drm_WARN_ONCE(&uncore->i915->drm, !table->unused_entries_index, + "Unused entries index should have been defined\n"); for_each_mocs(mocs, table, i) intel_uncore_write_fw(uncore, _MMIO(addr + i * 4), mocs); } @@ -448,16 +456,16 @@ static void init_mocs_table(struct intel_engine_cs *engine, } /* - * Get l3cc_value from MOCS entry taking into account when it's not used: - * I915_MOCS_PTE's value is returned in this case. + * Get l3cc_value from MOCS entry taking into account when it's not used + * then if unused_entries_index is not zero then its value will be returned + * otherwise I915_MOCS_PTE's value is returned in this case. */ static u16 get_entry_l3cc(const struct drm_i915_mocs_table *table, unsigned int index) { if (index < table->size && table->table[index].used) return table->table[index].l3cc_value; - - return table->table[I915_MOCS_PTE].l3cc_value; + return table->table[table->unused_entries_index].l3cc_value; } static u32 l3cc_combine(u16 low, u16 high) From fb1e95bc2755dd29625c6ba7d553284112761f88 Mon Sep 17 00:00:00 2001 From: Sreedhar Telukuntla Date: Fri, 3 Sep 2021 14:51:53 +0530 Subject: [PATCH 39/85] drm/i915/gt: Initialize L3CC table in mocs init Initialize the L3CC table as part of mocs initialization to program LNCFCMOCSx registers so that the mocs settings are available for selection for subsequent memory transactions in the driver load path. We need to keep L3CC initialization in intel_mocs_init_engine() also so that in execlists submission, these registers can be rewritten during engine reset. Reviewed-by: Matt Roper Signed-off-by: Sreedhar Telukuntla Signed-off-by: Ayaz A Siddiqui Signed-off-by: Ramalingam C Link: https://patchwork.freedesktop.org/patch/msgid/20210903092153.535736-6-ayaz.siddiqui@intel.com --- drivers/gpu/drm/i915/gt/intel_mocs.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 552bfd1c113b..e96afd7beb49 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -481,10 +481,9 @@ static u32 l3cc_combine(u16 low, u16 high) 0; \ i++) -static void init_l3cc_table(struct intel_engine_cs *engine, +static void init_l3cc_table(struct intel_uncore *uncore, const struct drm_i915_mocs_table *table) { - struct intel_uncore *uncore = engine->uncore; unsigned int i; u32 l3cc; @@ -509,7 +508,7 @@ void intel_mocs_init_engine(struct intel_engine_cs *engine) init_mocs_table(engine, &table); if (flags & HAS_RENDER_L3CC && engine->class == RENDER_CLASS) - init_l3cc_table(engine, &table); + init_l3cc_table(engine->uncore, &table); } static u32 global_mocs_offset(void) @@ -536,6 +535,14 @@ void intel_mocs_init(struct intel_gt *gt) flags = get_mocs_settings(gt->i915, &table); if (flags & HAS_GLOBAL_MOCS) __init_mocs_table(gt->uncore, &table, global_mocs_offset()); + + /* + * Initialize the L3CC table as part of mocs initalization to make + * sure the LNCFCMOCSx registers are programmed for the subsequent + * memory transactions including guc transactions + */ + if (flags & HAS_RENDER_L3CC) + init_l3cc_table(gt->uncore, &table); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) From 75eefd82581f32da77d7017d11a932ee12a998eb Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 2 Sep 2021 16:20:47 +0200 Subject: [PATCH 40/85] drm/i915: Release i915_gem_context from a worker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only reason for this really is the i915_gem_engines->fence callback engines_notify(), which exists purely as a fairly funky reference counting scheme for that. Otherwise all other callers are from process context, and generally fairly benign locking context. Unfortunately untangling that requires some major surgery, and we have a few i915_gem_context reference counting bugs that need fixing, and they blow in the current hardirq calling context, so we need a stop-gap measure. Put a FIXME comment in when this should be removable again. v2: Fix mock_context(), noticed by intel-gfx-ci. Acked-by: Acked-by: Maarten Lankhorst Signed-off-by: Daniel Vetter Cc: Jon Bloomfield Cc: Chris Wilson Cc: Maarten Lankhorst Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: "Thomas Hellström" Cc: Matthew Auld Cc: Lionel Landwerlin Cc: Dave Airlie Cc: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210902142057.929669-1-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 13 +++++++++++-- drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 12 ++++++++++++ drivers/gpu/drm/i915/gem/selftests/mock_context.c | 1 + 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index fd169cf2f75a..051bc357ff65 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -986,9 +986,10 @@ free_engines: return err; } -void i915_gem_context_release(struct kref *ref) +static void i915_gem_context_release_work(struct work_struct *work) { - struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + struct i915_gem_context *ctx = container_of(work, typeof(*ctx), + release_work); trace_i915_context_free(ctx); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); @@ -1002,6 +1003,13 @@ void i915_gem_context_release(struct kref *ref) kfree_rcu(ctx, rcu); } +void i915_gem_context_release(struct kref *ref) +{ + struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); + + queue_work(ctx->i915->wq, &ctx->release_work); +} + static inline struct i915_gem_engines * __context_engines_static(const struct i915_gem_context *ctx) { @@ -1303,6 +1311,7 @@ i915_gem_create_context(struct drm_i915_private *i915, ctx->sched = pc->sched; mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->link); + INIT_WORK(&ctx->release_work, i915_gem_context_release_work); spin_lock_init(&ctx->stale.lock); INIT_LIST_HEAD(&ctx->stale.engines); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 94c03a97cb77..0c38789bd4a8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -288,6 +288,18 @@ struct i915_gem_context { */ struct kref ref; + /** + * @release_work: + * + * Work item for deferred cleanup, since i915_gem_context_put() tends to + * be called from hardirq context. + * + * FIXME: The only real reason for this is &i915_gem_engines.fence, all + * other callers are from process context and need at most some mild + * shuffling to pull the i915_gem_context_put() call out of a spinlock. + */ + struct work_struct release_work; + /** * @rcu: rcu_head for deferred freeing. */ diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index fee070df1c97..067d68a6fe4c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -23,6 +23,7 @@ mock_context(struct drm_i915_private *i915, kref_init(&ctx->ref); INIT_LIST_HEAD(&ctx->link); ctx->i915 = i915; + INIT_WORK(&ctx->release_work, i915_gem_context_release_work); mutex_init(&ctx->mutex); From c238980efd3b35af70fc926066cf7440f50a97a9 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 2 Sep 2021 16:20:48 +0200 Subject: [PATCH 41/85] drm/i915: Release ctx->syncobj on final put, not on ctx close MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gem context refcounting is another exercise in least locking design it seems, where most things get destroyed upon context closure (which can race with anything really). Only the actual memory allocation and the locks survive while holding a reference. This tripped up Jason when reimplementing the single timeline feature in commit 00dae4d3d35d4f526929633b76e00b0ab4d3970d Author: Jason Ekstrand Date: Thu Jul 8 10:48:12 2021 -0500 drm/i915: Implement SINGLE_TIMELINE with a syncobj (v4) We could fix the bug by holding ctx->mutex in execbuf and clear the pointer (again while holding the mutex) context_close, but it's cleaner to just make the context object actually invariant over its _entire_ lifetime. This way any other ioctl that's potentially racing, but holding a full reference, can still rely on ctx->syncobj being an immutable pointer. Which without this change, is not the case. Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter Fixes: 00dae4d3d35d ("drm/i915: Implement SINGLE_TIMELINE with a syncobj (v4)") Cc: Jason Ekstrand Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Matthew Brost Cc: Matthew Auld Cc: Maarten Lankhorst Cc: "Thomas Hellström" Cc: Lionel Landwerlin Cc: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20210902142057.929669-2-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 051bc357ff65..5a053cf14948 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -994,6 +994,9 @@ static void i915_gem_context_release_work(struct work_struct *work) trace_i915_context_free(ctx); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); + if (ctx->syncobj) + drm_syncobj_put(ctx->syncobj); + mutex_destroy(&ctx->engines_mutex); mutex_destroy(&ctx->lut_mutex); @@ -1220,9 +1223,6 @@ static void context_close(struct i915_gem_context *ctx) if (vm) i915_vm_close(vm); - if (ctx->syncobj) - drm_syncobj_put(ctx->syncobj); - ctx->file_priv = ERR_PTR(-EBADF); /* From 8cf97637ff8891be040bac37b96dd97e5996ca93 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 2 Sep 2021 16:20:49 +0200 Subject: [PATCH 42/85] drm/i915: Keep gem ctx->vm alive until the final put MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The comment added in commit b81dde719439c8f09bb61e742ed95bfc4b33946b Author: Chris Wilson Date: Tue May 21 22:11:29 2019 +0100 drm/i915: Allow userspace to clone contexts on creation and moved in commit 27dbae8f36c1c25008b7885fc07c57054b7dfba3 Author: Chris Wilson Date: Wed Nov 6 09:13:12 2019 +0000 drm/i915/gem: Safely acquire the ctx->vm when copying suggested that i915_address_space were at least intended to be managed through SLAB_TYPESAFE_BY_RCU: * This ppgtt may have be reallocated between * the read and the kref, and reassigned to a third * context. In order to avoid inadvertent sharing * of this ppgtt with that third context (and not * src), we have to confirm that we have the same * ppgtt after passing through the strong memory * barrier implied by a successful * kref_get_unless_zero(). But extensive git history search has not brough any such reuse to light. What has come to light though is that ever since commit 2850748ef8763ab46958e43a4d1c445f29eeb37d Author: Chris Wilson Date: Fri Oct 4 14:39:58 2019 +0100 drm/i915: Pull i915_vma_pin under the vm->mutex (yes this commit is earlier) the final i915_vma_put call has been moved from i915_gem_context_free (now called _release) to context_close, which means it's not actually safe anymore to access the ctx->vm pointer without lock helds, because it might disappear at any moment. Note that superficially things all still work, because the i915_address_space is RCU protected since commit b32fa811156328aea5a3c2ff05cc096490382456 Author: Chris Wilson Date: Thu Jun 20 19:37:05 2019 +0100 drm/i915/gtt: Defer address space cleanup to an RCU worker except the very clever macro above (which is designed to protected against object reuse due to SLAB_TYPESAFE_BY_RCU or similar tricks) results in an endless loop if the refcount of the ctx->vm ever permanently drops to 0. Which it totally now can. Fix that by moving the final i915_vm_put to where it should be. Note that i915_gem_context is rcu protected, but _only_ the final kfree. This means anyone who chases a pointer to a gem ctx solely under the protection can pretty only call kref_get_unless_zero(). This seems to be pretty much the case, aside from a bunch of cases that consult the scheduling information without any further protection. Reviewed-by: Maarten Lankhorst Cc: Jason Ekstrand Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Matthew Brost Cc: Matthew Auld Cc: Maarten Lankhorst Cc: "Thomas Hellström" Cc: Lionel Landwerlin Cc: Dave Airlie Fixes: 2850748ef876 ("drm/i915: Pull i915_vma_pin under the vm->mutex") Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210902142057.929669-3-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 5a053cf14948..12e2de1db1a2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -990,6 +990,7 @@ static void i915_gem_context_release_work(struct work_struct *work) { struct i915_gem_context *ctx = container_of(work, typeof(*ctx), release_work); + struct i915_address_space *vm; trace_i915_context_free(ctx); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); @@ -997,6 +998,10 @@ static void i915_gem_context_release_work(struct work_struct *work) if (ctx->syncobj) drm_syncobj_put(ctx->syncobj); + vm = i915_gem_context_vm(ctx); + if (vm) + i915_vm_put(vm); + mutex_destroy(&ctx->engines_mutex); mutex_destroy(&ctx->lut_mutex); @@ -1220,8 +1225,15 @@ static void context_close(struct i915_gem_context *ctx) set_closed_name(ctx); vm = i915_gem_context_vm(ctx); - if (vm) + if (vm) { + /* i915_vm_close drops the final reference, which is a bit too + * early and could result in surprises with concurrent + * operations racing with thist ctx close. Keep a full reference + * until the end. + */ + i915_vm_get(vm); i915_vm_close(vm); + } ctx->file_priv = ERR_PTR(-EBADF); From e1068a9e808a14cd532ede325e5e16df45c63f18 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 2 Sep 2021 16:20:50 +0200 Subject: [PATCH 43/85] drm/i915: Drop code to handle set-vm races from execbuf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changing the vm from a finalized gem ctx is no longer possible, which means we don't have to check for that anymore. I was pondering whether to keep the check as a WARN_ON, but things go boom real bad real fast if the vm of a vma is wrong. Plus we'd need to also get the ggtt vm for !full-ppgtt platforms. Ditching it all seemed like a better idea. Reviewed-by: Maarten Lankhorst References: ccbc1b97948a ("drm/i915/gem: Don't allow changing the VM on running contexts (v4)") Signed-off-by: Daniel Vetter Cc: Jon Bloomfield Cc: Chris Wilson Cc: Maarten Lankhorst Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: "Thomas Hellström" Cc: Matthew Auld Cc: Lionel Landwerlin Cc: Dave Airlie Cc: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210902142057.929669-4-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 45bc4e4ff048..4816359c4c21 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -759,11 +759,7 @@ static int __eb_add_lut(struct i915_execbuffer *eb, /* Check that the context hasn't been closed in the meantime */ err = -EINTR; if (!mutex_lock_interruptible(&ctx->lut_mutex)) { - struct i915_address_space *vm = rcu_access_pointer(ctx->vm); - - if (unlikely(vm && vma->vm != vm)) - err = -EAGAIN; /* user racing with ctx set-vm */ - else if (likely(!i915_gem_context_is_closed(ctx))) + if (likely(!i915_gem_context_is_closed(ctx))) err = radix_tree_insert(&ctx->handles_vma, handle, vma); else err = -ENOENT; From c6d04e48d2e6d0e41c4cc4098c5494713086b597 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 2 Sep 2021 16:20:51 +0200 Subject: [PATCH 44/85] drm/i915: Rename i915_gem_context_get_vm_rcu to i915_gem_context_get_eb_vm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The important part isn't so much that this does an rcu lookup - that's more an implementation detail, which will also be removed. The thing that makes this different from other functions is that it's gettting you the vm that batchbuffers will run in for that gem context, which is either a full ppgtt stored in gem->ctx, or the ggtt. We'll make more use of this function later on. Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter Cc: Jon Bloomfield Cc: Chris Wilson Cc: Maarten Lankhorst Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: "Thomas Hellström" Cc: Matthew Auld Cc: Lionel Landwerlin Cc: Dave Airlie Cc: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210902142057.929669-5-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_context.h | 2 +- drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 4 ++-- drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 4 ++-- drivers/gpu/drm/i915/gt/selftest_execlists.c | 2 +- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 4 ++-- drivers/gpu/drm/i915/selftests/i915_vma.c | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index 18060536b0c2..da6e8b506d96 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -155,7 +155,7 @@ i915_gem_context_vm(struct i915_gem_context *ctx) } static inline struct i915_address_space * -i915_gem_context_get_vm_rcu(struct i915_gem_context *ctx) +i915_gem_context_get_eb_vm(struct i915_gem_context *ctx) { struct i915_address_space *vm; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index a094f3ce1a90..6c68fe26bb32 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1456,7 +1456,7 @@ static int igt_tmpfs_fallback(void *arg) struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; struct vfsmount *gemfs = i915->mm.gemfs; - struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx); + struct i915_address_space *vm = i915_gem_context_get_eb_vm(ctx); struct drm_i915_gem_object *obj; struct i915_vma *vma; u32 *vaddr; @@ -1512,7 +1512,7 @@ static int igt_shrink_thp(void *arg) { struct i915_gem_context *ctx = arg; struct drm_i915_private *i915 = ctx->i915; - struct i915_address_space *vm = i915_gem_context_get_vm_rcu(ctx); + struct i915_address_space *vm = i915_gem_context_get_eb_vm(ctx); struct drm_i915_gem_object *obj; struct i915_gem_engines_iter it; struct intel_context *ce; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 4d2758718d21..fc7fb33a3a52 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -1528,7 +1528,7 @@ static int write_to_scratch(struct i915_gem_context *ctx, intel_gt_chipset_flush(engine->gt); - vm = i915_gem_context_get_vm_rcu(ctx); + vm = i915_gem_context_get_eb_vm(ctx); vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); @@ -1607,7 +1607,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, if (GRAPHICS_VER(i915) >= 8) { const u32 GPR0 = engine->mmio_base + 0x600; - vm = i915_gem_context_get_vm_rcu(ctx); + vm = i915_gem_context_get_eb_vm(ctx); vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) { err = PTR_ERR(vma); diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c index f12ffe797639..b3863abc51f5 100644 --- a/drivers/gpu/drm/i915/gt/selftest_execlists.c +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -3493,7 +3493,7 @@ static int smoke_submit(struct preempt_smoke *smoke, if (batch) { struct i915_address_space *vm; - vm = i915_gem_context_get_vm_rcu(ctx); + vm = i915_gem_context_get_eb_vm(ctx); vma = i915_vma_instance(batch, vm, NULL); i915_vm_put(vm); if (IS_ERR(vma)) diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 2c1ed32ca5ac..8be23e0f9306 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -117,7 +117,7 @@ static struct i915_request * hang_create_request(struct hang *h, struct intel_engine_cs *engine) { struct intel_gt *gt = h->gt; - struct i915_address_space *vm = i915_gem_context_get_vm_rcu(h->ctx); + struct i915_address_space *vm = i915_gem_context_get_eb_vm(h->ctx); struct drm_i915_gem_object *obj; struct i915_request *rq = NULL; struct i915_vma *hws, *vma; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index f843a5040706..2d60a5a5b065 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1300,7 +1300,7 @@ static int exercise_mock(struct drm_i915_private *i915, if (!ctx) return -ENOMEM; - vm = i915_gem_context_get_vm_rcu(ctx); + vm = i915_gem_context_get_eb_vm(ctx); err = func(vm, 0, min(vm->total, limit), end_time); i915_vm_put(vm); @@ -1848,7 +1848,7 @@ static int igt_cs_tlb(void *arg) goto out_unlock; } - vm = i915_gem_context_get_vm_rcu(ctx); + vm = i915_gem_context_get_eb_vm(ctx); if (i915_is_ggtt(vm)) goto out_vm; diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index dd0607254a95..79ba72da0813 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -118,7 +118,7 @@ static int create_vmas(struct drm_i915_private *i915, struct i915_vma *vma; int err; - vm = i915_gem_context_get_vm_rcu(ctx); + vm = i915_gem_context_get_eb_vm(ctx); vma = checked_vma_instance(obj, vm, NULL); i915_vm_put(vm); if (IS_ERR(vma)) From 24fad29e52e087317e91f08513b15ff7151d6d32 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 2 Sep 2021 16:20:52 +0200 Subject: [PATCH 45/85] drm/i915: Use i915_gem_context_get_eb_vm in ctx_getparam MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consolidates the "which is the vm my execbuf runs in" code a bit. We do some get/put which isn't really required, but all the other users want the refcounting, and I figured doing a function just for this getparam to avoid 2 atomis is a bit much. Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter Cc: Jon Bloomfield Cc: Chris Wilson Cc: Maarten Lankhorst Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: "Thomas Hellström" Cc: Matthew Auld Cc: Lionel Landwerlin Cc: Dave Airlie Cc: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210902142057.929669-6-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 12e2de1db1a2..7a566fb7cca4 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -2108,6 +2108,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, struct drm_i915_file_private *file_priv = file->driver_priv; struct drm_i915_gem_context_param *args = data; struct i915_gem_context *ctx; + struct i915_address_space *vm; int ret = 0; ctx = i915_gem_context_lookup(file_priv, args->ctx_id); @@ -2117,12 +2118,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, switch (args->param) { case I915_CONTEXT_PARAM_GTT_SIZE: args->size = 0; - rcu_read_lock(); - if (rcu_access_pointer(ctx->vm)) - args->value = rcu_dereference(ctx->vm)->total; - else - args->value = to_i915(dev)->ggtt.vm.total; - rcu_read_unlock(); + vm = i915_gem_context_get_eb_vm(ctx); + args->value = vm->total; + i915_vm_put(vm); + break; case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE: From a82a9979de227ac45d513ecade54fc9478a4181b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 2 Sep 2021 16:20:53 +0200 Subject: [PATCH 46/85] drm/i915: Add i915_gem_context_is_full_ppgtt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit And use it anywhere we have open-coded checks for ctx->vm that really only check for full ppgtt. Plus for paranoia add a GEM_BUG_ON that checks it's really only set when we have full ppgtt, just in case. gem_context->vm is different since it's NULL in ggtt mode, unlike intel_context->vm or gt->vm, which is always set. v2: 0day found a testcase that I missed. v3: Repaint shed (Jon, Tvrtko) Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter Cc: Jon Bloomfield Cc: Chris Wilson Cc: Maarten Lankhorst Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: "Thomas Hellström" Cc: Matthew Auld Cc: Lionel Landwerlin Cc: Dave Airlie Cc: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210902142057.929669-7-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/gem/i915_gem_context.h | 7 +++++++ drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 6 +++--- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 7a566fb7cca4..b8067b35d2d6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1566,7 +1566,7 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, int err; u32 id; - if (!rcu_access_pointer(ctx->vm)) + if (!i915_gem_context_has_full_ppgtt(ctx)) return -ENODEV; rcu_read_lock(); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index da6e8b506d96..c50ded3d1cd3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -154,6 +154,13 @@ i915_gem_context_vm(struct i915_gem_context *ctx) return rcu_dereference_protected(ctx->vm, lockdep_is_held(&ctx->mutex)); } +static inline bool i915_gem_context_has_full_ppgtt(struct i915_gem_context *ctx) +{ + GEM_BUG_ON(!!rcu_access_pointer(ctx->vm) != HAS_FULL_PPGTT(ctx->i915)); + + return !!rcu_access_pointer(ctx->vm); +} + static inline struct i915_address_space * i915_gem_context_get_eb_vm(struct i915_gem_context *ctx) { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 4816359c4c21..d2308145b4f1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -733,7 +733,7 @@ static int eb_select_context(struct i915_execbuffer *eb) return PTR_ERR(ctx); eb->gem_context = ctx; - if (rcu_access_pointer(ctx->vm)) + if (i915_gem_context_has_full_ppgtt(ctx)) eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT; return 0; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index fc7fb33a3a52..1536c50144f8 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -704,7 +704,7 @@ static int igt_ctx_exec(void *arg) pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), engine->name, - yesno(!!rcu_access_pointer(ctx->vm)), + yesno(i915_gem_context_has_full_ppgtt(ctx)), err); intel_context_put(ce); kernel_context_close(ctx); @@ -838,7 +838,7 @@ static int igt_shared_ctx_exec(void *arg) pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), engine->name, - yesno(!!rcu_access_pointer(ctx->vm)), + yesno(i915_gem_context_has_full_ppgtt(ctx)), err); intel_context_put(ce); kernel_context_close(ctx); @@ -1417,7 +1417,7 @@ static int igt_ctx_readonly(void *arg) pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", ndwords, dw, max_dwords(obj), ce->engine->name, - yesno(!!ctx_vm(ctx)), + yesno(i915_gem_context_has_full_ppgtt(ctx)), err); i915_gem_context_unlock_engines(ctx); goto out_file; From 0483a301873309a285b2eccac723601006b990d7 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 2 Sep 2021 16:20:54 +0200 Subject: [PATCH 47/85] drm/i915: Use i915_gem_context_get_eb_vm in intel_context_set_gem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit ccbc1b97948ab671335e950271e39766729736c3 Author: Jason Ekstrand Date: Thu Jul 8 10:48:30 2021 -0500 drm/i915/gem: Don't allow changing the VM on running contexts (v4) the gem_ctx->vm can't change anymore. Plus we always set the intel_context->vm, so might as well use the helper we have for that. This makes it very clear that we always overwrite intel_context->vm for userspace contexts, since the default is gt->vm, which is explicitly reserved for kernel context use. It would be good to split things up a bit further and avoid any possibility for an accident where we run kernel stuff in userspace vm or the other way round. Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter Cc: Jon Bloomfield Cc: Chris Wilson Cc: Maarten Lankhorst Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: "Thomas Hellström" Cc: Matthew Auld Cc: Lionel Landwerlin Cc: Dave Airlie Cc: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210902142057.929669-8-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index b8067b35d2d6..48ee96bf2643 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -791,16 +791,8 @@ static int intel_context_set_gem(struct intel_context *ce, ce->ring_size = SZ_16K; - if (rcu_access_pointer(ctx->vm)) { - struct i915_address_space *vm; - - rcu_read_lock(); - vm = context_get_vm_rcu(ctx); /* hmm */ - rcu_read_unlock(); - - i915_vm_put(ce->vm); - ce->vm = vm; - } + i915_vm_put(ce->vm); + ce->vm = i915_gem_context_get_eb_vm(ctx); if (ctx->sched.priority >= I915_PRIORITY_NORMAL && intel_engine_has_timeslices(ce->engine) && From 9ec8795e7d91bc650db03dc6f5315667555dae11 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 2 Sep 2021 16:20:55 +0200 Subject: [PATCH 48/85] drm/i915: Drop __rcu from gem_context->vm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's been invariant since commit ccbc1b97948ab671335e950271e39766729736c3 Author: Jason Ekstrand Date: Thu Jul 8 10:48:30 2021 -0500 drm/i915/gem: Don't allow changing the VM on running contexts (v4) this just completes the deed. I've tried to split out prep work for more careful review as much as possible, this is what's left: - get_ppgtt gets simplified since we don't need to grab a temporary reference - we can rely on the temporary reference for the gem_ctx while we inspect the vm. The new vm_id still needs a full i915_vm_open ofc. This also removes the final caller of context_get_vm_rcu - A pile of selftests can now just look at ctx->vm instead of rcu_dereference_protected( , true) or similar things. - All callers of i915_gem_context_vm also disappear. - I've changed the hugepage selftest to set scrub_64K without any locking, because when we inspect that setting we're also not taking any locks either. It works because it's a selftests that's careful (single threaded gives you nice ordering) and not a live driver where races can happen from anywhere. These can only be split up further if we have some intermediate state with a bunch more rcu_dereference_protected(ctx->vm, true), just to shut up lockdep and sparse. The conversion to __rcu happened in commit a4e7ccdac38ec8335d9e4e2656c1a041c77feae1 Author: Chris Wilson Date: Fri Oct 4 14:40:09 2019 +0100 drm/i915: Move context management under GEM Note that we're not breaking the actual bugfix in there: The real bugfix is pushing the i915_vm_relase onto a separate worker, to avoid locking inversion issues. The rcu conversion was just thrown in for entertainment value on top (no vm lookup isn't even close to anything that's a hotpath where removing the single spinlock can be measured). v2: Rebase over the change to move the i915_vm_put() into i915_gem_context_release(). v3: Trivial conflict against repainted shed. Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter Cc: Jon Bloomfield Cc: Chris Wilson Cc: Maarten Lankhorst Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: "Thomas Hellström" Cc: Matthew Auld Cc: Lionel Landwerlin Cc: Dave Airlie Cc: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210902142057.929669-9-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 55 ++----------------- drivers/gpu/drm/i915/gem/i915_gem_context.h | 8 +-- .../gpu/drm/i915/gem/i915_gem_context_types.h | 2 +- .../gpu/drm/i915/gem/selftests/huge_pages.c | 4 +- .../drm/i915/gem/selftests/i915_gem_context.c | 24 +++----- drivers/gpu/drm/i915/i915_trace.h | 2 +- drivers/gpu/drm/i915/selftests/i915_vma.c | 2 +- 7 files changed, 22 insertions(+), 75 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 48ee96bf2643..c2ab0e22db0a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -742,44 +742,6 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, return ret; } -static struct i915_address_space * -context_get_vm_rcu(struct i915_gem_context *ctx) -{ - GEM_BUG_ON(!rcu_access_pointer(ctx->vm)); - - do { - struct i915_address_space *vm; - - /* - * We do not allow downgrading from full-ppgtt [to a shared - * global gtt], so ctx->vm cannot become NULL. - */ - vm = rcu_dereference(ctx->vm); - if (!kref_get_unless_zero(&vm->ref)) - continue; - - /* - * This ppgtt may have be reallocated between - * the read and the kref, and reassigned to a third - * context. In order to avoid inadvertent sharing - * of this ppgtt with that third context (and not - * src), we have to confirm that we have the same - * ppgtt after passing through the strong memory - * barrier implied by a successful - * kref_get_unless_zero(). - * - * Once we have acquired the current ppgtt of ctx, - * we no longer care if it is released from ctx, as - * it cannot be reallocated elsewhere. - */ - - if (vm == rcu_access_pointer(ctx->vm)) - return rcu_pointer_handoff(vm); - - i915_vm_put(vm); - } while (1); -} - static int intel_context_set_gem(struct intel_context *ce, struct i915_gem_context *ctx, struct intel_sseu sseu) @@ -990,7 +952,7 @@ static void i915_gem_context_release_work(struct work_struct *work) if (ctx->syncobj) drm_syncobj_put(ctx->syncobj); - vm = i915_gem_context_vm(ctx); + vm = ctx->vm; if (vm) i915_vm_put(vm); @@ -1216,7 +1178,7 @@ static void context_close(struct i915_gem_context *ctx) set_closed_name(ctx); - vm = i915_gem_context_vm(ctx); + vm = ctx->vm; if (vm) { /* i915_vm_close drops the final reference, which is a bit too * early and could result in surprises with concurrent @@ -1335,7 +1297,7 @@ i915_gem_create_context(struct drm_i915_private *i915, vm = &ppgtt->vm; } if (vm) { - RCU_INIT_POINTER(ctx->vm, i915_vm_open(vm)); + ctx->vm = i915_vm_open(vm); /* i915_vm_open() takes a reference */ i915_vm_put(vm); @@ -1561,15 +1523,12 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, if (!i915_gem_context_has_full_ppgtt(ctx)) return -ENODEV; - rcu_read_lock(); - vm = context_get_vm_rcu(ctx); - rcu_read_unlock(); - if (!vm) - return -ENODEV; + vm = ctx->vm; + GEM_BUG_ON(!vm); err = xa_alloc(&file_priv->vm_xa, &id, vm, xa_limit_32b, GFP_KERNEL); if (err) - goto err_put; + return err; i915_vm_open(vm); @@ -1577,8 +1536,6 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv, args->value = id; args->size = 0; -err_put: - i915_vm_put(vm); return err; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index c50ded3d1cd3..d3279086a5e7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -156,9 +156,9 @@ i915_gem_context_vm(struct i915_gem_context *ctx) static inline bool i915_gem_context_has_full_ppgtt(struct i915_gem_context *ctx) { - GEM_BUG_ON(!!rcu_access_pointer(ctx->vm) != HAS_FULL_PPGTT(ctx->i915)); + GEM_BUG_ON(!!ctx->vm != HAS_FULL_PPGTT(ctx->i915)); - return !!rcu_access_pointer(ctx->vm); + return !!ctx->vm; } static inline struct i915_address_space * @@ -166,12 +166,10 @@ i915_gem_context_get_eb_vm(struct i915_gem_context *ctx) { struct i915_address_space *vm; - rcu_read_lock(); - vm = rcu_dereference(ctx->vm); + vm = ctx->vm; if (!vm) vm = &ctx->i915->ggtt.vm; vm = i915_vm_get(vm); - rcu_read_unlock(); return vm; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index 0c38789bd4a8..c4617e4d9fa9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -262,7 +262,7 @@ struct i915_gem_context { * In other modes, this is a NULL pointer with the expectation that * the caller uses the shared global GTT. */ - struct i915_address_space __rcu *vm; + struct i915_address_space *vm; /** * @pid: process id of creator diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 6c68fe26bb32..5d71626a1ee5 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1688,11 +1688,9 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) goto out_file; } - mutex_lock(&ctx->mutex); - vm = i915_gem_context_vm(ctx); + vm = ctx->vm; if (vm) WRITE_ONCE(vm->scrub_64K, true); - mutex_unlock(&ctx->mutex); err = i915_subtests(tests, ctx); diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index 1536c50144f8..b32f7fed2d9c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -27,12 +27,6 @@ #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) -static inline struct i915_address_space *ctx_vm(struct i915_gem_context *ctx) -{ - /* single threaded, private ctx */ - return rcu_dereference_protected(ctx->vm, true); -} - static int live_nop_switch(void *arg) { const unsigned int nctx = 1024; @@ -813,7 +807,7 @@ static int igt_shared_ctx_exec(void *arg) struct i915_gem_context *ctx; struct intel_context *ce; - ctx = kernel_context(i915, ctx_vm(parent)); + ctx = kernel_context(i915, parent->vm); if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out_test; @@ -823,7 +817,7 @@ static int igt_shared_ctx_exec(void *arg) GEM_BUG_ON(IS_ERR(ce)); if (!obj) { - obj = create_test_object(ctx_vm(parent), + obj = create_test_object(parent->vm, file, &objects); if (IS_ERR(obj)) { err = PTR_ERR(obj); @@ -1380,7 +1374,7 @@ static int igt_ctx_readonly(void *arg) goto out_file; } - vm = ctx_vm(ctx) ?: &i915->ggtt.alias->vm; + vm = ctx->vm ?: &i915->ggtt.alias->vm; if (!vm || !vm->has_read_only) { err = 0; goto out_file; @@ -1499,7 +1493,7 @@ static int write_to_scratch(struct i915_gem_context *ctx, GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); - err = check_scratch(ctx_vm(ctx), offset); + err = check_scratch(ctx->vm, offset); if (err) return err; @@ -1596,7 +1590,7 @@ static int read_from_scratch(struct i915_gem_context *ctx, GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); - err = check_scratch(ctx_vm(ctx), offset); + err = check_scratch(ctx->vm, offset); if (err) return err; @@ -1739,7 +1733,7 @@ static int check_scratch_page(struct i915_gem_context *ctx, u32 *out) u32 *vaddr; int err = 0; - vm = ctx_vm(ctx); + vm = ctx->vm; if (!vm) return -ENODEV; @@ -1801,7 +1795,7 @@ static int igt_vm_isolation(void *arg) } /* We can only test vm isolation, if the vm are distinct */ - if (ctx_vm(ctx_a) == ctx_vm(ctx_b)) + if (ctx_a->vm == ctx_b->vm) goto out_file; /* Read the initial state of the scratch page */ @@ -1813,8 +1807,8 @@ static int igt_vm_isolation(void *arg) if (err) goto out_file; - vm_total = ctx_vm(ctx_a)->total; - GEM_BUG_ON(ctx_vm(ctx_b)->total != vm_total); + vm_total = ctx_a->vm->total; + GEM_BUG_ON(ctx_b->vm->total != vm_total); count = 0; num_engines = 0; diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 806ad688274b..237e5061381b 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -1246,7 +1246,7 @@ DECLARE_EVENT_CLASS(i915_context, TP_fast_assign( __entry->dev = ctx->i915->drm.primary->index; __entry->ctx = ctx; - __entry->vm = rcu_access_pointer(ctx->vm); + __entry->vm = ctx->vm; ), TP_printk("dev=%u, ctx=%p, ctx_vm=%p", diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index 79ba72da0813..1f10fe36619b 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -39,7 +39,7 @@ static bool assert_vma(struct i915_vma *vma, { bool ok = true; - if (vma->vm != rcu_access_pointer(ctx->vm)) { + if (vma->vm != ctx->vm) { pr_err("VMA created with wrong VM\n"); ok = false; } From 843151521844af6c3e22d4bef42d292c04f05fa2 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 2 Sep 2021 16:20:56 +0200 Subject: [PATCH 49/85] drm/i915: use xa_lock/unlock for fpriv->vm_xa lookups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't need the absolute speed of rcu for this. And i915_address_space in general dont need rcu protection anywhere else, after we've made gem contexts and engines a lot more immutable. Note that this semantically reverts commit aabbe344dc3ca5f7d8263a02608ba6179e8a4499 Author: Chris Wilson Date: Fri Aug 30 19:03:25 2019 +0100 drm/i915: Use RCU for unlocked vm_idr lookup except we have the conversion from idr to xarray in between. v2: kref_get_unless_zero is no longer required (Maarten) Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter Cc: Jon Bloomfield Cc: Chris Wilson Cc: Maarten Lankhorst Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: "Thomas Hellström" Cc: Matthew Auld Cc: Lionel Landwerlin Cc: Dave Airlie Cc: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210902142057.929669-10-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/i915_drv.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0558921663d9..94175a49c151 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1888,11 +1888,11 @@ i915_gem_vm_lookup(struct drm_i915_file_private *file_priv, u32 id) { struct i915_address_space *vm; - rcu_read_lock(); + xa_lock(&file_priv->vm_xa); vm = xa_load(&file_priv->vm_xa, id); - if (vm && !kref_get_unless_zero(&vm->ref)) - vm = NULL; - rcu_read_unlock(); + if (vm) + kref_get(&vm->ref); + xa_unlock(&file_priv->vm_xa); return vm; } From dcc5d82063d9055cecd09bec4d280c5ab62b9d8f Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 2 Sep 2021 16:20:57 +0200 Subject: [PATCH 50/85] drm/i915: Stop rcu support for i915_address_space MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The full audit is quite a bit of work: - i915_dpt has very simple lifetime (somehow we create a display pagetable vm per object, so its _very_ simple, there's only ever a single vma in there), and uses i915_vm_close(), which internally does a i915_vm_put(). No rcu. Aside: wtf is i915_dpt doing in the intel_display.c garbage collector as a new feature, instead of added as a separate file with some clean-ish interface. Also, i915_dpt unfortunately re-introduces some coding patterns from pre-dma_resv_lock conversion times. - i915_gem_proto_ctx is fully refcounted and no rcu, all protected by fpriv->proto_context_lock. - i915_gem_context is itself rcu protected, and that might leak to anything it points at. Before commit cf977e18610e66e48c31619e7e0cfa871be9eada Author: Chris Wilson Date: Wed Dec 2 11:21:40 2020 +0000 drm/i915/gem: Spring clean debugfs and commit db80a1294c231b6ac725085f046bb2931e00c9db Author: Chris Wilson Date: Mon Jan 18 11:08:54 2021 +0000 drm/i915/gem: Remove per-client stats from debugfs/i915_gem_objects we had a bunch of debugfs files that relied on rcu protecting everything, but those are gone now. The main one was removed even earlier with There doesn't seem to be anything left that's actually protecting stuff now that the ctx->vm itself is invariant. See commit ccbc1b97948ab671335e950271e39766729736c3 Author: Jason Ekstrand Date: Thu Jul 8 10:48:30 2021 -0500 drm/i915/gem: Don't allow changing the VM on running contexts (v4) Note that we drop the vm refcount before the final release of the gem context refcount, so this is all very dangerous even without rcu. Note that aside from later on creating new engines (a defunct feature) and debug output we're never looked at gem_ctx->vm for anything functional, hence why this is ok. Fingers crossed. Preceeding patches removed all vestiges of rcu use from gem_ctx->vm derferencing to make it clear it's really not used. The gem_ctx->rcu protection was introduced in commit a4e7ccdac38ec8335d9e4e2656c1a041c77feae1 Author: Chris Wilson Date: Fri Oct 4 14:40:09 2019 +0100 drm/i915: Move context management under GEM The commit message is somewhat entertaining because it fails to mention this fact completely, and compensates that by an in-commit changelog entry that claims that ctx->vm is protected by ctx->mutex. Which was the case _before_ this commit, but no longer after it. - intel_context holds a full reference. Unfortunately intel_context is also rcu protected and the reference to the ->vm is dropped before the rcu barrier - only the kfree is delayed. So again we need to check whether that leaks anywhere on the intel_context->vm. RCU is only used to protect intel_context sitting on the breadcrumb lists, which don't look at the vm anywhere, so we are fine. Nothing else relies on rcu protection of intel_context and hence is fully protected by the kref refcount alone, which protects intel_context->vm in turn. The breadcrumbs rcu usage was added in commit c744d50363b714783bbc88d986cc16def13710f7 Author: Chris Wilson Date: Thu Nov 26 14:04:06 2020 +0000 drm/i915/gt: Split the breadcrumb spinlock between global and contexts its parent commit added the intel_context rcu protection: commit 14d1eaf08845c534963c83f754afe0cb14cb2512 Author: Chris Wilson Date: Thu Nov 26 14:04:05 2020 +0000 drm/i915/gt: Protect context lifetime with RCU given some credence to my claim that I've actually caught them all. - drm_i915_gem_object's shares_resv_from pointer has a full refcount to the dma_resv, which is a sub-refcount that's released after the final i915_vm_put() has been called. Safe. Aside: Maybe we should have a struct dma_resv_shared which is just dma_resv + kref as a stand-alone thing. It's a pretty useful pattern which other drivers might want to copy. For a bit more context see commit 4d8151ae5329cf50781a02fd2298a909589a5bab Author: Thomas Hellström Date: Tue Jun 1 09:46:41 2021 +0200 drm/i915: Don't free shared locks while shared - the fpriv->vm_xa was relying on rcu_read_lock for lookup, but that was updated in a prep patch too to just be a spinlock-protected lookup. - intel_gt->vm is set at driver load in intel_gt_init() and released in intel_gt_driver_release(). There seems to be some issue that in some error paths this is called twice, but otherwise no rcu to be found anywhere. This was added in the below commit, which unfortunately doesn't explain why this complication exists. commit e6ba76480299a0d77c51d846f7467b1673aad25b Author: Chris Wilson Date: Sat Dec 21 16:03:24 2019 +0000 drm/i915: Remove i915->kernel_context The proper fix most likely for this is to start using drmm_ at large scale, but that's also huge amounts of work. - i915_vma->vm is some real pain, because rcu is rcu protected, at least in the vma lookup in the context lookup cache in eb_lookup_vma(). This was added in commit 4ff4b44cbb70c269259958cbcc48d7b8a2cb9ec8 Author: Chris Wilson Date: Fri Jun 16 15:05:16 2017 +0100 drm/i915: Store a direct lookup from object handle to vma This was changed to a radix tree from the hashtable in, but with the locking unchanged, in commit d1b48c1e7184d9bc4ae6d7f9fe2eed9efed11ffc Author: Chris Wilson Date: Wed Aug 16 09:52:08 2017 +0100 drm/i915: Replace execbuf vma ht with an idr In commit 93159e12353c2a47e5576d642845a91fa00530bf Author: Chris Wilson Date: Mon Mar 23 09:28:41 2020 +0000 drm/i915/gem: Avoid gem_context->mutex for simple vma lookup the locking was changed from dev->struct_mutex to rcu, which added the requirement to rcu protect i915_vma. Somehow this was missed in review (or I'm completely blind). Irrespective of all that the vma lookup cache rcu_read_lock grabs a full reference of the vma and the rcu doesn't leak further. So no impact on i915_address_space from that. I have not found any other rcu use for i915_vma, but given that it seems broken I also didn't bother to do a careful in-depth audit. Alltogether there's nothing left in-tree anymore which requires that a pointer deref to an i915_address_space is safe undre rcu_read_lock only. rcu protection of i915_address_space was introduced in commit b32fa811156328aea5a3c2ff05cc096490382456 Author: Chris Wilson Date: Thu Jun 20 19:37:05 2019 +0100 drm/i915/gtt: Defer address space cleanup to an RCU worker by mixing up a bugfixing (i915_address_space needs to be released from a worker) with enabling rcu support. The commit message also seems somewhat confused, because it talks about cleanup of WC pages requiring sleep, while the code and linked bugzilla are about a requirement to take dev->struct_mutex (which yes sleeps but it's a much more specific problem). Since final kref_put can be called from pretty much anywhere (including hardirq context through the scheduler's i915_active cleanup) we need a worker here. Hence that part must be kept. Ideally all these reclaim workers should have some kind of integration with our shrinkers, but for some of these it's rather tricky. Anyway, that's a preexisting condition in the codeebase that we wont fix in this patch here. We also remove the rcu_barrier in ggtt_cleanup_hw added in commit 60a4233a4952729089e4df152e730f8f4d0e82ce Author: Chris Wilson Date: Mon Jul 29 14:24:12 2019 +0100 drm/i915: Flush the i915_vm_release before ggtt shutdown Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter Cc: Jon Bloomfield Cc: Chris Wilson Cc: Maarten Lankhorst Cc: Joonas Lahtinen Cc: Daniel Vetter Cc: "Thomas Hellström" Cc: Matthew Auld Cc: Lionel Landwerlin Cc: Dave Airlie Cc: Jason Ekstrand Link: https://patchwork.freedesktop.org/patch/msgid/20210902142057.929669-11-daniel.vetter@ffwll.ch --- drivers/gpu/drm/i915/gt/intel_ggtt.c | 1 - drivers/gpu/drm/i915/gt/intel_gtt.c | 6 +++--- drivers/gpu/drm/i915/gt/intel_gtt.h | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index de3ac58fceec..8d71f67926f1 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -727,7 +727,6 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) atomic_set(&ggtt->vm.open, 0); - rcu_barrier(); /* flush the RCU'ed__i915_vm_release */ flush_workqueue(ggtt->vm.i915->wq); mutex_lock(&ggtt->vm.mutex); diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index e137dd32b5b8..a0c2b952aa57 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -155,7 +155,7 @@ void i915_vm_resv_release(struct kref *kref) static void __i915_vm_release(struct work_struct *work) { struct i915_address_space *vm = - container_of(work, struct i915_address_space, rcu.work); + container_of(work, struct i915_address_space, release_work); vm->cleanup(vm); i915_address_space_fini(vm); @@ -171,7 +171,7 @@ void i915_vm_release(struct kref *kref) GEM_BUG_ON(i915_is_ggtt(vm)); trace_i915_ppgtt_release(vm); - queue_rcu_work(vm->i915->wq, &vm->rcu); + queue_work(vm->i915->wq, &vm->release_work); } void i915_address_space_init(struct i915_address_space *vm, int subclass) @@ -185,7 +185,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) if (!kref_read(&vm->resv_ref)) kref_init(&vm->resv_ref); - INIT_RCU_WORK(&vm->rcu, __i915_vm_release); + INIT_WORK(&vm->release_work, __i915_vm_release); atomic_set(&vm->open, 1); /* diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index bc7153018ebd..5b539bd7645d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -213,7 +213,7 @@ struct i915_vma_ops { struct i915_address_space { struct kref ref; - struct rcu_work rcu; + struct work_struct release_work; struct drm_mm mm; struct intel_gt *gt; From f5392e5f8ef300c5d8fb97fb441aad217e44f394 Mon Sep 17 00:00:00 2001 From: ravitejax Date: Fri, 3 Sep 2021 23:50:34 +0530 Subject: [PATCH 51/85] drm/i915/adl_s: Remove require_force_probe protection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removing force probe protection from ADLS platform. Did not observe warnings, errors, flickering or any visual defects while doing ordinary tasks like browsing and editing documents in a two monitor setup. For more info drm-tip idle run results : https://intel-gfx-ci.01.org/tree/drm-tip/bat-all.html? Cc: Lucas De Marchi Signed-off-by: ravitejax Reviewed-by: Ayaz A Siddiqui Signed-off-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20210903182034.668467-1-ravitejax.gpud.talla@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index f0e2f5e67bda..4745f7aad848 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -926,7 +926,6 @@ static const struct intel_device_info adl_s_info = { GEN12_FEATURES, PLATFORM(INTEL_ALDERLAKE_S), .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D), - .require_force_probe = 1, .display.has_hti = 1, .display.has_psr_hw_tracking = 0, .platform_engine_mask = From 3f027d61663fc20622a9563ab1463fab17672289 Mon Sep 17 00:00:00 2001 From: Ayaz A Siddiqui Date: Tue, 7 Sep 2021 22:46:39 +0530 Subject: [PATCH 52/85] drm/i915/gt: Add separate MOCS table for Gen12 devices other than TGL/RKL MOCS table of TGL/RKL has MOCS[1] set to L3_UC. While for other gen12 devices we need to set MOCS[1] as L3_WB, So adding a new MOCS table for other gen 12 devices eg. ADL. Fixes: cfbe5291a189 ("drm/i915/gt: Initialize unused MOCS entries with device specific values") Cc: Matt Roper Signed-off-by: Ayaz A Siddiqui Reviewed-by: Matt Roper [mattrope: fix whitespace error] Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20210907171639.1221287-1-ayaz.siddiqui@intel.com --- drivers/gpu/drm/i915/gt/intel_mocs.c | 40 +++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index e96afd7beb49..f47521b8e941 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -315,6 +315,34 @@ static const struct drm_i915_mocs_entry dg1_mocs_table[] = { MOCS_ENTRY(63, 0, L3_1_UC), }; +static const struct drm_i915_mocs_entry gen12_mocs_table[] = { + GEN11_MOCS_ENTRIES, + /* Implicitly enable L1 - HDC:L1 + L3 + LLC */ + MOCS_ENTRY(48, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + L3 */ + MOCS_ENTRY(49, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), + /* Implicitly enable L1 - HDC:L1 + LLC */ + MOCS_ENTRY(50, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* Implicitly enable L1 - HDC:L1 */ + MOCS_ENTRY(51, + LE_1_UC | LE_TC_1_LLC, + L3_1_UC), + /* HW Special Case (CCS) */ + MOCS_ENTRY(60, + LE_3_WB | LE_TC_1_LLC | LE_LRUM(3), + L3_1_UC), + /* HW Special Case (Displayable) */ + MOCS_ENTRY(61, + LE_1_UC | LE_TC_1_LLC, + L3_3_WB), +}; + enum { HAS_GLOBAL_MOCS = BIT(0), HAS_ENGINE_MOCS = BIT(1), @@ -351,14 +379,18 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->uc_index = 1; table->unused_entries_index = 5; - } else if (GRAPHICS_VER(i915) >= 12) { + } else if (IS_TIGERLAKE(i915) || IS_ROCKETLAKE(i915)) { + /* For TGL/RKL, Can't be changed now for ABI reasons */ table->size = ARRAY_SIZE(tgl_mocs_table); table->table = tgl_mocs_table; table->n_entries = GEN9_NUM_MOCS_ENTRIES; table->uc_index = 3; - /* For TGL/RKL, Can't be changed now for ABI reasons */ - if (!IS_TIGERLAKE(i915) && !IS_ROCKETLAKE(i915)) - table->unused_entries_index = 2; + } else if (GRAPHICS_VER(i915) >= 12) { + table->size = ARRAY_SIZE(gen12_mocs_table); + table->table = gen12_mocs_table; + table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->uc_index = 3; + table->unused_entries_index = 2; } else if (GRAPHICS_VER(i915) == 11) { table->size = ARRAY_SIZE(icl_mocs_table); table->table = icl_mocs_table; From 502d0609fc418e674f1e8bd30aa02748e4c6b465 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 3 Sep 2021 16:53:17 +0100 Subject: [PATCH 53/85] drm/i915/gtt: add some flushing for the 64K GTT path If we need to mark the PDE as operating in 64K GTT mode, we should be paranoid and flush the extra writes, like we already do for the PTEs. On some platforms the clflush can apparently add the just the right amount of magical delay to force the GPU to see the updated entry. Signed-off-by: Matthew Auld Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20210903155317.1854012-1-matthew.auld@intel.com --- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 6e0e52eeb87a..6a5af995f5b1 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -548,6 +548,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, I915_GTT_PAGE_SIZE_2M)))) { vaddr = px_vaddr(pd); vaddr[maybe_64K] |= GEN8_PDE_IPS_64K; + clflush_cache_range(vaddr, PAGE_SIZE); page_size = I915_GTT_PAGE_SIZE_64K; /* @@ -568,6 +569,7 @@ static void gen8_ppgtt_insert_huge(struct i915_vma *vma, for (i = 1; i < index; i += 16) memset64(vaddr + i, encode, 15); + clflush_cache_range(vaddr, PAGE_SIZE); } } From f503eb0cf2badfd8a70dac5d2a48a3e83550278e Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Mon, 6 Sep 2021 10:17:29 +0100 Subject: [PATCH 54/85] drm/i915/selftests: fixup igt_shrink_thp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the object might still be active here, the shrink_all will simply ignore it, which blows up in the test, since the pages will still be there. Currently THP is disabled which should result in the test being skipped, but if we ever re-enable THP we might start seeing the failure. Fix this by forcing I915_SHRINK_ACTIVE. v2: Some machine in the shard runs doesn't seem to have any available swap when running this test. Try to handle this. Signed-off-by: Matthew Auld Cc: Tvrtko Ursulin Cc: Thomas Hellström Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20210906091729.2093312-1-matthew.auld@intel.com --- .../gpu/drm/i915/gem/selftests/huge_pages.c | 30 ++++++++++++++----- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 5d71626a1ee5..0827634c842c 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -1519,6 +1519,7 @@ static int igt_shrink_thp(void *arg) struct i915_vma *vma; unsigned int flags = PIN_USER; unsigned int n; + bool should_swap; int err = 0; /* @@ -1567,23 +1568,38 @@ static int igt_shrink_thp(void *arg) break; } i915_gem_context_unlock_engines(ctx); + /* + * Nuke everything *before* we unpin the pages so we can be reasonably + * sure that when later checking get_nr_swap_pages() that some random + * leftover object doesn't steal the remaining swap space. + */ + i915_gem_shrink(NULL, i915, -1UL, NULL, + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND | + I915_SHRINK_ACTIVE); i915_vma_unpin(vma); if (err) goto out_put; /* - * Now that the pages are *unpinned* shrink-all should invoke - * shmem to truncate our pages. + * Now that the pages are *unpinned* shrinking should invoke + * shmem to truncate our pages, if we have available swap. */ - i915_gem_shrink_all(i915); - if (i915_gem_object_has_pages(obj)) { - pr_err("shrink-all didn't truncate the pages\n"); + should_swap = get_nr_swap_pages() > 0; + i915_gem_shrink(NULL, i915, -1UL, NULL, + I915_SHRINK_BOUND | + I915_SHRINK_UNBOUND | + I915_SHRINK_ACTIVE); + if (should_swap == i915_gem_object_has_pages(obj)) { + pr_err("unexpected pages mismatch, should_swap=%s\n", + yesno(should_swap)); err = -EINVAL; goto out_put; } - if (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys) { - pr_err("residual page-size bits left\n"); + if (should_swap == (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys)) { + pr_err("unexpected residual page-size bits, should_swap=%s\n", + yesno(should_swap)); err = -EINVAL; goto out_put; } From 058d7d62602868fa430555311fa45dfda2168349 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 2 Sep 2021 22:57:37 +0100 Subject: [PATCH 55/85] drm/i915: clean up inconsistent indenting There is a statement that is indented one character too deeply, clean this up. Signed-off-by: Colin Ian King Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20210902215737.55570-1-colin.king@canonical.com --- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index de5f9c86b9a4..aeb324b701ec 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2565,7 +2565,7 @@ __execlists_context_pre_pin(struct intel_context *ce, if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags)) { lrc_init_state(ce, engine, *vaddr); - __i915_gem_object_flush_map(ce->state->obj, 0, engine->context_size); + __i915_gem_object_flush_map(ce->state->obj, 0, engine->context_size); } return 0; From 74388ca483a416a92cee69dcbeeb793d39199371 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 9 Sep 2021 12:44:48 +0100 Subject: [PATCH 56/85] drm/i915: Use Transparent Hugepages when IOMMU is enabled Usage of Transparent Hugepages was disabled in 9987da4b5dcf ("drm/i915: Disable THP until we have a GPU read BW W/A"), but since it appears majority of performance regressions reported with an enabled IOMMU can be almost eliminated by turning them on, lets just do that. To err on the side of safety we keep the current default in cases where IOMMU is not active, and only when it is default to the "huge=within_size" mode. Although there probably would be wins to enable them throughout, more extensive testing across benchmarks and platforms would need to be done. With the patch and IOMMU enabled my local testing on a small Skylake part shows OglVSTangent regression being reduced from ~14% (IOMMU on versus IOMMU off) to ~2% (same comparison but with THP on). More detailed testing done in the below referenced Gitlab issue by Eero: Skylake GT4e: Performance drops from enabling IOMMU: 30-35% SynMark CSDof 20-25% Unigine Heaven, MemBW GPU write, SynMark VSTangent ~20% GLB Egypt (1/2 screen window) 10-15% GLB T-Rex (1/2 screen window) 8-10% GfxBench T-Rex, MemBW GPU blit 7-8% SynMark DeferredAA + TerrainFly* + ZBuffer 6-7% GfxBench Manhattan 3.0 + 3.1, SynMark TexMem128 & CSCloth 5-6% GfxBench CarChase, Unigine Valley 3-5% GfxBench Vulkan & GL AztecRuins + ALU2, MemBW GPU texture, SynMark Fill*, Deferred, TerrainPan* 1-2% Most of the other tests With the patch drops become: 20-25% SynMark TexMem* 15-20% GLB Egypt (1/2 screen window) 10-15% GLB T-Rex (1/2 screen window) 4-7% GfxBench T-Rex, GpuTest Triangle 1-8% GfxBench ALU2 (offscreen 1%, onscreen 8%) 3% GfxBench Manhattan 3.0, SynMark CSDof 2-3% Unigine Heaven + Valley, MemBW GPU texture 1-3 GfxBench Manhattan 3.1 + CarChase + Vulkan & GL AztecRuins Broxton: Performance drops from IOMMU, without patch: 30% MemBW GPU write 25% SynMark ZBuffer + Fill* 20% MemBW GPU blit 15% MemBW GPU blend, GpuTest Triangle 10-15% MemBW GPU texture 10% GLB Egypt, Unigine Heaven (had hangs), SynMark TerrainFly* 7-9% GLB T-Rex, GfxBench Manhattan 3.0 + T-Rex, SynMark Deferred* + TexMem* 6-8% GfxBench CarChase, Unigine Valley, SynMark CSCloth + ShMapVsm + TerrainPan* 5-6% GfxBench Manhattan 3.1 + GL AztecRuins, SynMark CSDof + TexFilterTri 2-4% GfxBench ALU2, SynMark DrvRes + GSCloth + ShMapPcf + Batch[0-5] + TexFilterAniso, GpuTest GiMark + 32-bit Julia And with patch: 15-20% MemBW GPU texture 10% SynMark TexMem* 8-9% GLB Egypt (1/2 screen window) 4-5% GLB T-Rex (1/2 screen window) 3-6% GfxBench Manhattan 3.0, GpuTest FurMark, SynMark Deferred + TexFilterTri 3-4% GfxBench Manhattan 3.1 + T-Rex, SynMark VSInstancing 2-4% GpuTest Triangle, SynMark DeferredAA 2-3% Unigine Heaven + Valley 1-3% SynMark Terrain* 1-2% GfxBench CarChase, SynMark TexFilterAniso + ZBuffer Tigerlake-H: 20-25% MemBW GPU texture 15-20% GpuTest Triangle 13-15% SynMark TerrainFly* + DeferredAA + HdrBloom 8-10% GfxBench Manhattan 3.1, SynMark TerrainPan* + DrvRes 6-7% GfxBench Manhattan 3.0, SynMark TexMem* 4-8% GLB onscreen Fill + T-Rex + Egypt (more in onscreen than offscreen versions of T-Rex/Egypt) 4-6% GfxBench CarChase + GLES AztecRuins + ALU2, GpuTest 32-bit Julia, SynMark CSDof + DrvState 3-5% GfxBench T-Rex + Egypt, Unigine Heaven + Valley, GpuTest Plot3D 1-7% Media tests 2-3% MemBW GPU blit 1-3% Most of the rest of 3D tests With the patch: 6-8% MemBW GPU blend => the only regression in these tests (compared to IOMMU without THP) 4-6% SynMark DrvState (not impacted) + HdrBloom (improved) 3-4% GLB T-Rex ~3% GLB Egypt, SynMark DrvRes 1-3% GfxBench T-Rex + Egypt, SynMark TexFilterTri 1-2% GfxBench CarChase + GLES AztecRuins, Unigine Valley, GpuTest Triangle ~1% GfxBench Manhattan 3.0/3.1, Unigine Heaven Perf of several tests actually improved with IOMMU + THP, compared to no IOMMU / no THP: 10-15% SynMark Batch[0-3] 5-10% MemBW GPU texture, SynMark ShMapVsm 3-4% SynMark Fill* + Geom* 2-3% SynMark TexMem512 + CSCloth 1-2% SynMark TexMem128 + DeferredAA As a summary across all platforms, these are the benchmarks where enabling THP on top of IOMMU enabled brings regressions: * Skylake GT4e: 20-25% SynMark TexMem* (whereas all MemBW GPU tests either improve or are not affected) * Broxton J4205: 7% MemBW GPU texture 2-3% SynMark TexMem* * Tigerlake-H: 7% MemBW GPU blend Other benchmarks show either lowering of regressions or improvements. v2: * Add Kconfig dependency to transparent hugepages and some help text. * Move to helper for easier handling of kernel build options. v3: * Drop Kconfig. (Daniel) v4: * Add some benchmark results to commit message. v5: * Add explicit regression summary to commit message. (Eero) References: b901bb89324a ("drm/i915/gemfs: enable THP") References: 9987da4b5dcf ("drm/i915: Disable THP until we have a GPU read BW W/A") References: https://gitlab.freedesktop.org/drm/intel/-/issues/430 Co-developed-by: Chris Wilson Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Matthew Auld Cc: Eero Tamminen Cc: Tvrtko Ursulin Cc: Rodrigo Vivi Cc: Daniel Vetter Signed-off-by: Tvrtko Ursulin Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20210909114448.508493-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gemfs.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c index 5e6e8c91ab38..dbdbdc344d87 100644 --- a/drivers/gpu/drm/i915/gem/i915_gemfs.c +++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c @@ -6,7 +6,6 @@ #include #include -#include #include "i915_drv.h" #include "i915_gemfs.h" @@ -15,6 +14,7 @@ int i915_gemfs_init(struct drm_i915_private *i915) { struct file_system_type *type; struct vfsmount *gemfs; + char *opts; type = get_fs_type("tmpfs"); if (!type) @@ -26,10 +26,26 @@ int i915_gemfs_init(struct drm_i915_private *i915) * * One example, although it is probably better with a per-file * control, is selecting huge page allocations ("huge=within_size"). - * Currently unused due to bandwidth issues (slow reads) on Broadwell+. + * However, we only do so to offset the overhead of iommu lookups + * due to bandwidth issues (slow reads) on Broadwell+. */ - gemfs = kern_mount(type); + opts = NULL; + if (intel_vtd_active()) { + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { + static char huge_opt[] = "huge=within_size"; /* r/w */ + + opts = huge_opt; + drm_info(&i915->drm, + "Transparent Hugepage mode '%s'\n", + opts); + } else { + drm_notice(&i915->drm, + "Transparent Hugepage support is recommended for optimal performance when IOMMU is enabled!\n"); + } + } + + gemfs = vfs_kern_mount(type, SB_KERNMOUNT, type->name, opts); if (IS_ERR(gemfs)) return PTR_ERR(gemfs); From f25e3908b9cd4a3fe819e9bdcdde58f20bacb34c Mon Sep 17 00:00:00 2001 From: Vinay Belgaumkar Date: Tue, 7 Sep 2021 16:27:04 -0700 Subject: [PATCH 57/85] drm/i915: Get PM ref before accessing HW register Seeing these errors when GT is likely in suspend state- "RPM wakelock ref not held during HW access" Ensure GT is awake before trying to access HW registers. Avoid reading the register if that is not the case. Signed-off-by: Vinay Belgaumkar Fixes: 41e5c17ebfc2 ("drm/i915/guc/slpc: Sysfs hooks for SLPC") Reviewed-by: Tvrtko Ursulin Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210907232704.12982-1-vinay.belgaumkar@intel.com --- drivers/gpu/drm/i915/gt/intel_rps.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index 3489f5f0cac1..e1a198bbd135 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -1969,8 +1969,14 @@ u32 intel_rps_read_actual_frequency(struct intel_rps *rps) u32 intel_rps_read_punit_req(struct intel_rps *rps) { struct intel_uncore *uncore = rps_to_uncore(rps); + struct intel_runtime_pm *rpm = rps_to_uncore(rps)->rpm; + intel_wakeref_t wakeref; + u32 freq = 0; - return intel_uncore_read(uncore, GEN6_RPNSWREQ); + with_intel_runtime_pm_if_in_use(rpm, wakeref) + freq = intel_uncore_read(uncore, GEN6_RPNSWREQ); + + return freq; } static u32 intel_rps_get_req(u32 pureq) From fc30a6764a54dea42291aeb7009bef7aa2fc1cd4 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:22 -0700 Subject: [PATCH 58/85] drm/i915/guc: Fix blocked context accounting Prior to this patch the blocked context counter was cleared on init_sched_state (used during registering a context & resets) which is incorrect. This state needs to be persistent or the counter can read the incorrect value resulting in scheduling never getting enabled again. Fixes: 62eaf0ae217d ("drm/i915/guc: Support request cancellation") Signed-off-by: Matthew Brost Reviewed-by: Daniel Vetter Cc: Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-2-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 87d8dc8f51b9..69faa39da178 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -152,7 +152,7 @@ static inline void init_sched_state(struct intel_context *ce) { /* Only should be called from guc_lrc_desc_pin() */ atomic_set(&ce->guc_sched_state_no_lock, 0); - ce->guc_state.sched_state = 0; + ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK; } static inline bool From 669b949c1a44d0cb2bcd18ff6ab4fd0c21e7cf6f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:23 -0700 Subject: [PATCH 59/85] drm/i915/guc: Fix outstanding G2H accounting A small race that could result in incorrect accounting of the number of outstanding G2H. Basically prior to this patch we did not increment the number of outstanding G2H if we encoutered a GT reset while sending a H2G. This was incorrect as the context state had already been updated to anticipate a G2H response thus the counter should be incremented. As part of this change we remove a legacy (now unused) path that was the last caller requiring a G2H response that was not guaranteed to loop. This allows us to simplify the accounting as we don't need to handle the case where the send fails due to the channel being busy. Also always use helper when decrementing this value. v2 (Daniele): update GEM_BUG_ON check, pull in dead code removal from later patch, remove loop param from context_deregister. Fixes: f4eb1f3fe946 ("drm/i915/guc: Ensure G2H response has space in buffer") Signed-off-by: Matthew Brost Signed-off-by: Daniele Ceraolo Spurio Cc: Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-3-matthew.brost@intel.com --- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 79 +++++++++---------- 1 file changed, 37 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 69faa39da178..aff5dd247a88 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -352,20 +352,29 @@ static inline void set_lrc_desc_registered(struct intel_guc *guc, u32 id, xa_unlock_irqrestore(&guc->context_lookup, flags); } +static void decr_outstanding_submission_g2h(struct intel_guc *guc) +{ + if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) + wake_up_all(&guc->ct.wq); +} + static int guc_submission_send_busy_loop(struct intel_guc *guc, const u32 *action, u32 len, u32 g2h_len_dw, bool loop) { - int err; + /* + * We always loop when a send requires a reply (i.e. g2h_len_dw > 0), + * so we don't handle the case where we don't get a reply because we + * aborted the send due to the channel being busy. + */ + GEM_BUG_ON(g2h_len_dw && !loop); - err = intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); - - if (!err && g2h_len_dw) + if (g2h_len_dw) atomic_inc(&guc->outstanding_submission_g2h); - return err; + return intel_guc_send_busy_loop(guc, action, len, g2h_len_dw, loop); } int intel_guc_wait_for_pending_msg(struct intel_guc *guc, @@ -616,7 +625,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) init_sched_state(ce); if (pending_enable || destroyed || deregister) { - atomic_dec(&guc->outstanding_submission_g2h); + decr_outstanding_submission_g2h(guc); if (deregister) guc_signal_context_fence(ce); if (destroyed) { @@ -635,7 +644,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) intel_engine_signal_breadcrumbs(ce->engine); } intel_context_sched_disable_unpin(ce); - atomic_dec(&guc->outstanding_submission_g2h); + decr_outstanding_submission_g2h(guc); spin_lock_irqsave(&ce->guc_state.lock, flags); guc_blocked_fence_complete(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); @@ -1233,8 +1242,7 @@ static int register_context(struct intel_context *ce, bool loop) } static int __guc_action_deregister_context(struct intel_guc *guc, - u32 guc_id, - bool loop) + u32 guc_id) { u32 action[] = { INTEL_GUC_ACTION_DEREGISTER_CONTEXT, @@ -1243,16 +1251,16 @@ static int __guc_action_deregister_context(struct intel_guc *guc, return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), G2H_LEN_DW_DEREGISTER_CONTEXT, - loop); + true); } -static int deregister_context(struct intel_context *ce, u32 guc_id, bool loop) +static int deregister_context(struct intel_context *ce, u32 guc_id) { struct intel_guc *guc = ce_to_guc(ce); trace_intel_context_deregister(ce); - return __guc_action_deregister_context(guc, guc_id, loop); + return __guc_action_deregister_context(guc, guc_id); } static intel_engine_mask_t adjust_engine_mask(u8 class, intel_engine_mask_t mask) @@ -1340,26 +1348,23 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) * registering this context. */ if (context_registered) { + bool disabled; + unsigned long flags; + trace_intel_context_steal_guc_id(ce); - if (!loop) { + GEM_BUG_ON(!loop); + + /* Seal race with Reset */ + spin_lock_irqsave(&ce->guc_state.lock, flags); + disabled = submission_disabled(guc); + if (likely(!disabled)) { set_context_wait_for_deregister_to_register(ce); intel_context_get(ce); - } else { - bool disabled; - unsigned long flags; - - /* Seal race with Reset */ - spin_lock_irqsave(&ce->guc_state.lock, flags); - disabled = submission_disabled(guc); - if (likely(!disabled)) { - set_context_wait_for_deregister_to_register(ce); - intel_context_get(ce); - } - spin_unlock_irqrestore(&ce->guc_state.lock, flags); - if (unlikely(disabled)) { - reset_lrc_desc(guc, desc_idx); - return 0; /* Will get registered later */ - } + } + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + if (unlikely(disabled)) { + reset_lrc_desc(guc, desc_idx); + return 0; /* Will get registered later */ } /* @@ -1367,13 +1372,9 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) * context whose guc_id was stolen. */ with_intel_runtime_pm(runtime_pm, wakeref) - ret = deregister_context(ce, ce->guc_id, loop); - if (unlikely(ret == -EBUSY)) { - clr_context_wait_for_deregister_to_register(ce); - intel_context_put(ce); - } else if (unlikely(ret == -ENODEV)) { + ret = deregister_context(ce, ce->guc_id); + if (unlikely(ret == -ENODEV)) ret = 0; /* Will get registered later */ - } } else { with_intel_runtime_pm(runtime_pm, wakeref) ret = register_context(ce, loop); @@ -1730,7 +1731,7 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce) GEM_BUG_ON(context_enabled(ce)); clr_context_registered(ce); - deregister_context(ce, ce->guc_id, true); + deregister_context(ce, ce->guc_id); } static void __guc_context_destroy(struct intel_context *ce) @@ -2583,12 +2584,6 @@ g2h_context_lookup(struct intel_guc *guc, u32 desc_idx) return ce; } -static void decr_outstanding_submission_g2h(struct intel_guc *guc) -{ - if (atomic_dec_and_test(&guc->outstanding_submission_g2h)) - wake_up_all(&guc->ct.wq); -} - int intel_guc_deregister_done_process_msg(struct intel_guc *guc, const u32 *msg, u32 len) From c39f51cc980dd918c5b3da61d54c4725785e766e Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:24 -0700 Subject: [PATCH 60/85] drm/i915/guc: Unwind context requests in reverse order When unwinding requests on a reset context, if other requests in the context are in the priority list the requests could be resubmitted out of seqno order. Traverse the list of active requests in reverse and append to the head of the priority list to fix this. Fixes: eb5e7da736f3 ("drm/i915/guc: Reset implementation for new GuC interface") Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Cc: Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-4-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index aff5dd247a88..0c1e6b465fba 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -806,9 +806,9 @@ __unwind_incomplete_requests(struct intel_context *ce) spin_lock_irqsave(&sched_engine->lock, flags); spin_lock(&ce->guc_active.lock); - list_for_each_entry_safe(rq, rn, - &ce->guc_active.requests, - sched.link) { + list_for_each_entry_safe_reverse(rq, rn, + &ce->guc_active.requests, + sched.link) { if (i915_request_completed(rq)) continue; @@ -825,7 +825,7 @@ __unwind_incomplete_requests(struct intel_context *ce) } GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine)); - list_add_tail(&rq->sched.link, pl); + list_add(&rq->sched.link, pl); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); spin_lock(&ce->guc_active.lock); From 88209a8ecb8b8752322908a3c3362a001bdc3a39 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:25 -0700 Subject: [PATCH 61/85] drm/i915/guc: Don't drop ce->guc_active.lock when unwinding context Don't drop ce->guc_active.lock when unwinding a context after reset. At one point we had to drop this because of a lock inversion but that is no longer the case. It is much safer to hold the lock so let's do that. Fixes: eb5e7da736f3 ("drm/i915/guc: Reset implementation for new GuC interface") Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Matthew Brost Cc: Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-5-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 0c1e6b465fba..31bbfe5479ae 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -813,8 +813,6 @@ __unwind_incomplete_requests(struct intel_context *ce) continue; list_del_init(&rq->sched.link); - spin_unlock(&ce->guc_active.lock); - __i915_request_unsubmit(rq); /* Push the request back into the queue for later resubmission. */ @@ -827,8 +825,6 @@ __unwind_incomplete_requests(struct intel_context *ce) list_add(&rq->sched.link, pl); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); - - spin_lock(&ce->guc_active.lock); } spin_unlock(&ce->guc_active.lock); spin_unlock_irqrestore(&sched_engine->lock, flags); From d67e3d5a5da8ddcad7fcfac6a2a521128e4304af Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:26 -0700 Subject: [PATCH 62/85] drm/i915/guc: Process all G2H message at once in work queue Rather than processing 1 G2H at a time and re-queuing the work queue if more messages exist, process all the G2H in a single pass of the work queue. Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Cc: Daniel Vetter Cc: Michal Wajdeczko Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-6-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c index 22b4733b55e2..20c710a74498 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ct.c @@ -1042,9 +1042,9 @@ static void ct_incoming_request_worker_func(struct work_struct *w) container_of(w, struct intel_guc_ct, requests.worker); bool done; - done = ct_process_incoming_requests(ct); - if (!done) - queue_work(system_unbound_wq, &ct->requests.worker); + do { + done = ct_process_incoming_requests(ct); + } while (!done); } static int ct_handle_event(struct intel_guc_ct *ct, struct ct_incoming_msg *request) From 1ca36cff0166b0483fe3b99e711e9c800ebbfaa4 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:27 -0700 Subject: [PATCH 63/85] drm/i915/guc: Workaround reset G2H is received after schedule done G2H If the context is reset as a result of the request cancellation the context reset G2H is received after schedule disable done G2H which is the wrong order. The schedule disable done G2H release the waiting request cancellation code which resubmits the context. This races with the context reset G2H which also wants to resubmit the context but in this case it really should be a NOP as request cancellation code owns the resubmit. Use some clever tricks of checking the context state to seal this race until the GuC firmware is fixed. v2: (Checkpatch) - Fix typos v3: (Daniele) - State that is a bug in the GuC firmware Fixes: 62eaf0ae217d ("drm/i915/guc: Support request cancellation") Signed-off-by: Matthew Brost Cc: Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-7-matthew.brost@intel.com --- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 41 ++++++++++++++++--- 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 31bbfe5479ae..f9e3725b94c1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -833,17 +833,33 @@ __unwind_incomplete_requests(struct intel_context *ce) static void __guc_reset_context(struct intel_context *ce, bool stalled) { struct i915_request *rq; + unsigned long flags; u32 head; + bool skip = false; intel_context_get(ce); /* - * GuC will implicitly mark the context as non-schedulable - * when it sends the reset notification. Make sure our state - * reflects this change. The context will be marked enabled - * on resubmission. + * GuC will implicitly mark the context as non-schedulable when it sends + * the reset notification. Make sure our state reflects this change. The + * context will be marked enabled on resubmission. + * + * XXX: If the context is reset as a result of the request cancellation + * this G2H is received after the schedule disable complete G2H which is + * wrong as this creates a race between the request cancellation code + * re-submitting the context and this G2H handler. This is a bug in the + * GuC but can be worked around in the meantime but converting this to a + * NOP if a pending enable is in flight as this indicates that a request + * cancellation has occurred. */ - clr_context_enabled(ce); + spin_lock_irqsave(&ce->guc_state.lock, flags); + if (likely(!context_pending_enable(ce))) + clr_context_enabled(ce); + else + skip = true; + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + if (unlikely(skip)) + goto out_put; rq = intel_context_find_active_request(ce); if (!rq) { @@ -862,6 +878,7 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled) out_replay: guc_reset_state(ce, head, stalled); __unwind_incomplete_requests(ce); +out_put: intel_context_put(ce); } @@ -1598,6 +1615,13 @@ static void guc_context_cancel_request(struct intel_context *ce, guc_reset_state(ce, intel_ring_wrap(ce->ring, rq->head), true); } + + /* + * XXX: Racey if context is reset, see comment in + * __guc_reset_context(). + */ + flush_work(&ce_to_guc(ce)->ct.requests.worker); + guc_context_unblock(ce); } } @@ -2712,7 +2736,12 @@ static void guc_handle_context_reset(struct intel_guc *guc, { trace_intel_context_reset(ce); - if (likely(!intel_context_is_banned(ce))) { + /* + * XXX: Racey if request cancellation has occurred, see comment in + * __guc_reset_context(). + */ + if (likely(!intel_context_is_banned(ce) && + !context_blocked(ce))) { capture_error_state(guc, ce); guc_context_replay(ce); } From ac653dd7996edf1770959e11a078312928bd7315 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:28 -0700 Subject: [PATCH 64/85] Revert "drm/i915/gt: Propagate change in error status to children on unhold" Propagating errors to dependent fences is broken and can lead to errors from one client ending up in another. In commit 3761baae908a ("Revert "drm/i915: Propagate errors on awaiting already signaled fences""), we attempted to get rid of fence error propagation but missed the case added in commit 8e9f84cf5cac ("drm/i915/gt: Propagate change in error status to children on unhold"). Revert that one too. This error was found by an up-and-coming selftest which triggers a reset during request cancellation and verifies that subsequent requests complete successfully. v2: (Daniel Vetter) - Use revert v3: (Jason) - Update commit message v4 (Daniele): - fix checkpatch error in commit message. References: '3761baae908a ("Revert "drm/i915: Propagate errors on awaiting already signaled fences"")' Signed-off-by: Matthew Brost Signed-off-by: Daniele Ceraolo Spurio Reviewed-by: Daniel Vetter Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-8-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_execlists_submission.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index aeb324b701ec..87c595e4efe2 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -2140,10 +2140,6 @@ static void __execlists_unhold(struct i915_request *rq) if (p->flags & I915_DEPENDENCY_WEAK) continue; - /* Propagate any change in error status */ - if (rq->fence.error) - i915_request_set_error_once(w, rq->fence.error); - if (w->engine != rq->engine) continue; From cf37e5c820f16972bd806e06632eb83e7a152d60 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:29 -0700 Subject: [PATCH 65/85] drm/i915/guc: Kick tasklet after queuing a request Kick tasklet after queuing a request so it submitted in a timely manner. Fixes: 3a4cdf1982f0 ("drm/i915/guc: Implement GuC context operations for new inteface") Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-9-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index f9e3725b94c1..bd401a5be87c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1049,6 +1049,7 @@ static inline void queue_request(struct i915_sched_engine *sched_engine, list_add_tail(&rq->sched.link, i915_sched_lookup_priolist(sched_engine, prio)); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + tasklet_hi_schedule(&sched_engine->tasklet); } static int guc_bypass_tasklet_submit(struct intel_guc *guc, From 9888beaaf118b6878347e1fe2b369fc66d756d18 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:30 -0700 Subject: [PATCH 66/85] drm/i915/guc: Don't enable scheduling on a banned context, guc_id invalid, not registered When unblocking a context, do not enable scheduling if the context is banned, guc_id invalid, or not registered. v2: (Daniele) - Add helper for unblock Fixes: 62eaf0ae217d ("drm/i915/guc: Support request cancellation") Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Cc: Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-10-matthew.brost@intel.com --- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index bd401a5be87c..f5bee833ee7d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -148,6 +148,7 @@ static inline void clr_context_registered(struct intel_context *ce) #define SCHED_STATE_BLOCKED_SHIFT 4 #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT) #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT) + static inline void init_sched_state(struct intel_context *ce) { /* Only should be called from guc_lrc_desc_pin() */ @@ -1563,6 +1564,23 @@ static struct i915_sw_fence *guc_context_block(struct intel_context *ce) return &ce->guc_blocked; } +#define SCHED_STATE_MULTI_BLOCKED_MASK \ + (SCHED_STATE_BLOCKED_MASK & ~SCHED_STATE_BLOCKED) +#define SCHED_STATE_NO_UNBLOCK \ + (SCHED_STATE_MULTI_BLOCKED_MASK | \ + SCHED_STATE_PENDING_DISABLE | \ + SCHED_STATE_BANNED) + +static bool context_cant_unblock(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + + return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) || + context_guc_id_invalid(ce) || + !lrc_desc_registered(ce_to_guc(ce), ce->guc_id) || + !intel_context_is_pinned(ce); +} + static void guc_context_unblock(struct intel_context *ce) { struct intel_guc *guc = ce_to_guc(ce); @@ -1577,9 +1595,7 @@ static void guc_context_unblock(struct intel_context *ce) spin_lock_irqsave(&ce->guc_state.lock, flags); if (unlikely(submission_disabled(guc) || - !intel_context_is_pinned(ce) || - context_pending_disable(ce) || - context_blocked(ce) > 1)) { + context_cant_unblock(ce))) { enable = false; } else { enable = true; From d135865cb8e396c0cc1d7d52dbb980fde39da641 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:31 -0700 Subject: [PATCH 67/85] drm/i915/guc: Copy whole golden context, set engine state size of subset When the GuC does a media reset, it copies a golden context state back into the corrupted context's state. The address of the golden context and the size of the engine state restore are passed in via the GuC ADS. The i915 had a bug where it passed in the whole size of the golden context, not the size of the engine state to restore resulting in a memory corruption. Also copy the entire golden context on init rather than just the engine state that is restored. v2 (Daniele): use defines to avoid duplicated const variables (John). Fixes: 481d458caede ("drm/i915/guc: Add golden context to GuC ADS") Signed-off-by: Matthew Brost Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-11-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 26 ++++++++++++++-------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 6926919bcac6..2c6ea64af7ec 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -349,6 +349,8 @@ static void fill_engine_enable_masks(struct intel_gt *gt, info->engine_enabled_masks[GUC_VIDEOENHANCE_CLASS] = VEBOX_MASK(gt); } +#define LR_HW_CONTEXT_SIZE (80 * sizeof(u32)) +#define LRC_SKIP_SIZE (LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE) static int guc_prep_golden_context(struct intel_guc *guc, struct __guc_ads_blob *blob) { @@ -396,7 +398,18 @@ static int guc_prep_golden_context(struct intel_guc *guc, if (!blob) continue; - blob->ads.eng_state_size[guc_class] = real_size; + /* + * This interface is slightly confusing. We need to pass the + * base address of the full golden context and the size of just + * the engine state, which is the section of the context image + * that starts after the execlists context. This is required to + * allow the GuC to restore just the engine state when a + * watchdog reset occurs. + * We calculate the engine state size by removing the size of + * what comes before it in the context image (which is identical + * on all engines). + */ + blob->ads.eng_state_size[guc_class] = real_size - LRC_SKIP_SIZE; blob->ads.golden_context_lrca[guc_class] = addr_ggtt; addr_ggtt += alloc_size; } @@ -436,11 +449,6 @@ static void guc_init_golden_context(struct intel_guc *guc) u8 engine_class, guc_class; u8 *ptr; - /* Skip execlist and PPGTT registers + HWSP */ - const u32 lr_hw_context_size = 80 * sizeof(u32); - const u32 skip_size = LRC_PPHWSP_SZ * PAGE_SIZE + - lr_hw_context_size; - if (!intel_uc_uses_guc_submission(>->uc)) return; @@ -476,12 +484,12 @@ static void guc_init_golden_context(struct intel_guc *guc) continue; } - GEM_BUG_ON(blob->ads.eng_state_size[guc_class] != real_size); + GEM_BUG_ON(blob->ads.eng_state_size[guc_class] != + real_size - LRC_SKIP_SIZE); GEM_BUG_ON(blob->ads.golden_context_lrca[guc_class] != addr_ggtt); addr_ggtt += alloc_size; - shmem_read(engine->default_state, skip_size, ptr + skip_size, - real_size - skip_size); + shmem_read(engine->default_state, 0, ptr, real_size); ptr += alloc_size; } From d2420c2ed8f1bae5f36f681aad73b3d4c5a57d39 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:32 -0700 Subject: [PATCH 68/85] drm/i915/selftests: Add initial GuC selftest for scrubbing lost G2H While debugging an issue with full GT resets I went down a rabbit hole thinking the scrubbing of lost G2H wasn't working correctly. This proved to be incorrect as this was working just fine but this chase inspired me to write a selftest to prove that this works. This simple selftest injects errors dropping various G2H and then issues a full GT reset proving that the scrubbing of these G2H doesn't blow up. v2: (Daniel Vetter) - Use ifdef instead of macros for selftests v3: (Checkpatch) - A space after 'switch' statement v4: (Daniele) - A comment saying GT won't idle if G2H are lost Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-12-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_context_types.h | 18 +++ .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 25 ++++ drivers/gpu/drm/i915/gt/uc/selftest_guc.c | 127 ++++++++++++++++++ .../drm/i915/selftests/i915_live_selftests.h | 1 + .../i915/selftests/intel_scheduler_helpers.c | 12 ++ .../i915/selftests/intel_scheduler_helpers.h | 2 + 6 files changed, 185 insertions(+) create mode 100644 drivers/gpu/drm/i915/gt/uc/selftest_guc.c diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index e54351a170e2..3a73f3117873 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -198,6 +198,24 @@ struct intel_context { */ u8 guc_prio; u32 guc_prio_count[GUC_CLIENT_PRIORITY_NUM]; + +#ifdef CONFIG_DRM_I915_SELFTEST + /** + * @drop_schedule_enable: Force drop of schedule enable G2H for selftest + */ + bool drop_schedule_enable; + + /** + * @drop_schedule_disable: Force drop of schedule disable G2H for + * selftest + */ + bool drop_schedule_disable; + + /** + * @drop_deregister: Force drop of deregister G2H for selftest + */ + bool drop_deregister; +#endif }; #endif /* __INTEL_CONTEXT_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index f5bee833ee7d..5970c54a2e72 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2639,6 +2639,13 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc, trace_intel_context_deregister_done(ce); +#ifdef CONFIG_DRM_I915_SELFTEST + if (unlikely(ce->drop_deregister)) { + ce->drop_deregister = false; + return 0; + } +#endif + if (context_wait_for_deregister_to_register(ce)) { struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; @@ -2693,10 +2700,24 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, trace_intel_context_sched_done(ce); if (context_pending_enable(ce)) { +#ifdef CONFIG_DRM_I915_SELFTEST + if (unlikely(ce->drop_schedule_enable)) { + ce->drop_schedule_enable = false; + return 0; + } +#endif + clr_context_pending_enable(ce); } else if (context_pending_disable(ce)) { bool banned; +#ifdef CONFIG_DRM_I915_SELFTEST + if (unlikely(ce->drop_schedule_disable)) { + ce->drop_schedule_disable = false; + return 0; + } +#endif + /* * Unpin must be done before __guc_signal_context_fence, * otherwise a race exists between the requests getting @@ -3073,3 +3094,7 @@ bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve) return false; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_guc.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/uc/selftest_guc.c b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c new file mode 100644 index 000000000000..fb0e4a7bd8ca --- /dev/null +++ b/drivers/gpu/drm/i915/gt/uc/selftest_guc.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright �� 2021 Intel Corporation + */ + +#include "selftests/intel_scheduler_helpers.h" + +static struct i915_request *nop_user_request(struct intel_context *ce, + struct i915_request *from) +{ + struct i915_request *rq; + int ret; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return rq; + + if (from) { + ret = i915_sw_fence_await_dma_fence(&rq->submit, + &from->fence, 0, + I915_FENCE_GFP); + if (ret < 0) { + i915_request_put(rq); + return ERR_PTR(ret); + } + } + + i915_request_get(rq); + i915_request_add(rq); + + return rq; +} + +static int intel_guc_scrub_ctbs(void *arg) +{ + struct intel_gt *gt = arg; + int ret = 0; + int i; + struct i915_request *last[3] = {NULL, NULL, NULL}, *rq; + intel_wakeref_t wakeref; + struct intel_engine_cs *engine; + struct intel_context *ce; + + wakeref = intel_runtime_pm_get(gt->uncore->rpm); + engine = intel_selftest_find_any_engine(gt); + + /* Submit requests and inject errors forcing G2H to be dropped */ + for (i = 0; i < 3; ++i) { + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + ret = PTR_ERR(ce); + pr_err("Failed to create context, %d: %d\n", i, ret); + goto err; + } + + switch (i) { + case 0: + ce->drop_schedule_enable = true; + break; + case 1: + ce->drop_schedule_disable = true; + break; + case 2: + ce->drop_deregister = true; + break; + } + + rq = nop_user_request(ce, NULL); + intel_context_put(ce); + + if (IS_ERR(rq)) { + ret = PTR_ERR(rq); + pr_err("Failed to create request, %d: %d\n", i, ret); + goto err; + } + + last[i] = rq; + } + + for (i = 0; i < 3; ++i) { + ret = i915_request_wait(last[i], 0, HZ); + if (ret < 0) { + pr_err("Last request failed to complete: %d\n", ret); + goto err; + } + i915_request_put(last[i]); + last[i] = NULL; + } + + /* Force all H2G / G2H to be submitted / processed */ + intel_gt_retire_requests(gt); + msleep(500); + + /* Scrub missing G2H */ + intel_gt_handle_error(engine->gt, -1, 0, "selftest reset"); + + /* GT will not idle if G2H are lost */ + ret = intel_gt_wait_for_idle(gt, HZ); + if (ret < 0) { + pr_err("GT failed to idle: %d\n", ret); + goto err; + } + +err: + for (i = 0; i < 3; ++i) + if (last[i]) + i915_request_put(last[i]); + intel_runtime_pm_put(gt->uncore->rpm, wakeref); + + return ret; +} + +int intel_guc_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(intel_guc_scrub_ctbs), + }; + struct intel_gt *gt = &i915->gt; + + if (intel_gt_is_wedged(gt)) + return 0; + + if (!intel_uc_uses_guc_submission(>->uc)) + return 0; + + return intel_gt_live_subtests(tests, gt); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index cfa5c4165a4f..3cf6758931f9 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -47,5 +47,6 @@ selftest(execlists, intel_execlists_live_selftests) selftest(ring_submission, intel_ring_submission_live_selftests) selftest(perf, i915_perf_live_selftests) selftest(slpc, intel_slpc_live_selftests) +selftest(guc, intel_guc_live_selftests) /* Here be dragons: keep last to run last! */ selftest(late_gt_pm, intel_gt_pm_late_selftests) diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c index 4b328346b48a..310fb83c527e 100644 --- a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c +++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.c @@ -14,6 +14,18 @@ #define REDUCED_PREEMPT 10 #define WAIT_FOR_RESET_TIME 10000 +struct intel_engine_cs *intel_selftest_find_any_engine(struct intel_gt *gt) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, gt, id) + return engine; + + pr_err("No valid engine found!\n"); + return NULL; +} + int intel_selftest_modify_policy(struct intel_engine_cs *engine, struct intel_selftest_saved_policy *saved, u32 modify_type) diff --git a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h index 35c098601ac0..ae60bb507f45 100644 --- a/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h +++ b/drivers/gpu/drm/i915/selftests/intel_scheduler_helpers.h @@ -10,6 +10,7 @@ struct i915_request; struct intel_engine_cs; +struct intel_gt; struct intel_selftest_saved_policy { u32 flags; @@ -23,6 +24,7 @@ enum selftest_scheduler_modify { SELFTEST_SCHEDULER_MODIFY_FAST_RESET, }; +struct intel_engine_cs *intel_selftest_find_any_engine(struct intel_gt *gt); int intel_selftest_modify_policy(struct intel_engine_cs *engine, struct intel_selftest_saved_policy *saved, enum selftest_scheduler_modify modify_type); From 422cda4f50091bdfa114c7d19fce31919c920fe1 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:33 -0700 Subject: [PATCH 69/85] drm/i915/guc: Take context ref when cancelling request A context can get destroyed after cancelling a request, if a context or GT reset occurs, so take a reference to context when cancelling a request. Fixes: 62eaf0ae217d ("drm/i915/guc: Support request cancellation") Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-13-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 5970c54a2e72..e036a171ff17 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1624,8 +1624,10 @@ static void guc_context_cancel_request(struct intel_context *ce, struct i915_request *rq) { if (i915_sw_fence_signaled(&rq->submit)) { - struct i915_sw_fence *fence = guc_context_block(ce); + struct i915_sw_fence *fence; + intel_context_get(ce); + fence = guc_context_block(ce); i915_sw_fence_wait(fence); if (!i915_request_completed(rq)) { __i915_request_skip(rq); @@ -1640,6 +1642,7 @@ static void guc_context_cancel_request(struct intel_context *ce, flush_work(&ce_to_guc(ce)->ct.requests.worker); guc_context_unblock(ce); + intel_context_put(ce); } } From f16d5cb981a557c1a32bc43ef28b5dc254f7239c Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:34 -0700 Subject: [PATCH 70/85] drm/i915/guc: Don't touch guc_state.sched_state without a lock Before we did some clever tricks to not use the a lock when touching guc_state.sched_state in certain cases. Don't do that, enforce the use of the lock. v2: (kernel test robo ) - Add __maybe_unused to sched_state_is_init() v3: rebase after the unused code path removal has been moved to an earlier patch. Signed-off-by: Matthew Brost Reported-by: kernel test robot Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-14-matthew.brost@intel.com --- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index e036a171ff17..ca73128d7b4d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -151,11 +151,23 @@ static inline void clr_context_registered(struct intel_context *ce) static inline void init_sched_state(struct intel_context *ce) { - /* Only should be called from guc_lrc_desc_pin() */ + lockdep_assert_held(&ce->guc_state.lock); atomic_set(&ce->guc_sched_state_no_lock, 0); ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK; } +__maybe_unused +static bool sched_state_is_init(struct intel_context *ce) +{ + /* + * XXX: Kernel contexts can have SCHED_STATE_NO_LOCK_REGISTERED after + * suspend. + */ + return !(atomic_read(&ce->guc_sched_state_no_lock) & + ~SCHED_STATE_NO_LOCK_REGISTERED) && + !(ce->guc_state.sched_state &= ~SCHED_STATE_BLOCKED_MASK); +} + static inline bool context_wait_for_deregister_to_register(struct intel_context *ce) { @@ -166,7 +178,7 @@ context_wait_for_deregister_to_register(struct intel_context *ce) static inline void set_context_wait_for_deregister_to_register(struct intel_context *ce) { - /* Only should be called from guc_lrc_desc_pin() without lock */ + lockdep_assert_held(&ce->guc_state.lock); ce->guc_state.sched_state |= SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER; } @@ -607,9 +619,7 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) bool pending_disable, pending_enable, deregister, destroyed, banned; xa_for_each(&guc->context_lookup, index, ce) { - /* Flush context */ spin_lock_irqsave(&ce->guc_state.lock, flags); - spin_unlock_irqrestore(&ce->guc_state.lock, flags); /* * Once we are at this point submission_disabled() is guaranteed @@ -625,6 +635,8 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) banned = context_banned(ce); init_sched_state(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + if (pending_enable || destroyed || deregister) { decr_outstanding_submission_g2h(guc); if (deregister) @@ -1324,6 +1336,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) int ret = 0; GEM_BUG_ON(!engine->mask); + GEM_BUG_ON(!sched_state_is_init(ce)); /* * Ensure LRC + CT vmas are is same region as write barrier is done @@ -1352,7 +1365,6 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) desc->priority = ce->guc_prio; desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; guc_context_policy_init(engine, desc); - init_sched_state(ce); /* * The context_lookup xarray is used to determine if the hardware From ae36b62927f1cfe81095641d6279cbf23fb64b2a Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:35 -0700 Subject: [PATCH 71/85] drm/i915/guc: Reset LRC descriptor if register returns -ENODEV Reset LRC descriptor if a context register returns -ENODEV as this means we are mid-reset. Fixes: eb5e7da736f3 ("drm/i915/guc: Reset implementation for new GuC interface") Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-15-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index ca73128d7b4d..dcd7a09f8559 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1405,10 +1405,12 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) } else { with_intel_runtime_pm(runtime_pm, wakeref) ret = register_context(ce, loop); - if (unlikely(ret == -EBUSY)) + if (unlikely(ret == -EBUSY)) { + reset_lrc_desc(guc, desc_idx); + } else if (unlikely(ret == -ENODEV)) { reset_lrc_desc(guc, desc_idx); - else if (unlikely(ret == -ENODEV)) ret = 0; /* Will get registered later */ + } } return ret; From b0d83888a32b30cb95bee7385151ac58d51a2340 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:36 -0700 Subject: [PATCH 72/85] drm/i915/guc: Release submit fence from an irq_work A subsequent patch will flip the locking hierarchy from ce->guc_state.lock -> sched_engine->lock to sched_engine->lock -> ce->guc_state.lock. As such we need to release the submit fence for a request from an IRQ to break a lock inversion - i.e. the fence must be release went holding ce->guc_state.lock and the releasing of the can acquire sched_engine->lock. v2: (Daniele) - Delete request from list before calling irq_work_queue Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-16-matthew.brost@intel.com --- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 22 ++++++++++++++++--- drivers/gpu/drm/i915/i915_request.h | 5 +++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index dcd7a09f8559..4b7ccf9730a3 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -2049,17 +2049,32 @@ static const struct intel_context_ops guc_context_ops = { .create_virtual = guc_create_virtual, }; +static void submit_work_cb(struct irq_work *wrk) +{ + struct i915_request *rq = container_of(wrk, typeof(*rq), submit_work); + + might_lock(&rq->engine->sched_engine->lock); + i915_sw_fence_complete(&rq->submit); +} + static void __guc_signal_context_fence(struct intel_context *ce) { - struct i915_request *rq; + struct i915_request *rq, *rn; lockdep_assert_held(&ce->guc_state.lock); if (!list_empty(&ce->guc_state.fences)) trace_intel_context_fence_release(ce); - list_for_each_entry(rq, &ce->guc_state.fences, guc_fence_link) - i915_sw_fence_complete(&rq->submit); + /* + * Use an IRQ to ensure locking order of sched_engine->lock -> + * ce->guc_state.lock is preserved. + */ + list_for_each_entry_safe(rq, rn, &ce->guc_state.fences, + guc_fence_link) { + list_del(&rq->guc_fence_link); + irq_work_queue(&rq->submit_work); + } INIT_LIST_HEAD(&ce->guc_state.fences); } @@ -2169,6 +2184,7 @@ out: spin_lock_irqsave(&ce->guc_state.lock, flags); if (context_wait_for_deregister_to_register(ce) || context_pending_disable(ce)) { + init_irq_work(&rq->submit_work, submit_work_cb); i915_sw_fence_await(&rq->submit); list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences); diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 1bc1349ba3c2..d818cfbfc41d 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -218,6 +218,11 @@ struct i915_request { }; struct llist_head execute_cb; struct i915_sw_fence semaphore; + /** + * @submit_work: complete submit fence from an IRQ if needed for + * locking hierarchy reasons. + */ + struct irq_work submit_work; /* * A list of everyone we wait upon, and everyone who waits upon us. From 52d66c06fd9412e9738330b0502b4b89bf079405 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:37 -0700 Subject: [PATCH 73/85] drm/i915/guc: Move guc_blocked fence to struct guc_state Move guc_blocked fence to struct guc_state as the lock which protects the fence lives there. s/ce->guc_blocked/ce->guc_state.blocked/g v2: (Daniele) - s/blocked_fence/blocked/g Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-17-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_context.c | 5 +++-- drivers/gpu/drm/i915/gt/intel_context_types.h | 5 ++--- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 18 +++++++++--------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 745e84c72c90..3048267ddc7e 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -405,8 +405,9 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) * Initialize fence to be complete as this is expected to be complete * unless there is a pending schedule disable outstanding. */ - i915_sw_fence_init(&ce->guc_blocked, sw_fence_dummy_notify); - i915_sw_fence_commit(&ce->guc_blocked); + i915_sw_fence_init(&ce->guc_state.blocked, + sw_fence_dummy_notify); + i915_sw_fence_commit(&ce->guc_state.blocked); i915_active_init(&ce->active, __intel_context_active, __intel_context_retire, 0); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 3a73f3117873..5aecb9038b5b 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -167,6 +167,8 @@ struct intel_context { * fence related to GuC submission */ struct list_head fences; + /* GuC context blocked fence */ + struct i915_sw_fence blocked; } guc_state; struct { @@ -190,9 +192,6 @@ struct intel_context { */ struct list_head guc_id_link; - /* GuC context blocked fence */ - struct i915_sw_fence guc_blocked; - /* * GuC priority management */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 4b7ccf9730a3..99f223114331 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1502,24 +1502,24 @@ static void guc_blocked_fence_complete(struct intel_context *ce) { lockdep_assert_held(&ce->guc_state.lock); - if (!i915_sw_fence_done(&ce->guc_blocked)) - i915_sw_fence_complete(&ce->guc_blocked); + if (!i915_sw_fence_done(&ce->guc_state.blocked)) + i915_sw_fence_complete(&ce->guc_state.blocked); } static void guc_blocked_fence_reinit(struct intel_context *ce) { lockdep_assert_held(&ce->guc_state.lock); - GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_blocked)); + GEM_BUG_ON(!i915_sw_fence_done(&ce->guc_state.blocked)); /* * This fence is always complete unless a pending schedule disable is * outstanding. We arm the fence here and complete it when we receive * the pending schedule disable complete message. */ - i915_sw_fence_fini(&ce->guc_blocked); - i915_sw_fence_reinit(&ce->guc_blocked); - i915_sw_fence_await(&ce->guc_blocked); - i915_sw_fence_commit(&ce->guc_blocked); + i915_sw_fence_fini(&ce->guc_state.blocked); + i915_sw_fence_reinit(&ce->guc_state.blocked); + i915_sw_fence_await(&ce->guc_state.blocked); + i915_sw_fence_commit(&ce->guc_state.blocked); } static u16 prep_context_pending_disable(struct intel_context *ce) @@ -1559,7 +1559,7 @@ static struct i915_sw_fence *guc_context_block(struct intel_context *ce) if (enabled) clr_context_enabled(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); - return &ce->guc_blocked; + return &ce->guc_state.blocked; } /* @@ -1575,7 +1575,7 @@ static struct i915_sw_fence *guc_context_block(struct intel_context *ce) with_intel_runtime_pm(runtime_pm, wakeref) __guc_context_sched_disable(guc, ce, guc_id); - return &ce->guc_blocked; + return &ce->guc_state.blocked; } #define SCHED_STATE_MULTI_BLOCKED_MASK \ From 0f7976506de615abfcc54e2469417c69ff2b030f Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:38 -0700 Subject: [PATCH 74/85] drm/i915/guc: Rework and simplify locking Rework and simplify the locking with GuC subission. Drop sched_state_no_lock and move all fields under the guc_state.sched_state and protect all these fields with guc_state.lock . This requires changing the locking hierarchy from guc_state.lock -> sched_engine.lock to sched_engine.lock -> guc_state.lock. v2: (Daniele) - Don't check fields outside of lock during sched disable, check less fields within lock as some of the outside are no longer needed Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-18-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_context_types.h | 5 +- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 199 ++++++++---------- drivers/gpu/drm/i915/i915_trace.h | 6 +- 3 files changed, 90 insertions(+), 120 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 5aecb9038b5b..d2f798ef678c 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -161,7 +161,7 @@ struct intel_context { * sched_state: scheduling state of this context using GuC * submission */ - u16 sched_state; + u32 sched_state; /* * fences: maintains of list of requests that have a submit * fence related to GuC submission @@ -178,9 +178,6 @@ struct intel_context { struct list_head requests; } guc_active; - /* GuC scheduling state flags that do not require a lock. */ - atomic_t guc_sched_state_no_lock; - /* GuC LRC descriptor ID */ u16 guc_id; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 99f223114331..383dc1017004 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -72,87 +72,24 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count); #define GUC_REQUEST_SIZE 64 /* bytes */ -/* - * Below is a set of functions which control the GuC scheduling state which do - * not require a lock as all state transitions are mutually exclusive. i.e. It - * is not possible for the context pinning code and submission, for the same - * context, to be executing simultaneously. We still need an atomic as it is - * possible for some of the bits to changing at the same time though. - */ -#define SCHED_STATE_NO_LOCK_ENABLED BIT(0) -#define SCHED_STATE_NO_LOCK_PENDING_ENABLE BIT(1) -#define SCHED_STATE_NO_LOCK_REGISTERED BIT(2) -static inline bool context_enabled(struct intel_context *ce) -{ - return (atomic_read(&ce->guc_sched_state_no_lock) & - SCHED_STATE_NO_LOCK_ENABLED); -} - -static inline void set_context_enabled(struct intel_context *ce) -{ - atomic_or(SCHED_STATE_NO_LOCK_ENABLED, &ce->guc_sched_state_no_lock); -} - -static inline void clr_context_enabled(struct intel_context *ce) -{ - atomic_and((u32)~SCHED_STATE_NO_LOCK_ENABLED, - &ce->guc_sched_state_no_lock); -} - -static inline bool context_pending_enable(struct intel_context *ce) -{ - return (atomic_read(&ce->guc_sched_state_no_lock) & - SCHED_STATE_NO_LOCK_PENDING_ENABLE); -} - -static inline void set_context_pending_enable(struct intel_context *ce) -{ - atomic_or(SCHED_STATE_NO_LOCK_PENDING_ENABLE, - &ce->guc_sched_state_no_lock); -} - -static inline void clr_context_pending_enable(struct intel_context *ce) -{ - atomic_and((u32)~SCHED_STATE_NO_LOCK_PENDING_ENABLE, - &ce->guc_sched_state_no_lock); -} - -static inline bool context_registered(struct intel_context *ce) -{ - return (atomic_read(&ce->guc_sched_state_no_lock) & - SCHED_STATE_NO_LOCK_REGISTERED); -} - -static inline void set_context_registered(struct intel_context *ce) -{ - atomic_or(SCHED_STATE_NO_LOCK_REGISTERED, - &ce->guc_sched_state_no_lock); -} - -static inline void clr_context_registered(struct intel_context *ce) -{ - atomic_and((u32)~SCHED_STATE_NO_LOCK_REGISTERED, - &ce->guc_sched_state_no_lock); -} - /* * Below is a set of functions which control the GuC scheduling state which - * require a lock, aside from the special case where the functions are called - * from guc_lrc_desc_pin(). In that case it isn't possible for any other code - * path to be executing on the context. + * require a lock. */ #define SCHED_STATE_WAIT_FOR_DEREGISTER_TO_REGISTER BIT(0) #define SCHED_STATE_DESTROYED BIT(1) #define SCHED_STATE_PENDING_DISABLE BIT(2) #define SCHED_STATE_BANNED BIT(3) -#define SCHED_STATE_BLOCKED_SHIFT 4 +#define SCHED_STATE_ENABLED BIT(4) +#define SCHED_STATE_PENDING_ENABLE BIT(5) +#define SCHED_STATE_REGISTERED BIT(6) +#define SCHED_STATE_BLOCKED_SHIFT 7 #define SCHED_STATE_BLOCKED BIT(SCHED_STATE_BLOCKED_SHIFT) #define SCHED_STATE_BLOCKED_MASK (0xfff << SCHED_STATE_BLOCKED_SHIFT) static inline void init_sched_state(struct intel_context *ce) { lockdep_assert_held(&ce->guc_state.lock); - atomic_set(&ce->guc_sched_state_no_lock, 0); ce->guc_state.sched_state &= SCHED_STATE_BLOCKED_MASK; } @@ -163,9 +100,8 @@ static bool sched_state_is_init(struct intel_context *ce) * XXX: Kernel contexts can have SCHED_STATE_NO_LOCK_REGISTERED after * suspend. */ - return !(atomic_read(&ce->guc_sched_state_no_lock) & - ~SCHED_STATE_NO_LOCK_REGISTERED) && - !(ce->guc_state.sched_state &= ~SCHED_STATE_BLOCKED_MASK); + return !(ce->guc_state.sched_state &= + ~(SCHED_STATE_BLOCKED_MASK | SCHED_STATE_REGISTERED)); } static inline bool @@ -238,6 +174,57 @@ static inline void clr_context_banned(struct intel_context *ce) ce->guc_state.sched_state &= ~SCHED_STATE_BANNED; } +static inline bool context_enabled(struct intel_context *ce) +{ + return ce->guc_state.sched_state & SCHED_STATE_ENABLED; +} + +static inline void set_context_enabled(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state |= SCHED_STATE_ENABLED; +} + +static inline void clr_context_enabled(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state &= ~SCHED_STATE_ENABLED; +} + +static inline bool context_pending_enable(struct intel_context *ce) +{ + return ce->guc_state.sched_state & SCHED_STATE_PENDING_ENABLE; +} + +static inline void set_context_pending_enable(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state |= SCHED_STATE_PENDING_ENABLE; +} + +static inline void clr_context_pending_enable(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state &= ~SCHED_STATE_PENDING_ENABLE; +} + +static inline bool context_registered(struct intel_context *ce) +{ + return ce->guc_state.sched_state & SCHED_STATE_REGISTERED; +} + +static inline void set_context_registered(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state |= SCHED_STATE_REGISTERED; +} + +static inline void clr_context_registered(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ce->guc_state.sched_state &= ~SCHED_STATE_REGISTERED; +} + static inline u32 context_blocked(struct intel_context *ce) { return (ce->guc_state.sched_state & SCHED_STATE_BLOCKED_MASK) >> @@ -246,7 +233,6 @@ static inline u32 context_blocked(struct intel_context *ce) static inline void incr_context_blocked(struct intel_context *ce) { - lockdep_assert_held(&ce->engine->sched_engine->lock); lockdep_assert_held(&ce->guc_state.lock); ce->guc_state.sched_state += SCHED_STATE_BLOCKED; @@ -256,7 +242,6 @@ static inline void incr_context_blocked(struct intel_context *ce) static inline void decr_context_blocked(struct intel_context *ce) { - lockdep_assert_held(&ce->engine->sched_engine->lock); lockdep_assert_held(&ce->guc_state.lock); GEM_BUG_ON(!context_blocked(ce)); /* Underflow check */ @@ -452,6 +437,8 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) u32 g2h_len_dw = 0; bool enabled; + lockdep_assert_held(&rq->engine->sched_engine->lock); + /* * Corner case where requests were sitting in the priority list or a * request resubmitted after the context was banned. @@ -459,7 +446,7 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) if (unlikely(intel_context_is_banned(ce))) { i915_request_put(i915_request_mark_eio(rq)); intel_engine_signal_breadcrumbs(ce->engine); - goto out; + return 0; } GEM_BUG_ON(!atomic_read(&ce->guc_id_ref)); @@ -472,9 +459,11 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) if (unlikely(!lrc_desc_registered(guc, ce->guc_id))) { err = guc_lrc_desc_pin(ce, false); if (unlikely(err)) - goto out; + return err; } + spin_lock(&ce->guc_state.lock); + /* * The request / context will be run on the hardware when scheduling * gets enabled in the unblock. @@ -509,6 +498,7 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) trace_i915_request_guc_submit(rq); out: + spin_unlock(&ce->guc_state.lock); return err; } @@ -747,6 +737,7 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc) wait_for_reset(guc, &guc->outstanding_submission_g2h); } while (!list_empty(&guc->ct.requests.incoming)); } + scrub_guc_desc_for_outstanding_g2h(guc); } @@ -1147,7 +1138,11 @@ static int steal_guc_id(struct intel_guc *guc) list_del_init(&ce->guc_id_link); guc_id = ce->guc_id; + + spin_lock(&ce->guc_state.lock); clr_context_registered(ce); + spin_unlock(&ce->guc_state.lock); + set_context_guc_id_invalid(ce); return guc_id; } else { @@ -1183,6 +1178,8 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) try_again: spin_lock_irqsave(&guc->contexts_lock, flags); + might_lock(&ce->guc_state.lock); + if (context_guc_id_invalid(ce)) { ret = assign_guc_id(guc, &ce->guc_id); if (ret) @@ -1262,8 +1259,13 @@ static int register_context(struct intel_context *ce, bool loop) trace_intel_context_register(ce); ret = __guc_action_register_context(guc, ce->guc_id, offset, loop); - if (likely(!ret)) + if (likely(!ret)) { + unsigned long flags; + + spin_lock_irqsave(&ce->guc_state.lock, flags); set_context_registered(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); + } return ret; } @@ -1537,7 +1539,6 @@ static u16 prep_context_pending_disable(struct intel_context *ce) static struct i915_sw_fence *guc_context_block(struct intel_context *ce) { struct intel_guc *guc = ce_to_guc(ce); - struct i915_sched_engine *sched_engine = ce->engine->sched_engine; unsigned long flags; struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; intel_wakeref_t wakeref; @@ -1546,13 +1547,7 @@ static struct i915_sw_fence *guc_context_block(struct intel_context *ce) spin_lock_irqsave(&ce->guc_state.lock, flags); - /* - * Sync with submission path, increment before below changes to context - * state. - */ - spin_lock(&sched_engine->lock); incr_context_blocked(ce); - spin_unlock(&sched_engine->lock); enabled = context_enabled(ce); if (unlikely(!enabled || submission_disabled(guc))) { @@ -1598,7 +1593,6 @@ static bool context_cant_unblock(struct intel_context *ce) static void guc_context_unblock(struct intel_context *ce) { struct intel_guc *guc = ce_to_guc(ce); - struct i915_sched_engine *sched_engine = ce->engine->sched_engine; unsigned long flags; struct intel_runtime_pm *runtime_pm = ce->engine->uncore->rpm; intel_wakeref_t wakeref; @@ -1618,13 +1612,7 @@ static void guc_context_unblock(struct intel_context *ce) intel_context_get(ce); } - /* - * Sync with submission path, decrement after above changes to context - * state. - */ - spin_lock(&sched_engine->lock); decr_context_blocked(ce); - spin_unlock(&sched_engine->lock); spin_unlock_irqrestore(&ce->guc_state.lock, flags); @@ -1730,16 +1718,6 @@ static void guc_context_sched_disable(struct intel_context *ce) struct intel_runtime_pm *runtime_pm = &ce->engine->gt->i915->runtime_pm; intel_wakeref_t wakeref; u16 guc_id; - bool enabled; - - if (submission_disabled(guc) || context_guc_id_invalid(ce) || - !lrc_desc_registered(guc, ce->guc_id)) { - clr_context_enabled(ce); - goto unpin; - } - - if (!context_enabled(ce)) - goto unpin; spin_lock_irqsave(&ce->guc_state.lock, flags); @@ -1753,10 +1731,8 @@ static void guc_context_sched_disable(struct intel_context *ce) * sleep) ensures another process doesn't pin this context and generate * a request before we set the 'context_pending_disable' flag here. */ - enabled = context_enabled(ce); - if (unlikely(!enabled || submission_disabled(guc))) { - if (enabled) - clr_context_enabled(ce); + if (unlikely(!context_enabled(ce) || submission_disabled(guc))) { + clr_context_enabled(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); goto unpin; } @@ -1784,7 +1760,6 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce) GEM_BUG_ON(ce != __get_context(guc, ce->guc_id)); GEM_BUG_ON(context_enabled(ce)); - clr_context_registered(ce); deregister_context(ce, ce->guc_id); } @@ -1857,8 +1832,10 @@ static void guc_context_destroy(struct kref *kref) /* Seal race with Reset */ spin_lock_irqsave(&ce->guc_state.lock, flags); disabled = submission_disabled(guc); - if (likely(!disabled)) + if (likely(!disabled)) { set_context_destroyed(ce); + clr_context_registered(ce); + } spin_unlock_irqrestore(&ce->guc_state.lock, flags); if (unlikely(disabled)) { release_guc_id(guc, ce); @@ -2724,8 +2701,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, (!context_pending_enable(ce) && !context_pending_disable(ce)))) { drm_err(&guc_to_gt(guc)->i915->drm, - "Bad context sched_state 0x%x, 0x%x, desc_idx %u", - atomic_read(&ce->guc_sched_state_no_lock), + "Bad context sched_state 0x%x, desc_idx %u", ce->guc_state.sched_state, desc_idx); return -EPROTO; } @@ -2740,7 +2716,9 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc, } #endif + spin_lock_irqsave(&ce->guc_state.lock, flags); clr_context_pending_enable(ce); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); } else if (context_pending_disable(ce)) { bool banned; @@ -3014,9 +2992,8 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, atomic_read(&ce->pin_count)); drm_printf(p, "\t\tGuC ID Ref Count: %u\n", atomic_read(&ce->guc_id_ref)); - drm_printf(p, "\t\tSchedule State: 0x%x, 0x%x\n\n", - ce->guc_state.sched_state, - atomic_read(&ce->guc_sched_state_no_lock)); + drm_printf(p, "\t\tSchedule State: 0x%x\n\n", + ce->guc_state.sched_state); guc_log_context_priority(p, ce); } diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 237e5061381b..510168c1d703 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -903,7 +903,6 @@ DECLARE_EVENT_CLASS(intel_context, __field(u32, guc_id) __field(int, pin_count) __field(u32, sched_state) - __field(u32, guc_sched_state_no_lock) __field(u8, guc_prio) ), @@ -911,15 +910,12 @@ DECLARE_EVENT_CLASS(intel_context, __entry->guc_id = ce->guc_id; __entry->pin_count = atomic_read(&ce->pin_count); __entry->sched_state = ce->guc_state.sched_state; - __entry->guc_sched_state_no_lock = - atomic_read(&ce->guc_sched_state_no_lock); __entry->guc_prio = ce->guc_prio; ), - TP_printk("guc_id=%d, pin_count=%d sched_state=0x%x,0x%x, guc_prio=%u", + TP_printk("guc_id=%d, pin_count=%d sched_state=0x%x, guc_prio=%u", __entry->guc_id, __entry->pin_count, __entry->sched_state, - __entry->guc_sched_state_no_lock, __entry->guc_prio) ); From 1424ba81a2d056008adebab21bf633c420235e3c Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:39 -0700 Subject: [PATCH 75/85] drm/i915/guc: Proper xarray usage for contexts_lookup Lock the xarray and take ref to the context if needed. v2: (Checkpatch) - Add new line after declaration (Daniel Vetter) - Correct put / get accounting in xa_for_loops v3: (Checkpatch) - Extra new line Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-19-matthew.brost@intel.com --- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 102 +++++++++++++++--- 1 file changed, 87 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 383dc1017004..4814acf1c2ac 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -608,8 +608,18 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) unsigned long index, flags; bool pending_disable, pending_enable, deregister, destroyed, banned; + xa_lock_irqsave(&guc->context_lookup, flags); xa_for_each(&guc->context_lookup, index, ce) { - spin_lock_irqsave(&ce->guc_state.lock, flags); + /* + * Corner case where the ref count on the object is zero but and + * deregister G2H was lost. In this case we don't touch the ref + * count and finish the destroy of the context. + */ + bool do_put = kref_get_unless_zero(&ce->ref); + + xa_unlock(&guc->context_lookup); + + spin_lock(&ce->guc_state.lock); /* * Once we are at this point submission_disabled() is guaranteed @@ -625,7 +635,9 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) banned = context_banned(ce); init_sched_state(ce); - spin_unlock_irqrestore(&ce->guc_state.lock, flags); + spin_unlock(&ce->guc_state.lock); + + GEM_BUG_ON(!do_put && !destroyed); if (pending_enable || destroyed || deregister) { decr_outstanding_submission_g2h(guc); @@ -648,13 +660,19 @@ static void scrub_guc_desc_for_outstanding_g2h(struct intel_guc *guc) } intel_context_sched_disable_unpin(ce); decr_outstanding_submission_g2h(guc); - spin_lock_irqsave(&ce->guc_state.lock, flags); + + spin_lock(&ce->guc_state.lock); guc_blocked_fence_complete(ce); - spin_unlock_irqrestore(&ce->guc_state.lock, flags); + spin_unlock(&ce->guc_state.lock); intel_context_put(ce); } + + if (do_put) + intel_context_put(ce); + xa_lock(&guc->context_lookup); } + xa_unlock_irqrestore(&guc->context_lookup, flags); } static inline bool @@ -890,16 +908,29 @@ void intel_guc_submission_reset(struct intel_guc *guc, bool stalled) { struct intel_context *ce; unsigned long index; + unsigned long flags; if (unlikely(!guc_submission_initialized(guc))) { /* Reset called during driver load? GuC not yet initialised! */ return; } - xa_for_each(&guc->context_lookup, index, ce) + xa_lock_irqsave(&guc->context_lookup, flags); + xa_for_each(&guc->context_lookup, index, ce) { + if (!kref_get_unless_zero(&ce->ref)) + continue; + + xa_unlock(&guc->context_lookup); + if (intel_context_is_pinned(ce)) __guc_reset_context(ce, stalled); + intel_context_put(ce); + + xa_lock(&guc->context_lookup); + } + xa_unlock_irqrestore(&guc->context_lookup, flags); + /* GuC is blown away, drop all references to contexts */ xa_destroy(&guc->context_lookup); } @@ -974,11 +1005,24 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc) { struct intel_context *ce; unsigned long index; + unsigned long flags; + + xa_lock_irqsave(&guc->context_lookup, flags); + xa_for_each(&guc->context_lookup, index, ce) { + if (!kref_get_unless_zero(&ce->ref)) + continue; + + xa_unlock(&guc->context_lookup); - xa_for_each(&guc->context_lookup, index, ce) if (intel_context_is_pinned(ce)) guc_cancel_context_requests(ce); + intel_context_put(ce); + + xa_lock(&guc->context_lookup); + } + xa_unlock_irqrestore(&guc->context_lookup, flags); + guc_cancel_sched_engine_requests(guc->sched_engine); /* GuC is blown away, drop all references to contexts */ @@ -2866,21 +2910,28 @@ void intel_guc_find_hung_context(struct intel_engine_cs *engine) struct intel_context *ce; struct i915_request *rq; unsigned long index; + unsigned long flags; /* Reset called during driver load? GuC not yet initialised! */ if (unlikely(!guc_submission_initialized(guc))) return; + xa_lock_irqsave(&guc->context_lookup, flags); xa_for_each(&guc->context_lookup, index, ce) { - if (!intel_context_is_pinned(ce)) + if (!kref_get_unless_zero(&ce->ref)) continue; + xa_unlock(&guc->context_lookup); + + if (!intel_context_is_pinned(ce)) + goto next; + if (intel_engine_is_virtual(ce->engine)) { if (!(ce->engine->mask & engine->mask)) - continue; + goto next; } else { if (ce->engine != engine) - continue; + goto next; } list_for_each_entry(rq, &ce->guc_active.requests, sched.link) { @@ -2890,9 +2941,16 @@ void intel_guc_find_hung_context(struct intel_engine_cs *engine) intel_engine_set_hung_context(engine, ce); /* Can only cope with one hang at a time... */ - return; + intel_context_put(ce); + xa_lock(&guc->context_lookup); + goto done; } +next: + intel_context_put(ce); + xa_lock(&guc->context_lookup); } +done: + xa_unlock_irqrestore(&guc->context_lookup, flags); } void intel_guc_dump_active_requests(struct intel_engine_cs *engine, @@ -2908,23 +2966,34 @@ void intel_guc_dump_active_requests(struct intel_engine_cs *engine, if (unlikely(!guc_submission_initialized(guc))) return; + xa_lock_irqsave(&guc->context_lookup, flags); xa_for_each(&guc->context_lookup, index, ce) { - if (!intel_context_is_pinned(ce)) + if (!kref_get_unless_zero(&ce->ref)) continue; + xa_unlock(&guc->context_lookup); + + if (!intel_context_is_pinned(ce)) + goto next; + if (intel_engine_is_virtual(ce->engine)) { if (!(ce->engine->mask & engine->mask)) - continue; + goto next; } else { if (ce->engine != engine) - continue; + goto next; } - spin_lock_irqsave(&ce->guc_active.lock, flags); + spin_lock(&ce->guc_active.lock); intel_engine_dump_active_requests(&ce->guc_active.requests, hung_rq, m); - spin_unlock_irqrestore(&ce->guc_active.lock, flags); + spin_unlock(&ce->guc_active.lock); + +next: + intel_context_put(ce); + xa_lock(&guc->context_lookup); } + xa_unlock_irqrestore(&guc->context_lookup, flags); } void intel_guc_submission_print_info(struct intel_guc *guc, @@ -2978,7 +3047,9 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, { struct intel_context *ce; unsigned long index; + unsigned long flags; + xa_lock_irqsave(&guc->context_lookup, flags); xa_for_each(&guc->context_lookup, index, ce) { drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id); drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); @@ -2997,6 +3068,7 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, guc_log_context_priority(p, ce); } + xa_unlock_irqrestore(&guc->context_lookup, flags); } static struct intel_context * From 5b116c17e6babc6de2e26714bc66228c74038b71 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:40 -0700 Subject: [PATCH 76/85] drm/i915/guc: Drop pin count check trick between sched_disable and re-pin Drop pin count check trick between a sched_disable and re-pin, now rely on the lock and counter of the number of committed requests to determine if scheduling should be disabled on the context. Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-20-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_context_types.h | 2 + .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 53 +++++++++++-------- 2 files changed, 34 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index d2f798ef678c..3a5d98e908f4 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -169,6 +169,8 @@ struct intel_context { struct list_head fences; /* GuC context blocked fence */ struct i915_sw_fence blocked; + /* GuC committed requests */ + int number_committed_requests; } guc_state; struct { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 4814acf1c2ac..31f911ae14fa 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -249,6 +249,25 @@ static inline void decr_context_blocked(struct intel_context *ce) ce->guc_state.sched_state -= SCHED_STATE_BLOCKED; } +static inline bool context_has_committed_requests(struct intel_context *ce) +{ + return !!ce->guc_state.number_committed_requests; +} + +static inline void incr_context_committed_requests(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + ++ce->guc_state.number_committed_requests; + GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); +} + +static inline void decr_context_committed_requests(struct intel_context *ce) +{ + lockdep_assert_held(&ce->guc_state.lock); + --ce->guc_state.number_committed_requests; + GEM_BUG_ON(ce->guc_state.number_committed_requests < 0); +} + static inline bool context_guc_id_invalid(struct intel_context *ce) { return ce->guc_id == GUC_INVALID_LRC_ID; @@ -1766,24 +1785,18 @@ static void guc_context_sched_disable(struct intel_context *ce) spin_lock_irqsave(&ce->guc_state.lock, flags); /* - * We have to check if the context has been disabled by another thread. - * We also have to check if the context has been pinned again as another - * pin operation is allowed to pass this function. Checking the pin - * count, within ce->guc_state.lock, synchronizes this function with - * guc_request_alloc ensuring a request doesn't slip through the - * 'context_pending_disable' fence. Checking within the spin lock (can't - * sleep) ensures another process doesn't pin this context and generate - * a request before we set the 'context_pending_disable' flag here. + * We have to check if the context has been disabled by another thread, + * check if submssion has been disabled to seal a race with reset and + * finally check if any more requests have been committed to the + * context ensursing that a request doesn't slip through the + * 'context_pending_disable' fence. */ - if (unlikely(!context_enabled(ce) || submission_disabled(guc))) { + if (unlikely(!context_enabled(ce) || submission_disabled(guc) || + context_has_committed_requests(ce))) { clr_context_enabled(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); goto unpin; } - if (unlikely(atomic_add_unless(&ce->pin_count, -2, 2))) { - spin_unlock_irqrestore(&ce->guc_state.lock, flags); - return; - } guc_id = prep_context_pending_disable(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); @@ -1813,6 +1826,7 @@ static void __guc_context_destroy(struct intel_context *ce) ce->guc_prio_count[GUC_CLIENT_PRIORITY_HIGH] || ce->guc_prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || ce->guc_prio_count[GUC_CLIENT_PRIORITY_NORMAL]); + GEM_BUG_ON(ce->guc_state.number_committed_requests); lrc_fini(ce); intel_context_fini(ce); @@ -2043,6 +2057,10 @@ static void remove_from_context(struct i915_request *rq) spin_unlock_irq(&ce->guc_active.lock); + spin_lock_irq(&ce->guc_state.lock); + decr_context_committed_requests(ce); + spin_unlock_irq(&ce->guc_state.lock); + atomic_dec(&ce->guc_id_ref); i915_request_notify_execute_cb_imm(rq); } @@ -2193,15 +2211,7 @@ out: * schedule enable or context registration if either G2H is pending * respectfully. Once a G2H returns, the fence is released that is * blocking these requests (see guc_signal_context_fence). - * - * We can safely check the below fields outside of the lock as it isn't - * possible for these fields to transition from being clear to set but - * converse is possible, hence the need for the check within the lock. */ - if (likely(!context_wait_for_deregister_to_register(ce) && - !context_pending_disable(ce))) - return 0; - spin_lock_irqsave(&ce->guc_state.lock, flags); if (context_wait_for_deregister_to_register(ce) || context_pending_disable(ce)) { @@ -2210,6 +2220,7 @@ out: list_add_tail(&rq->guc_fence_link, &ce->guc_state.fences); } + incr_context_committed_requests(ce); spin_unlock_irqrestore(&ce->guc_state.lock, flags); return 0; From 9798b1724ba43f19deb44d2aa729af0e1cf4cd0d Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:41 -0700 Subject: [PATCH 77/85] drm/i915/guc: Move GuC priority fields in context under guc_active Move GuC management fields in context under guc_active struct as this is where the lock that protects theses fields lives. Also only set guc_prio field once during context init. v2: (Daniele) - set CONTEXT_SET_INIT Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-21-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_context_types.h | 12 ++-- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 69 +++++++++++-------- drivers/gpu/drm/i915/i915_trace.h | 2 +- 3 files changed, 46 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 3a5d98e908f4..b56960a781da 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -112,6 +112,7 @@ struct intel_context { #define CONTEXT_FORCE_SINGLE_SUBMISSION 7 #define CONTEXT_NOPREEMPT 8 #define CONTEXT_LRCA_DIRTY 9 +#define CONTEXT_GUC_INIT 10 struct { u64 timeout_us; @@ -178,6 +179,11 @@ struct intel_context { spinlock_t lock; /** requests: active requests on this context */ struct list_head requests; + /* + * GuC priority management + */ + u8 prio; + u32 prio_count[GUC_CLIENT_PRIORITY_NUM]; } guc_active; /* GuC LRC descriptor ID */ @@ -191,12 +197,6 @@ struct intel_context { */ struct list_head guc_id_link; - /* - * GuC priority management - */ - u8 guc_prio; - u32 guc_prio_count[GUC_CLIENT_PRIORITY_NUM]; - #ifdef CONFIG_DRM_I915_SELFTEST /** * @drop_schedule_enable: Force drop of schedule enable G2H for selftest diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 31f911ae14fa..7e9ee7d5aaab 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1385,8 +1385,6 @@ static void guc_context_policy_init(struct intel_engine_cs *engine, desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000; } -static inline u8 map_i915_prio_to_guc_prio(int prio); - static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) { struct intel_engine_cs *engine = ce->engine; @@ -1394,8 +1392,6 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) struct intel_guc *guc = &engine->gt->uc.guc; u32 desc_idx = ce->guc_id; struct guc_lrc_desc *desc; - const struct i915_gem_context *ctx; - int prio = I915_CONTEXT_DEFAULT_PRIORITY; bool context_registered; intel_wakeref_t wakeref; int ret = 0; @@ -1412,12 +1408,6 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) context_registered = lrc_desc_registered(guc, desc_idx); - rcu_read_lock(); - ctx = rcu_dereference(ce->gem_context); - if (ctx) - prio = ctx->sched.priority; - rcu_read_unlock(); - reset_lrc_desc(guc, desc_idx); set_lrc_desc_registered(guc, desc_idx, ce); @@ -1426,8 +1416,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) desc->engine_submit_mask = adjust_engine_mask(engine->class, engine->mask); desc->hw_context_desc = ce->lrc.lrca; - ce->guc_prio = map_i915_prio_to_guc_prio(prio); - desc->priority = ce->guc_prio; + desc->priority = ce->guc_active.prio; desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; guc_context_policy_init(engine, desc); @@ -1822,10 +1811,10 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce) static void __guc_context_destroy(struct intel_context *ce) { - GEM_BUG_ON(ce->guc_prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || - ce->guc_prio_count[GUC_CLIENT_PRIORITY_HIGH] || - ce->guc_prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || - ce->guc_prio_count[GUC_CLIENT_PRIORITY_NORMAL]); + GEM_BUG_ON(ce->guc_active.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || + ce->guc_active.prio_count[GUC_CLIENT_PRIORITY_HIGH] || + ce->guc_active.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || + ce->guc_active.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); GEM_BUG_ON(ce->guc_state.number_committed_requests); lrc_fini(ce); @@ -1935,14 +1924,17 @@ static void guc_context_set_prio(struct intel_guc *guc, GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || prio > GUC_CLIENT_PRIORITY_NORMAL); + lockdep_assert_held(&ce->guc_active.lock); - if (ce->guc_prio == prio || submission_disabled(guc) || - !context_registered(ce)) + if (ce->guc_active.prio == prio || submission_disabled(guc) || + !context_registered(ce)) { + ce->guc_active.prio = prio; return; + } guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); - ce->guc_prio = prio; + ce->guc_active.prio = prio; trace_intel_context_set_prio(ce); } @@ -1962,24 +1954,24 @@ static inline void add_context_inflight_prio(struct intel_context *ce, u8 guc_prio) { lockdep_assert_held(&ce->guc_active.lock); - GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_prio_count)); + GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_active.prio_count)); - ++ce->guc_prio_count[guc_prio]; + ++ce->guc_active.prio_count[guc_prio]; /* Overflow protection */ - GEM_WARN_ON(!ce->guc_prio_count[guc_prio]); + GEM_WARN_ON(!ce->guc_active.prio_count[guc_prio]); } static inline void sub_context_inflight_prio(struct intel_context *ce, u8 guc_prio) { lockdep_assert_held(&ce->guc_active.lock); - GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_prio_count)); + GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_active.prio_count)); /* Underflow protection */ - GEM_WARN_ON(!ce->guc_prio_count[guc_prio]); + GEM_WARN_ON(!ce->guc_active.prio_count[guc_prio]); - --ce->guc_prio_count[guc_prio]; + --ce->guc_active.prio_count[guc_prio]; } static inline void update_context_prio(struct intel_context *ce) @@ -1992,8 +1984,8 @@ static inline void update_context_prio(struct intel_context *ce) lockdep_assert_held(&ce->guc_active.lock); - for (i = 0; i < ARRAY_SIZE(ce->guc_prio_count); ++i) { - if (ce->guc_prio_count[i]) { + for (i = 0; i < ARRAY_SIZE(ce->guc_active.prio_count); ++i) { + if (ce->guc_active.prio_count[i]) { guc_context_set_prio(guc, ce, i); break; } @@ -2135,6 +2127,21 @@ static bool context_needs_register(struct intel_context *ce, bool new_guc_id) !submission_disabled(ce_to_guc(ce)); } +static void guc_context_init(struct intel_context *ce) +{ + const struct i915_gem_context *ctx; + int prio = I915_CONTEXT_DEFAULT_PRIORITY; + + rcu_read_lock(); + ctx = rcu_dereference(ce->gem_context); + if (ctx) + prio = ctx->sched.priority; + rcu_read_unlock(); + + ce->guc_active.prio = map_i915_prio_to_guc_prio(prio); + set_bit(CONTEXT_GUC_INIT, &ce->flags); +} + static int guc_request_alloc(struct i915_request *rq) { struct intel_context *ce = rq->context; @@ -2166,6 +2173,9 @@ static int guc_request_alloc(struct i915_request *rq) rq->reserved_space -= GUC_REQUEST_SIZE; + if (unlikely(!test_bit(CONTEXT_GUC_INIT, &ce->flags))) + guc_context_init(ce); + /* * Call pin_guc_id here rather than in the pinning step as with * dma_resv, contexts can be repeatedly pinned / unpinned trashing the @@ -3042,13 +3052,12 @@ static inline void guc_log_context_priority(struct drm_printer *p, { int i; - drm_printf(p, "\t\tPriority: %d\n", - ce->guc_prio); + drm_printf(p, "\t\tPriority: %d\n", ce->guc_active.prio); drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n"); for (i = GUC_CLIENT_PRIORITY_KMD_HIGH; i < GUC_CLIENT_PRIORITY_NUM; ++i) { drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n", - i, ce->guc_prio_count[i]); + i, ce->guc_active.prio_count[i]); } drm_printf(p, "\n"); } diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 510168c1d703..181c5831405b 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -910,7 +910,7 @@ DECLARE_EVENT_CLASS(intel_context, __entry->guc_id = ce->guc_id; __entry->pin_count = atomic_read(&ce->pin_count); __entry->sched_state = ce->guc_state.sched_state; - __entry->guc_prio = ce->guc_prio; + __entry->guc_prio = ce->guc_active.prio; ), TP_printk("guc_id=%d, pin_count=%d sched_state=0x%x, guc_prio=%u", From 3cb3e3434b9f9c34e98605658818b72fdaef0795 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:42 -0700 Subject: [PATCH 78/85] drm/i915/guc: Move fields protected by guc->contexts_lock into sub structure To make ownership of locking clear move fields (guc_id, guc_id_ref, guc_id_link) to sub structure guc_id in intel_context. Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: Matthew Brost Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-22-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_context.c | 4 +- drivers/gpu/drm/i915/gt/intel_context_types.h | 18 +-- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 6 +- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 104 +++++++++--------- drivers/gpu/drm/i915/i915_trace.h | 4 +- 5 files changed, 69 insertions(+), 67 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 3048267ddc7e..485460a11331 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -398,8 +398,8 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) spin_lock_init(&ce->guc_active.lock); INIT_LIST_HEAD(&ce->guc_active.requests); - ce->guc_id = GUC_INVALID_LRC_ID; - INIT_LIST_HEAD(&ce->guc_id_link); + ce->guc_id.id = GUC_INVALID_LRC_ID; + INIT_LIST_HEAD(&ce->guc_id.link); /* * Initialize fence to be complete as this is expected to be complete diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index b56960a781da..0b00d249c884 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -186,16 +186,18 @@ struct intel_context { u32 prio_count[GUC_CLIENT_PRIORITY_NUM]; } guc_active; - /* GuC LRC descriptor ID */ - u16 guc_id; + struct { + /* GuC LRC descriptor ID */ + u16 id; - /* GuC LRC descriptor reference count */ - atomic_t guc_id_ref; + /* GuC LRC descriptor reference count */ + atomic_t ref; - /* - * GuC ID link - in list when unpinned but guc_id still valid in GuC - */ - struct list_head guc_id_link; + /* + * GuC ID link - in list when unpinned but guc_id still valid in GuC + */ + struct list_head link; + } guc_id; #ifdef CONFIG_DRM_I915_SELFTEST /** diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index 8be23e0f9306..7e6fdabac599 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -789,7 +789,7 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) if (err) pr_err("[%s] Wait for request %lld:%lld [0x%04X] failed: %d!\n", engine->name, rq->fence.context, - rq->fence.seqno, rq->context->guc_id, err); + rq->fence.seqno, rq->context->guc_id.id, err); } skip: @@ -1098,7 +1098,7 @@ static int __igt_reset_engines(struct intel_gt *gt, if (err) pr_err("[%s] Wait for request %lld:%lld [0x%04X] failed: %d!\n", engine->name, rq->fence.context, - rq->fence.seqno, rq->context->guc_id, err); + rq->fence.seqno, rq->context->guc_id.id, err); } count++; @@ -1108,7 +1108,7 @@ static int __igt_reset_engines(struct intel_gt *gt, pr_err("i915_reset_engine(%s:%s): failed to reset request %lld:%lld [0x%04X]\n", engine->name, test_name, rq->fence.context, - rq->fence.seqno, rq->context->guc_id); + rq->fence.seqno, rq->context->guc_id.id); i915_request_put(rq); GEM_TRACE_DUMP(); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 7e9ee7d5aaab..2c9a6b4ce071 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -270,12 +270,12 @@ static inline void decr_context_committed_requests(struct intel_context *ce) static inline bool context_guc_id_invalid(struct intel_context *ce) { - return ce->guc_id == GUC_INVALID_LRC_ID; + return ce->guc_id.id == GUC_INVALID_LRC_ID; } static inline void set_context_guc_id_invalid(struct intel_context *ce) { - ce->guc_id = GUC_INVALID_LRC_ID; + ce->guc_id.id = GUC_INVALID_LRC_ID; } static inline struct intel_guc *ce_to_guc(struct intel_context *ce) @@ -468,14 +468,14 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) return 0; } - GEM_BUG_ON(!atomic_read(&ce->guc_id_ref)); + GEM_BUG_ON(!atomic_read(&ce->guc_id.ref)); GEM_BUG_ON(context_guc_id_invalid(ce)); /* * Corner case where the GuC firmware was blown away and reloaded while * this context was pinned. */ - if (unlikely(!lrc_desc_registered(guc, ce->guc_id))) { + if (unlikely(!lrc_desc_registered(guc, ce->guc_id.id))) { err = guc_lrc_desc_pin(ce, false); if (unlikely(err)) return err; @@ -494,14 +494,14 @@ static int guc_add_request(struct intel_guc *guc, struct i915_request *rq) if (!enabled) { action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET; - action[len++] = ce->guc_id; + action[len++] = ce->guc_id.id; action[len++] = GUC_CONTEXT_ENABLE; set_context_pending_enable(ce); intel_context_get(ce); g2h_len_dw = G2H_LEN_DW_SCHED_CONTEXT_MODE_SET; } else { action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT; - action[len++] = ce->guc_id; + action[len++] = ce->guc_id.id; } err = intel_guc_send_nb(guc, action, len, g2h_len_dw); @@ -1167,12 +1167,12 @@ static int new_guc_id(struct intel_guc *guc) static void __release_guc_id(struct intel_guc *guc, struct intel_context *ce) { if (!context_guc_id_invalid(ce)) { - ida_simple_remove(&guc->guc_ids, ce->guc_id); - reset_lrc_desc(guc, ce->guc_id); + ida_simple_remove(&guc->guc_ids, ce->guc_id.id); + reset_lrc_desc(guc, ce->guc_id.id); set_context_guc_id_invalid(ce); } - if (!list_empty(&ce->guc_id_link)) - list_del_init(&ce->guc_id_link); + if (!list_empty(&ce->guc_id.link)) + list_del_init(&ce->guc_id.link); } static void release_guc_id(struct intel_guc *guc, struct intel_context *ce) @@ -1194,13 +1194,13 @@ static int steal_guc_id(struct intel_guc *guc) if (!list_empty(&guc->guc_id_list)) { ce = list_first_entry(&guc->guc_id_list, struct intel_context, - guc_id_link); + guc_id.link); - GEM_BUG_ON(atomic_read(&ce->guc_id_ref)); + GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); GEM_BUG_ON(context_guc_id_invalid(ce)); - list_del_init(&ce->guc_id_link); - guc_id = ce->guc_id; + list_del_init(&ce->guc_id.link); + guc_id = ce->guc_id.id; spin_lock(&ce->guc_state.lock); clr_context_registered(ce); @@ -1236,7 +1236,7 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) int ret = 0; unsigned long flags, tries = PIN_GUC_ID_TRIES; - GEM_BUG_ON(atomic_read(&ce->guc_id_ref)); + GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); try_again: spin_lock_irqsave(&guc->contexts_lock, flags); @@ -1244,20 +1244,20 @@ try_again: might_lock(&ce->guc_state.lock); if (context_guc_id_invalid(ce)) { - ret = assign_guc_id(guc, &ce->guc_id); + ret = assign_guc_id(guc, &ce->guc_id.id); if (ret) goto out_unlock; ret = 1; /* Indidcates newly assigned guc_id */ } - if (!list_empty(&ce->guc_id_link)) - list_del_init(&ce->guc_id_link); - atomic_inc(&ce->guc_id_ref); + if (!list_empty(&ce->guc_id.link)) + list_del_init(&ce->guc_id.link); + atomic_inc(&ce->guc_id.ref); out_unlock: spin_unlock_irqrestore(&guc->contexts_lock, flags); /* - * -EAGAIN indicates no guc_ids are available, let's retire any + * -EAGAIN indicates no guc_id are available, let's retire any * outstanding requests to see if that frees up a guc_id. If the first * retire didn't help, insert a sleep with the timeslice duration before * attempting to retire more requests. Double the sleep period each @@ -1285,15 +1285,15 @@ static void unpin_guc_id(struct intel_guc *guc, struct intel_context *ce) { unsigned long flags; - GEM_BUG_ON(atomic_read(&ce->guc_id_ref) < 0); + GEM_BUG_ON(atomic_read(&ce->guc_id.ref) < 0); if (unlikely(context_guc_id_invalid(ce))) return; spin_lock_irqsave(&guc->contexts_lock, flags); - if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id_link) && - !atomic_read(&ce->guc_id_ref)) - list_add_tail(&ce->guc_id_link, &guc->guc_id_list); + if (!context_guc_id_invalid(ce) && list_empty(&ce->guc_id.link) && + !atomic_read(&ce->guc_id.ref)) + list_add_tail(&ce->guc_id.link, &guc->guc_id_list); spin_unlock_irqrestore(&guc->contexts_lock, flags); } @@ -1316,12 +1316,12 @@ static int register_context(struct intel_context *ce, bool loop) { struct intel_guc *guc = ce_to_guc(ce); u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool) + - ce->guc_id * sizeof(struct guc_lrc_desc); + ce->guc_id.id * sizeof(struct guc_lrc_desc); int ret; trace_intel_context_register(ce); - ret = __guc_action_register_context(guc, ce->guc_id, offset, loop); + ret = __guc_action_register_context(guc, ce->guc_id.id, offset, loop); if (likely(!ret)) { unsigned long flags; @@ -1390,7 +1390,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) struct intel_engine_cs *engine = ce->engine; struct intel_runtime_pm *runtime_pm = engine->uncore->rpm; struct intel_guc *guc = &engine->gt->uc.guc; - u32 desc_idx = ce->guc_id; + u32 desc_idx = ce->guc_id.id; struct guc_lrc_desc *desc; bool context_registered; intel_wakeref_t wakeref; @@ -1453,7 +1453,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) * context whose guc_id was stolen. */ with_intel_runtime_pm(runtime_pm, wakeref) - ret = deregister_context(ce, ce->guc_id); + ret = deregister_context(ce, ce->guc_id.id); if (unlikely(ret == -ENODEV)) ret = 0; /* Will get registered later */ } else { @@ -1524,7 +1524,7 @@ static void __guc_context_sched_enable(struct intel_guc *guc, { u32 action[] = { INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, - ce->guc_id, + ce->guc_id.id, GUC_CONTEXT_ENABLE }; @@ -1540,7 +1540,7 @@ static void __guc_context_sched_disable(struct intel_guc *guc, { u32 action[] = { INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET, - guc_id, /* ce->guc_id not stable */ + guc_id, /* ce->guc_id.id not stable */ GUC_CONTEXT_DISABLE }; @@ -1585,7 +1585,7 @@ static u16 prep_context_pending_disable(struct intel_context *ce) guc_blocked_fence_reinit(ce); intel_context_get(ce); - return ce->guc_id; + return ce->guc_id.id; } static struct i915_sw_fence *guc_context_block(struct intel_context *ce) @@ -1638,7 +1638,7 @@ static bool context_cant_unblock(struct intel_context *ce) return (ce->guc_state.sched_state & SCHED_STATE_NO_UNBLOCK) || context_guc_id_invalid(ce) || - !lrc_desc_registered(ce_to_guc(ce), ce->guc_id) || + !lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id) || !intel_context_is_pinned(ce); } @@ -1757,7 +1757,7 @@ static void guc_context_ban(struct intel_context *ce, struct i915_request *rq) if (!context_guc_id_invalid(ce)) with_intel_runtime_pm(runtime_pm, wakeref) __guc_context_set_preemption_timeout(guc, - ce->guc_id, + ce->guc_id.id, 1); spin_unlock_irqrestore(&ce->guc_state.lock, flags); } @@ -1802,11 +1802,11 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce) { struct intel_guc *guc = ce_to_guc(ce); - GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id)); - GEM_BUG_ON(ce != __get_context(guc, ce->guc_id)); + GEM_BUG_ON(!lrc_desc_registered(guc, ce->guc_id.id)); + GEM_BUG_ON(ce != __get_context(guc, ce->guc_id.id)); GEM_BUG_ON(context_enabled(ce)); - deregister_context(ce, ce->guc_id); + deregister_context(ce, ce->guc_id.id); } static void __guc_context_destroy(struct intel_context *ce) @@ -1851,7 +1851,7 @@ static void guc_context_destroy(struct kref *kref) __guc_context_destroy(ce); return; } else if (submission_disabled(guc) || - !lrc_desc_registered(guc, ce->guc_id)) { + !lrc_desc_registered(guc, ce->guc_id.id)) { release_guc_id(guc, ce); __guc_context_destroy(ce); return; @@ -1860,10 +1860,10 @@ static void guc_context_destroy(struct kref *kref) /* * We have to acquire the context spinlock and check guc_id again, if it * is valid it hasn't been stolen and needs to be deregistered. We - * delete this context from the list of unpinned guc_ids available to + * delete this context from the list of unpinned guc_id available to * steal to seal a race with guc_lrc_desc_pin(). When the G2H CTB * returns indicating this context has been deregistered the guc_id is - * returned to the pool of available guc_ids. + * returned to the pool of available guc_id. */ spin_lock_irqsave(&guc->contexts_lock, flags); if (context_guc_id_invalid(ce)) { @@ -1872,8 +1872,8 @@ static void guc_context_destroy(struct kref *kref) return; } - if (!list_empty(&ce->guc_id_link)) - list_del_init(&ce->guc_id_link); + if (!list_empty(&ce->guc_id.link)) + list_del_init(&ce->guc_id.link); spin_unlock_irqrestore(&guc->contexts_lock, flags); /* Seal race with Reset */ @@ -1918,7 +1918,7 @@ static void guc_context_set_prio(struct intel_guc *guc, { u32 action[] = { INTEL_GUC_ACTION_SET_CONTEXT_PRIORITY, - ce->guc_id, + ce->guc_id.id, prio, }; @@ -2053,7 +2053,7 @@ static void remove_from_context(struct i915_request *rq) decr_context_committed_requests(ce); spin_unlock_irq(&ce->guc_state.lock); - atomic_dec(&ce->guc_id_ref); + atomic_dec(&ce->guc_id.ref); i915_request_notify_execute_cb_imm(rq); } @@ -2123,7 +2123,7 @@ static void guc_signal_context_fence(struct intel_context *ce) static bool context_needs_register(struct intel_context *ce, bool new_guc_id) { return (new_guc_id || test_bit(CONTEXT_LRCA_DIRTY, &ce->flags) || - !lrc_desc_registered(ce_to_guc(ce), ce->guc_id)) && + !lrc_desc_registered(ce_to_guc(ce), ce->guc_id.id)) && !submission_disabled(ce_to_guc(ce)); } @@ -2179,11 +2179,11 @@ static int guc_request_alloc(struct i915_request *rq) /* * Call pin_guc_id here rather than in the pinning step as with * dma_resv, contexts can be repeatedly pinned / unpinned trashing the - * guc_ids and creating horrible race conditions. This is especially bad - * when guc_ids are being stolen due to over subscription. By the time + * guc_id and creating horrible race conditions. This is especially bad + * when guc_id are being stolen due to over subscription. By the time * this function is reached, it is guaranteed that the guc_id will be * persistent until the generated request is retired. Thus, sealing these - * race conditions. It is still safe to fail here if guc_ids are + * race conditions. It is still safe to fail here if guc_id are * exhausted and return -EAGAIN to the user indicating that they can try * again in the future. * @@ -2193,7 +2193,7 @@ static int guc_request_alloc(struct i915_request *rq) * decremented on each retire. When it is zero, a lock around the * increment (in pin_guc_id) is needed to seal a race with unpin_guc_id. */ - if (atomic_add_unless(&ce->guc_id_ref, 1, 0)) + if (atomic_add_unless(&ce->guc_id.ref, 1, 0)) goto out; ret = pin_guc_id(guc, ce); /* returns 1 if new guc_id assigned */ @@ -2206,7 +2206,7 @@ static int guc_request_alloc(struct i915_request *rq) disable_submission(guc); goto out; /* GPU will be reset */ } - atomic_dec(&ce->guc_id_ref); + atomic_dec(&ce->guc_id.ref); unpin_guc_id(guc, ce); return ret; } @@ -3040,7 +3040,7 @@ void intel_guc_submission_print_info(struct intel_guc *guc, priolist_for_each_request(rq, pl) drm_printf(p, "guc_id=%u, seqno=%llu\n", - rq->context->guc_id, + rq->context->guc_id.id, rq->fence.seqno); } spin_unlock_irqrestore(&sched_engine->lock, flags); @@ -3071,7 +3071,7 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, xa_lock_irqsave(&guc->context_lookup, flags); xa_for_each(&guc->context_lookup, index, ce) { - drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id); + drm_printf(p, "GuC lrc descriptor %u:\n", ce->guc_id.id); drm_printf(p, "\tHW Context Desc: 0x%08x\n", ce->lrc.lrca); drm_printf(p, "\t\tLRC Head: Internal %u, Memory %u\n", ce->ring->head, @@ -3082,7 +3082,7 @@ void intel_guc_submission_print_context_info(struct intel_guc *guc, drm_printf(p, "\t\tContext Pin Count: %u\n", atomic_read(&ce->pin_count)); drm_printf(p, "\t\tGuC ID Ref Count: %u\n", - atomic_read(&ce->guc_id_ref)); + atomic_read(&ce->guc_id.ref)); drm_printf(p, "\t\tSchedule State: 0x%x\n\n", ce->guc_state.sched_state); diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 181c5831405b..75a40a33e13a 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -805,7 +805,7 @@ DECLARE_EVENT_CLASS(i915_request, __entry->dev = rq->engine->i915->drm.primary->index; __entry->class = rq->engine->uabi_class; __entry->instance = rq->engine->uabi_instance; - __entry->guc_id = rq->context->guc_id; + __entry->guc_id = rq->context->guc_id.id; __entry->ctx = rq->fence.context; __entry->seqno = rq->fence.seqno; __entry->tail = rq->tail; @@ -907,7 +907,7 @@ DECLARE_EVENT_CLASS(intel_context, ), TP_fast_assign( - __entry->guc_id = ce->guc_id; + __entry->guc_id = ce->guc_id.id; __entry->pin_count = atomic_read(&ce->pin_count); __entry->sched_state = ce->guc_state.sched_state; __entry->guc_prio = ce->guc_active.prio; From af5bc9f21e3acb479683e4339c5c3ea27334b270 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:43 -0700 Subject: [PATCH 79/85] drm/i915/guc: Drop guc_active move everything into guc_state Now that we have locking hierarchy of sched_engine->lock -> ce->guc_state everything from guc_active can be moved into guc_state and protected the guc_state.lock. Signed-off-by: Matthew Brost Reviewed-by: Daniele Ceraolo Spurio Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-23-matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/intel_context.c | 10 +-- drivers/gpu/drm/i915/gt/intel_context_types.h | 7 +- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 88 +++++++++---------- drivers/gpu/drm/i915/i915_trace.h | 2 +- 4 files changed, 49 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 485460a11331..ff637147b1a9 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -394,9 +394,7 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine) spin_lock_init(&ce->guc_state.lock); INIT_LIST_HEAD(&ce->guc_state.fences); - - spin_lock_init(&ce->guc_active.lock); - INIT_LIST_HEAD(&ce->guc_active.requests); + INIT_LIST_HEAD(&ce->guc_state.requests); ce->guc_id.id = GUC_INVALID_LRC_ID; INIT_LIST_HEAD(&ce->guc_id.link); @@ -521,15 +519,15 @@ struct i915_request *intel_context_find_active_request(struct intel_context *ce) GEM_BUG_ON(!intel_engine_uses_guc(ce->engine)); - spin_lock_irqsave(&ce->guc_active.lock, flags); - list_for_each_entry_reverse(rq, &ce->guc_active.requests, + spin_lock_irqsave(&ce->guc_state.lock, flags); + list_for_each_entry_reverse(rq, &ce->guc_state.requests, sched.link) { if (i915_request_completed(rq)) break; active = rq; } - spin_unlock_irqrestore(&ce->guc_active.lock, flags); + spin_unlock_irqrestore(&ce->guc_state.lock, flags); return active; } diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 0b00d249c884..5285d660eacf 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -172,11 +172,6 @@ struct intel_context { struct i915_sw_fence blocked; /* GuC committed requests */ int number_committed_requests; - } guc_state; - - struct { - /** lock: protects everything in guc_active */ - spinlock_t lock; /** requests: active requests on this context */ struct list_head requests; /* @@ -184,7 +179,7 @@ struct intel_context { */ u8 prio; u32 prio_count[GUC_CLIENT_PRIORITY_NUM]; - } guc_active; + } guc_state; struct { /* GuC LRC descriptor ID */ diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 2c9a6b4ce071..a9cf49298067 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -846,9 +846,9 @@ __unwind_incomplete_requests(struct intel_context *ce) unsigned long flags; spin_lock_irqsave(&sched_engine->lock, flags); - spin_lock(&ce->guc_active.lock); + spin_lock(&ce->guc_state.lock); list_for_each_entry_safe_reverse(rq, rn, - &ce->guc_active.requests, + &ce->guc_state.requests, sched.link) { if (i915_request_completed(rq)) continue; @@ -867,7 +867,7 @@ __unwind_incomplete_requests(struct intel_context *ce) list_add(&rq->sched.link, pl); set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); } - spin_unlock(&ce->guc_active.lock); + spin_unlock(&ce->guc_state.lock); spin_unlock_irqrestore(&sched_engine->lock, flags); } @@ -962,10 +962,10 @@ static void guc_cancel_context_requests(struct intel_context *ce) /* Mark all executing requests as skipped. */ spin_lock_irqsave(&sched_engine->lock, flags); - spin_lock(&ce->guc_active.lock); - list_for_each_entry(rq, &ce->guc_active.requests, sched.link) + spin_lock(&ce->guc_state.lock); + list_for_each_entry(rq, &ce->guc_state.requests, sched.link) i915_request_put(i915_request_mark_eio(rq)); - spin_unlock(&ce->guc_active.lock); + spin_unlock(&ce->guc_state.lock); spin_unlock_irqrestore(&sched_engine->lock, flags); } @@ -1416,7 +1416,7 @@ static int guc_lrc_desc_pin(struct intel_context *ce, bool loop) desc->engine_submit_mask = adjust_engine_mask(engine->class, engine->mask); desc->hw_context_desc = ce->lrc.lrca; - desc->priority = ce->guc_active.prio; + desc->priority = ce->guc_state.prio; desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD; guc_context_policy_init(engine, desc); @@ -1811,10 +1811,10 @@ static inline void guc_lrc_desc_unpin(struct intel_context *ce) static void __guc_context_destroy(struct intel_context *ce) { - GEM_BUG_ON(ce->guc_active.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || - ce->guc_active.prio_count[GUC_CLIENT_PRIORITY_HIGH] || - ce->guc_active.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || - ce->guc_active.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); + GEM_BUG_ON(ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_HIGH] || + ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_HIGH] || + ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_KMD_NORMAL] || + ce->guc_state.prio_count[GUC_CLIENT_PRIORITY_NORMAL]); GEM_BUG_ON(ce->guc_state.number_committed_requests); lrc_fini(ce); @@ -1924,17 +1924,17 @@ static void guc_context_set_prio(struct intel_guc *guc, GEM_BUG_ON(prio < GUC_CLIENT_PRIORITY_KMD_HIGH || prio > GUC_CLIENT_PRIORITY_NORMAL); - lockdep_assert_held(&ce->guc_active.lock); + lockdep_assert_held(&ce->guc_state.lock); - if (ce->guc_active.prio == prio || submission_disabled(guc) || + if (ce->guc_state.prio == prio || submission_disabled(guc) || !context_registered(ce)) { - ce->guc_active.prio = prio; + ce->guc_state.prio = prio; return; } guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true); - ce->guc_active.prio = prio; + ce->guc_state.prio = prio; trace_intel_context_set_prio(ce); } @@ -1953,25 +1953,25 @@ static inline u8 map_i915_prio_to_guc_prio(int prio) static inline void add_context_inflight_prio(struct intel_context *ce, u8 guc_prio) { - lockdep_assert_held(&ce->guc_active.lock); - GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_active.prio_count)); + lockdep_assert_held(&ce->guc_state.lock); + GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); - ++ce->guc_active.prio_count[guc_prio]; + ++ce->guc_state.prio_count[guc_prio]; /* Overflow protection */ - GEM_WARN_ON(!ce->guc_active.prio_count[guc_prio]); + GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); } static inline void sub_context_inflight_prio(struct intel_context *ce, u8 guc_prio) { - lockdep_assert_held(&ce->guc_active.lock); - GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_active.prio_count)); + lockdep_assert_held(&ce->guc_state.lock); + GEM_BUG_ON(guc_prio >= ARRAY_SIZE(ce->guc_state.prio_count)); /* Underflow protection */ - GEM_WARN_ON(!ce->guc_active.prio_count[guc_prio]); + GEM_WARN_ON(!ce->guc_state.prio_count[guc_prio]); - --ce->guc_active.prio_count[guc_prio]; + --ce->guc_state.prio_count[guc_prio]; } static inline void update_context_prio(struct intel_context *ce) @@ -1982,10 +1982,10 @@ static inline void update_context_prio(struct intel_context *ce) BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH != 0); BUILD_BUG_ON(GUC_CLIENT_PRIORITY_KMD_HIGH > GUC_CLIENT_PRIORITY_NORMAL); - lockdep_assert_held(&ce->guc_active.lock); + lockdep_assert_held(&ce->guc_state.lock); - for (i = 0; i < ARRAY_SIZE(ce->guc_active.prio_count); ++i) { - if (ce->guc_active.prio_count[i]) { + for (i = 0; i < ARRAY_SIZE(ce->guc_state.prio_count); ++i) { + if (ce->guc_state.prio_count[i]) { guc_context_set_prio(guc, ce, i); break; } @@ -2005,8 +2005,8 @@ static void add_to_context(struct i915_request *rq) GEM_BUG_ON(rq->guc_prio == GUC_PRIO_FINI); - spin_lock(&ce->guc_active.lock); - list_move_tail(&rq->sched.link, &ce->guc_active.requests); + spin_lock(&ce->guc_state.lock); + list_move_tail(&rq->sched.link, &ce->guc_state.requests); if (rq->guc_prio == GUC_PRIO_INIT) { rq->guc_prio = new_guc_prio; @@ -2018,12 +2018,12 @@ static void add_to_context(struct i915_request *rq) } update_context_prio(ce); - spin_unlock(&ce->guc_active.lock); + spin_unlock(&ce->guc_state.lock); } static void guc_prio_fini(struct i915_request *rq, struct intel_context *ce) { - lockdep_assert_held(&ce->guc_active.lock); + lockdep_assert_held(&ce->guc_state.lock); if (rq->guc_prio != GUC_PRIO_INIT && rq->guc_prio != GUC_PRIO_FINI) { @@ -2037,7 +2037,7 @@ static void remove_from_context(struct i915_request *rq) { struct intel_context *ce = rq->context; - spin_lock_irq(&ce->guc_active.lock); + spin_lock_irq(&ce->guc_state.lock); list_del_init(&rq->sched.link); clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); @@ -2047,10 +2047,8 @@ static void remove_from_context(struct i915_request *rq) guc_prio_fini(rq, ce); - spin_unlock_irq(&ce->guc_active.lock); - - spin_lock_irq(&ce->guc_state.lock); decr_context_committed_requests(ce); + spin_unlock_irq(&ce->guc_state.lock); atomic_dec(&ce->guc_id.ref); @@ -2138,7 +2136,7 @@ static void guc_context_init(struct intel_context *ce) prio = ctx->sched.priority; rcu_read_unlock(); - ce->guc_active.prio = map_i915_prio_to_guc_prio(prio); + ce->guc_state.prio = map_i915_prio_to_guc_prio(prio); set_bit(CONTEXT_GUC_INIT, &ce->flags); } @@ -2372,7 +2370,7 @@ static void guc_bump_inflight_request_prio(struct i915_request *rq, !new_guc_prio_higher(rq->guc_prio, new_guc_prio))) return; - spin_lock(&ce->guc_active.lock); + spin_lock(&ce->guc_state.lock); if (rq->guc_prio != GUC_PRIO_FINI) { if (rq->guc_prio != GUC_PRIO_INIT) sub_context_inflight_prio(ce, rq->guc_prio); @@ -2380,16 +2378,16 @@ static void guc_bump_inflight_request_prio(struct i915_request *rq, add_context_inflight_prio(ce, rq->guc_prio); update_context_prio(ce); } - spin_unlock(&ce->guc_active.lock); + spin_unlock(&ce->guc_state.lock); } static void guc_retire_inflight_request_prio(struct i915_request *rq) { struct intel_context *ce = rq->context; - spin_lock(&ce->guc_active.lock); + spin_lock(&ce->guc_state.lock); guc_prio_fini(rq, ce); - spin_unlock(&ce->guc_active.lock); + spin_unlock(&ce->guc_state.lock); } static void sanitize_hwsp(struct intel_engine_cs *engine) @@ -2955,7 +2953,7 @@ void intel_guc_find_hung_context(struct intel_engine_cs *engine) goto next; } - list_for_each_entry(rq, &ce->guc_active.requests, sched.link) { + list_for_each_entry(rq, &ce->guc_state.requests, sched.link) { if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE) continue; @@ -3005,10 +3003,10 @@ void intel_guc_dump_active_requests(struct intel_engine_cs *engine, goto next; } - spin_lock(&ce->guc_active.lock); - intel_engine_dump_active_requests(&ce->guc_active.requests, + spin_lock(&ce->guc_state.lock); + intel_engine_dump_active_requests(&ce->guc_state.requests, hung_rq, m); - spin_unlock(&ce->guc_active.lock); + spin_unlock(&ce->guc_state.lock); next: intel_context_put(ce); @@ -3052,12 +3050,12 @@ static inline void guc_log_context_priority(struct drm_printer *p, { int i; - drm_printf(p, "\t\tPriority: %d\n", ce->guc_active.prio); + drm_printf(p, "\t\tPriority: %d\n", ce->guc_state.prio); drm_printf(p, "\t\tNumber Requests (lower index == higher priority)\n"); for (i = GUC_CLIENT_PRIORITY_KMD_HIGH; i < GUC_CLIENT_PRIORITY_NUM; ++i) { drm_printf(p, "\t\tNumber requests in priority band[%d]: %d\n", - i, ce->guc_active.prio_count[i]); + i, ce->guc_state.prio_count[i]); } drm_printf(p, "\n"); } diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 75a40a33e13a..9795f456cccf 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -910,7 +910,7 @@ DECLARE_EVENT_CLASS(intel_context, __entry->guc_id = ce->guc_id.id; __entry->pin_count = atomic_read(&ce->pin_count); __entry->sched_state = ce->guc_state.sched_state; - __entry->guc_prio = ce->guc_active.prio; + __entry->guc_prio = ce->guc_state.prio; ), TP_printk("guc_id=%d, pin_count=%d sched_state=0x%x, guc_prio=%u", From 4f41ddc7c7eeb0a41c3a07da975fd7a0c5715e85 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 9 Sep 2021 09:47:44 -0700 Subject: [PATCH 80/85] drm/i915/guc: Add GuC kernel doc Add GuC kernel doc for all structures added thus far for GuC submission and update the main GuC submission section with the new interface details. v2: - Drop guc_active.lock DOC v3: - Fixup a few kernel doc comments (Daniele) v4 (Daniele): - Implement doc suggestions from John - Add kerneldoc for all members of the GuC structure and pull the file in i915.rst v5 (Daniele): - Implement new doc suggestions from John Signed-off-by: Matthew Brost Signed-off-by: Daniele Ceraolo Spurio Cc: John Harrison Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210909164744.31249-24-matthew.brost@intel.com --- Documentation/gpu/i915.rst | 2 + drivers/gpu/drm/i915/gt/intel_context_types.h | 47 ++++---- drivers/gpu/drm/i915/gt/uc/intel_guc.h | 75 ++++++++++--- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 100 ++++++++++++++---- drivers/gpu/drm/i915/i915_request.h | 21 ++-- 5 files changed, 183 insertions(+), 62 deletions(-) diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 204ebdaadb45..e9b11044075e 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -498,6 +498,8 @@ GuC .. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc.c :doc: GuC +.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc.h + GuC Firmware Layout ~~~~~~~~~~~~~~~~~~~ diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 5285d660eacf..930569a1a01f 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -156,40 +156,51 @@ struct intel_context { u8 wa_bb_page; /* if set, page num reserved for context workarounds */ struct { - /** lock: protects everything in guc_state */ + /** @lock: protects everything in guc_state */ spinlock_t lock; /** - * sched_state: scheduling state of this context using GuC + * @sched_state: scheduling state of this context using GuC * submission */ u32 sched_state; /* - * fences: maintains of list of requests that have a submit - * fence related to GuC submission + * @fences: maintains a list of requests that are currently + * being fenced until a GuC operation completes */ struct list_head fences; - /* GuC context blocked fence */ - struct i915_sw_fence blocked; - /* GuC committed requests */ - int number_committed_requests; - /** requests: active requests on this context */ - struct list_head requests; - /* - * GuC priority management + /** + * @blocked: fence used to signal when the blocking of a + * context's submissions is complete. */ + struct i915_sw_fence blocked; + /** @number_committed_requests: number of committed requests */ + int number_committed_requests; + /** @requests: list of active requests on this context */ + struct list_head requests; + /** @prio: the context's current guc priority */ u8 prio; + /** + * @prio_count: a counter of the number requests in flight in + * each priority bucket + */ u32 prio_count[GUC_CLIENT_PRIORITY_NUM]; } guc_state; struct { - /* GuC LRC descriptor ID */ + /** + * @id: handle which is used to uniquely identify this context + * with the GuC, protected by guc->contexts_lock + */ u16 id; - - /* GuC LRC descriptor reference count */ + /** + * @ref: the number of references to the guc_id, when + * transitioning in and out of zero protected by + * guc->contexts_lock + */ atomic_t ref; - - /* - * GuC ID link - in list when unpinned but guc_id still valid in GuC + /** + * @link: in guc->guc_id_list when the guc_id has no refs but is + * still valid, protected by guc->contexts_lock */ struct list_head link; } guc_id; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index 2e27fe59786b..5dd174babf7a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -22,74 +22,121 @@ struct __guc_ads_blob; -/* - * Top level structure of GuC. It handles firmware loading and manages client - * pool. intel_guc owns a intel_guc_client to replace the legacy ExecList - * submission. +/** + * struct intel_guc - Top level structure of GuC. + * + * It handles firmware loading and manages client pool. intel_guc owns an + * i915_sched_engine for submission. */ struct intel_guc { + /** @fw: the GuC firmware */ struct intel_uc_fw fw; + /** @log: sub-structure containing GuC log related data and objects */ struct intel_guc_log log; + /** @ct: the command transport communication channel */ struct intel_guc_ct ct; + /** @slpc: sub-structure containing SLPC related data and objects */ struct intel_guc_slpc slpc; - /* Global engine used to submit requests to GuC */ + /** @sched_engine: Global engine used to submit requests to GuC */ struct i915_sched_engine *sched_engine; + /** + * @stalled_request: if GuC can't process a request for any reason, we + * save it until GuC restarts processing. No other request can be + * submitted until the stalled request is processed. + */ struct i915_request *stalled_request; /* intel_guc_recv interrupt related state */ + /** @irq_lock: protects GuC irq state */ spinlock_t irq_lock; + /** + * @msg_enabled_mask: mask of events that are processed when receiving + * an INTEL_GUC_ACTION_DEFAULT G2H message. + */ unsigned int msg_enabled_mask; + /** + * @outstanding_submission_g2h: number of outstanding GuC to Host + * responses related to GuC submission, used to determine if the GT is + * idle + */ atomic_t outstanding_submission_g2h; + /** @interrupts: pointers to GuC interrupt-managing functions. */ struct { void (*reset)(struct intel_guc *guc); void (*enable)(struct intel_guc *guc); void (*disable)(struct intel_guc *guc); } interrupts; - /* - * contexts_lock protects the pool of free guc ids and a linked list of - * guc ids available to be stolen + /** + * @contexts_lock: protects guc_ids, guc_id_list, ce->guc_id.id, and + * ce->guc_id.ref when transitioning in and out of zero */ spinlock_t contexts_lock; + /** @guc_ids: used to allocate unique ce->guc_id.id values */ struct ida guc_ids; + /** + * @guc_id_list: list of intel_context with valid guc_ids but no refs + */ struct list_head guc_id_list; + /** + * @submission_supported: tracks whether we support GuC submission on + * the current platform + */ bool submission_supported; + /** @submission_selected: tracks whether the user enabled GuC submission */ bool submission_selected; + /** + * @rc_supported: tracks whether we support GuC rc on the current platform + */ bool rc_supported; + /** @rc_selected: tracks whether the user enabled GuC rc */ bool rc_selected; + /** @ads_vma: object allocated to hold the GuC ADS */ struct i915_vma *ads_vma; + /** @ads_blob: contents of the GuC ADS */ struct __guc_ads_blob *ads_blob; + /** @ads_regset_size: size of the save/restore regsets in the ADS */ u32 ads_regset_size; + /** @ads_golden_ctxt_size: size of the golden contexts in the ADS */ u32 ads_golden_ctxt_size; + /** @lrc_desc_pool: object allocated to hold the GuC LRC descriptor pool */ struct i915_vma *lrc_desc_pool; + /** @lrc_desc_pool_vaddr: contents of the GuC LRC descriptor pool */ void *lrc_desc_pool_vaddr; - /* guc_id to intel_context lookup */ + /** + * @context_lookup: used to resolve intel_context from guc_id, if a + * context is present in this structure it is registered with the GuC + */ struct xarray context_lookup; - /* Control params for fw initialization */ + /** @params: Control params for fw initialization */ u32 params[GUC_CTL_MAX_DWORDS]; - /* GuC's FW specific registers used in MMIO send */ + /** @send_regs: GuC's FW specific registers used for sending MMIO H2G */ struct { u32 base; unsigned int count; enum forcewake_domains fw_domains; } send_regs; - /* register used to send interrupts to the GuC FW */ + /** @notify_reg: register used to send interrupts to the GuC FW */ i915_reg_t notify_reg; - /* Store msg (e.g. log flush) that we see while CTBs are disabled */ + /** + * @mmio_msg: notification bitmask that the GuC writes in one of its + * registers when the CT channel is disabled, to be processed when the + * channel is back up. + */ u32 mmio_msg; - /* To serialize the intel_guc_send actions */ + /** @send_mutex: used to serialize the intel_guc_send actions */ struct mutex send_mutex; }; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index a9cf49298067..c7a41802b448 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -28,21 +28,6 @@ /** * DOC: GuC-based command submission * - * IMPORTANT NOTE: GuC submission is currently not supported in i915. The GuC - * firmware is moving to an updated submission interface and we plan to - * turn submission back on when that lands. The below documentation (and related - * code) matches the old submission model and will be updated as part of the - * upgrade to the new flow. - * - * GuC stage descriptor: - * During initialization, the driver allocates a static pool of 1024 such - * descriptors, and shares them with the GuC. Currently, we only use one - * descriptor. This stage descriptor lets the GuC know about the workqueue and - * process descriptor. Theoretically, it also lets the GuC know about our HW - * contexts (context ID, etc...), but we actually employ a kind of submission - * where the GuC uses the LRCA sent via the work item instead. This is called - * a "proxy" submission. - * * The Scratch registers: * There are 16 MMIO-based registers start from 0xC180. The kernel driver writes * a value to the action register (SOFT_SCRATCH_0) along with any data. It then @@ -51,14 +36,85 @@ * processes the request. The kernel driver polls waiting for this update and * then proceeds. * - * Work Items: - * There are several types of work items that the host may place into a - * workqueue, each with its own requirements and limitations. Currently only - * WQ_TYPE_INORDER is needed to support legacy submission via GuC, which - * represents in-order queue. The kernel driver packs ring tail pointer and an - * ELSP context descriptor dword into Work Item. - * See guc_add_request() + * Command Transport buffers (CTBs): + * Covered in detail in other sections but CTBs (Host to GuC - H2G, GuC to Host + * - G2H) are a message interface between the i915 and GuC. * + * Context registration: + * Before a context can be submitted it must be registered with the GuC via a + * H2G. A unique guc_id is associated with each context. The context is either + * registered at request creation time (normal operation) or at submission time + * (abnormal operation, e.g. after a reset). + * + * Context submission: + * The i915 updates the LRC tail value in memory. The i915 must enable the + * scheduling of the context within the GuC for the GuC to actually consider it. + * Therefore, the first time a disabled context is submitted we use a schedule + * enable H2G, while follow up submissions are done via the context submit H2G, + * which informs the GuC that a previously enabled context has new work + * available. + * + * Context unpin: + * To unpin a context a H2G is used to disable scheduling. When the + * corresponding G2H returns indicating the scheduling disable operation has + * completed it is safe to unpin the context. While a disable is in flight it + * isn't safe to resubmit the context so a fence is used to stall all future + * requests of that context until the G2H is returned. + * + * Context deregistration: + * Before a context can be destroyed or if we steal its guc_id we must + * deregister the context with the GuC via H2G. If stealing the guc_id it isn't + * safe to submit anything to this guc_id until the deregister completes so a + * fence is used to stall all requests associated with this guc_id until the + * corresponding G2H returns indicating the guc_id has been deregistered. + * + * guc_ids: + * Unique number associated with private GuC context data passed in during + * context registration / submission / deregistration. 64k available. Simple ida + * is used for allocation. + * + * Stealing guc_ids: + * If no guc_ids are available they can be stolen from another context at + * request creation time if that context is unpinned. If a guc_id can't be found + * we punt this problem to the user as we believe this is near impossible to hit + * during normal use cases. + * + * Locking: + * In the GuC submission code we have 3 basic spin locks which protect + * everything. Details about each below. + * + * sched_engine->lock + * This is the submission lock for all contexts that share an i915 schedule + * engine (sched_engine), thus only one of the contexts which share a + * sched_engine can be submitting at a time. Currently only one sched_engine is + * used for all of GuC submission but that could change in the future. + * + * guc->contexts_lock + * Protects guc_id allocation for the given GuC, i.e. only one context can be + * doing guc_id allocation operations at a time for each GuC in the system. + * + * ce->guc_state.lock + * Protects everything under ce->guc_state. Ensures that a context is in the + * correct state before issuing a H2G. e.g. We don't issue a schedule disable + * on a disabled context (bad idea), we don't issue a schedule enable when a + * schedule disable is in flight, etc... Also protects list of inflight requests + * on the context and the priority management state. Lock is individual to each + * context. + * + * Lock ordering rules: + * sched_engine->lock -> ce->guc_state.lock + * guc->contexts_lock -> ce->guc_state.lock + * + * Reset races: + * When a full GT reset is triggered it is assumed that some G2H responses to + * H2Gs can be lost as the GuC is also reset. Losing these G2H can prove to be + * fatal as we do certain operations upon receiving a G2H (e.g. destroy + * contexts, release guc_ids, etc...). When this occurs we can scrub the + * context state and cleanup appropriately, however this is quite racey. + * To avoid races, the reset code must disable submission before scrubbing for + * the missing G2H, while the submission code must check for submission being + * disabled and skip sending H2Gs and updating context states when it is. Both + * sides must also make sure to hold the relevant locks. */ /* GuC Virtual Engine */ diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index d818cfbfc41d..7bd9ed20623e 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -290,18 +290,23 @@ struct i915_request { struct hrtimer timer; } watchdog; - /* - * Requests may need to be stalled when using GuC submission waiting for - * certain GuC operations to complete. If that is the case, stalled - * requests are added to a per context list of stalled requests. The - * below list_head is the link in that list. + /** + * @guc_fence_link: Requests may need to be stalled when using GuC + * submission waiting for certain GuC operations to complete. If that is + * the case, stalled requests are added to a per context list of stalled + * requests. The below list_head is the link in that list. Protected by + * ce->guc_state.lock. */ struct list_head guc_fence_link; /** - * Priority level while the request is inflight. Differs from i915 - * scheduler priority. See comment above - * I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP for details. + * @guc_prio: Priority level while the request is in flight. Differs + * from i915 scheduler priority. See comment above + * I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP for details. Protected by + * ce->guc_active.lock. Two special values (GUC_PRIO_INIT and + * GUC_PRIO_FINI) outside the GuC priority range are used to indicate + * if the priority has not been initialized yet or if no more updates + * are possible because the request has completed. */ #define GUC_PRIO_INIT 0xff #define GUC_PRIO_FINI 0xfe From 4796054b381a586f4177a24e3d8b5a6a0a32ce62 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 24 Aug 2021 15:54:25 -0700 Subject: [PATCH 81/85] drm/i915/selftests: Do not use import_obj uninitialized MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clang warns a couple of times: drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c:63:6: warning: variable 'import_obj' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] if (import != &obj->base) { ^~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c:80:22: note: uninitialized use occurs here i915_gem_object_put(import_obj); ^~~~~~~~~~ drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c:63:2: note: remove the 'if' if its condition is always false if (import != &obj->base) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c:38:46: note: initialize the variable 'import_obj' to silence this warning struct drm_i915_gem_object *obj, *import_obj; ^ = NULL Shuffle the import_obj initialization above these if statements so that it is not used uninitialized. Fixes: d7b2cb380b3a ("drm/i915/gem: Correct the locking and pin pattern for dma-buf (v8)") Reported-by: Dan Carpenter Reviewed-by: Thomas Hellström Signed-off-by: Nathan Chancellor Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210824225427.2065517-2-nathan@kernel.org --- drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index ffae7df5e4d7..532c7955b300 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -59,13 +59,13 @@ static int igt_dmabuf_import_self(void *arg) err = PTR_ERR(import); goto out_dmabuf; } + import_obj = to_intel_bo(import); if (import != &obj->base) { pr_err("i915_gem_prime_import created a new object!\n"); err = -EINVAL; goto out_import; } - import_obj = to_intel_bo(import); i915_gem_object_lock(import_obj, NULL); err = __i915_gem_object_get_pages(import_obj); @@ -176,6 +176,7 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915, err = PTR_ERR(import); goto out_dmabuf; } + import_obj = to_intel_bo(import); if (import == &obj->base) { pr_err("i915_gem_prime_import reused gem object!\n"); @@ -183,8 +184,6 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915, goto out_import; } - import_obj = to_intel_bo(import); - i915_gem_object_lock(import_obj, NULL); err = __i915_gem_object_get_pages(import_obj); if (err) { From 46f20a353b80d02492655d99714f0566018a17e8 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 24 Aug 2021 15:54:26 -0700 Subject: [PATCH 82/85] drm/i915/selftests: Always initialize err in igt_dmabuf_import_same_driver_lmem() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clang warns: drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c:127:13: warning: variable 'err' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] } else if (PTR_ERR(import) != -EOPNOTSUPP) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c:138:9: note: uninitialized use occurs here return err; ^~~ drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c:127:9: note: remove the 'if' if its condition is always true } else if (PTR_ERR(import) != -EOPNOTSUPP) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c:95:9: note: initialize the variable 'err' to silence this warning int err; ^ = 0 The test is expected to pass if i915_gem_prime_import() returns -EOPNOTSUPP so initialize err to zero in this case. Fixes: cdb35d1ed6d2 ("drm/i915/gem: Migrate to system at dma-buf attach time (v7)") Reported-by: Dan Carpenter Reviewed-by: Thomas Hellström Signed-off-by: Nathan Chancellor Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210824225427.2065517-3-nathan@kernel.org --- drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c index 532c7955b300..4a6bb64c3a35 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c @@ -128,6 +128,8 @@ static int igt_dmabuf_import_same_driver_lmem(void *arg) pr_err("i915_gem_prime_import failed with the wrong err=%ld\n", PTR_ERR(import)); err = PTR_ERR(import); + } else { + err = 0; } dma_buf_put(dmabuf); From 43192617f7816bb74584c1df06f57363afd15337 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 24 Aug 2021 15:54:27 -0700 Subject: [PATCH 83/85] drm/i915: Enable -Wsometimes-uninitialized This warning helps catch uninitialized variables. It should have been enabled at the same time as commit b2423184ac33 ("drm/i915: Enable -Wuninitialized") but I did not realize they were disabled separately. Enable it now that i915 is clean so that it stays that way. Reviewed-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20210824225427.2065517-4-nathan@kernel.org --- drivers/gpu/drm/i915/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 949719ba048e..c584188aa15a 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -19,7 +19,6 @@ subdir-ccflags-y += $(call cc-disable-warning, missing-field-initializers) subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable) # clang warnings subdir-ccflags-y += $(call cc-disable-warning, sign-compare) -subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized) subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides) subdir-ccflags-y += $(call cc-disable-warning, frame-address) subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror From 50bc6486a8f12643624cd3c48cd67fe49873849a Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Fri, 3 Sep 2021 17:35:43 -0700 Subject: [PATCH 84/85] drm/i915/xehpsdv: Define MOCS table for XeHP SDV Like DG1, XeHP SDV doesn't have LLC/eDRAM control values due to being a dgfx card. XeHP SDV adds 2 more bits: L3_GLBGO to "push the Go point to memory for L3 destined transaction" and L3_LKP to "enable Lookup for uncacheable accesses". Bspec: 45101 Cc: Daniele Ceraolo Spurio Signed-off-by: Lucas De Marchi Signed-off-by: Stuart Summers Signed-off-by: Matt Roper Reviewed-by: Clint Taylor Link: https://patchwork.freedesktop.org/patch/msgid/20210904003544.2422282-2-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_mocs.c | 35 +++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index f47521b8e941..8c3fbf4f323b 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -42,6 +42,8 @@ struct drm_i915_mocs_table { #define L3_ESC(value) ((value) << 0) #define L3_SCC(value) ((value) << 1) #define _L3_CACHEABILITY(value) ((value) << 4) +#define L3_GLBGO(value) ((value) << 6) +#define L3_LKUP(value) ((value) << 7) /* Helper defines */ #define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ @@ -343,6 +345,31 @@ static const struct drm_i915_mocs_entry gen12_mocs_table[] = { L3_3_WB), }; +static const struct drm_i915_mocs_entry xehpsdv_mocs_table[] = { + /* wa_1608975824 */ + MOCS_ENTRY(0, 0, L3_3_WB | L3_LKUP(1)), + + /* UC - Coherent; GO:L3 */ + MOCS_ENTRY(1, 0, L3_1_UC | L3_LKUP(1)), + /* UC - Coherent; GO:Memory */ + MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), + /* UC - Non-Coherent; GO:Memory */ + MOCS_ENTRY(3, 0, L3_1_UC | L3_GLBGO(1)), + /* UC - Non-Coherent; GO:L3 */ + MOCS_ENTRY(4, 0, L3_1_UC), + + /* WB */ + MOCS_ENTRY(5, 0, L3_3_WB | L3_LKUP(1)), + + /* HW Reserved - SW program but never use. */ + MOCS_ENTRY(48, 0, L3_3_WB | L3_LKUP(1)), + MOCS_ENTRY(49, 0, L3_1_UC | L3_LKUP(1)), + MOCS_ENTRY(60, 0, L3_1_UC), + MOCS_ENTRY(61, 0, L3_1_UC), + MOCS_ENTRY(62, 0, L3_1_UC), + MOCS_ENTRY(63, 0, L3_1_UC), +}; + enum { HAS_GLOBAL_MOCS = BIT(0), HAS_ENGINE_MOCS = BIT(1), @@ -372,7 +399,13 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, memset(table, 0, sizeof(struct drm_i915_mocs_table)); table->unused_entries_index = I915_MOCS_PTE; - if (IS_DG1(i915)) { + if (IS_XEHPSDV(i915)) { + table->size = ARRAY_SIZE(xehpsdv_mocs_table); + table->table = xehpsdv_mocs_table; + table->uc_index = 2; + table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->unused_entries_index = 5; + } else if (IS_DG1(i915)) { table->size = ARRAY_SIZE(dg1_mocs_table); table->table = dg1_mocs_table; table->uc_index = 1; From e935405102783219b883b1e50539908f21463e9a Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Fri, 3 Sep 2021 17:35:44 -0700 Subject: [PATCH 85/85] drm/i915/dg2: Define MOCS table for DG2 Bspec: 45101, 45427 Cc: Ramalingam C Signed-off-by: Matt Roper Reviewed-by: Matt Atwood Link: https://patchwork.freedesktop.org/patch/msgid/20210904003544.2422282-3-matthew.d.roper@intel.com --- drivers/gpu/drm/i915/gt/intel_mocs.c | 37 +++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index 8c3fbf4f323b..e4b97cd14cf9 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -370,6 +370,30 @@ static const struct drm_i915_mocs_entry xehpsdv_mocs_table[] = { MOCS_ENTRY(63, 0, L3_1_UC), }; +static const struct drm_i915_mocs_entry dg2_mocs_table[] = { + /* UC - Coherent; GO:L3 */ + MOCS_ENTRY(0, 0, L3_1_UC | L3_LKUP(1)), + /* UC - Coherent; GO:Memory */ + MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), + /* UC - Non-Coherent; GO:Memory */ + MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)), + + /* WB - LC */ + MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), +}; + +static const struct drm_i915_mocs_entry dg2_mocs_table_g10_ax[] = { + /* Wa_14011441408: Set Go to Memory for MOCS#0 */ + MOCS_ENTRY(0, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), + /* UC - Coherent; GO:Memory */ + MOCS_ENTRY(1, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)), + /* UC - Non-Coherent; GO:Memory */ + MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1)), + + /* WB - LC */ + MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), +}; + enum { HAS_GLOBAL_MOCS = BIT(0), HAS_ENGINE_MOCS = BIT(1), @@ -399,7 +423,18 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, memset(table, 0, sizeof(struct drm_i915_mocs_table)); table->unused_entries_index = I915_MOCS_PTE; - if (IS_XEHPSDV(i915)) { + if (IS_DG2(i915)) { + if (IS_DG2_GT_STEP(i915, G10, STEP_A0, STEP_B0)) { + table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax); + table->table = dg2_mocs_table_g10_ax; + } else { + table->size = ARRAY_SIZE(dg2_mocs_table); + table->table = dg2_mocs_table; + } + table->uc_index = 1; + table->n_entries = GEN9_NUM_MOCS_ENTRIES; + table->unused_entries_index = 3; + } else if (IS_XEHPSDV(i915)) { table->size = ARRAY_SIZE(xehpsdv_mocs_table); table->table = xehpsdv_mocs_table; table->uc_index = 2;