From 76ef74d4a379faa451003621a84e3498044e7aa3 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 11 Jan 2023 17:07:33 +0100 Subject: [PATCH 001/177] netfilter: nft_payload: incorrect arithmetics when fetching VLAN header bits commit 696e1a48b1a1b01edad542a1ef293665864a4dd0 upstream. If the offset + length goes over the ethernet + vlan header, then the length is adjusted to copy the bytes that are within the boundaries of the vlan_ethhdr scratchpad area. The remaining bytes beyond ethernet + vlan header are copied directly from the skbuff data area. Fix incorrect arithmetic operator: subtract, not add, the size of the vlan header in case of double-tagged packets to adjust the length accordingly to address CVE-2023-0179. Reported-by: Davide Ornaghi Fixes: f6ae9f120dad ("netfilter: nft_payload: add C-VLAN support") Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nft_payload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 4edd899aeb9b..d7de2ecb287e 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -62,7 +62,7 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len) return false; if (offset + len > VLAN_ETH_HLEN + vlan_hlen) - ethlen -= offset + len - VLAN_ETH_HLEN + vlan_hlen; + ethlen -= offset + len - VLAN_ETH_HLEN - vlan_hlen; memcpy(dst_u8, vlanh + offset - vlan_hlen, ethlen); From 303a04b6562ec7c96eb91a6648a7e7f7e3e10d41 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 4 Jan 2023 16:09:44 +0100 Subject: [PATCH 002/177] Revert "ALSA: usb-audio: Drop superfluous interface setup at parsing" commit 16f1f838442dc6430d32d51ddda347b8421ec34b upstream. This reverts commit ac5e2fb425e1121ceef2b9d1b3ffccc195d55707. The commit caused a regression on Behringer UMC404HD (and likely others). As the change was meant only as a minor optimization, it's better to revert it to address the regression. Reported-and-tested-by: Michael Ralston Cc: Link: https://lore.kernel.org/r/CAC2975JXkS1A5Tj9b02G_sy25ZWN-ys+tc9wmkoS=qPgKCogSg@mail.gmail.com Link: https://lore.kernel.org/r/20230104150944.24918-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/stream.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/usb/stream.c b/sound/usb/stream.c index f75601ca2d52..f10f4e6d3fb8 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -1222,6 +1222,12 @@ static int __snd_usb_parse_audio_interface(struct snd_usb_audio *chip, if (err < 0) return err; } + + /* try to set the interface... */ + usb_set_interface(chip->dev, iface_no, 0); + snd_usb_init_pitch(chip, fp); + snd_usb_init_sample_rate(chip, fp, fp->rate_max); + usb_set_interface(chip->dev, iface_no, altno); } return 0; } From e59e115d67d66d690904708bfef80c400c16a28b Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Mon, 9 Jan 2023 16:12:49 +0100 Subject: [PATCH 003/177] ALSA: control-led: use strscpy in set_led_id() commit 70051cffb31b5ee09096351c3b41fcae6f89de31 upstream. The use of strncpy() in the set_led_id() was incorrect. The len variable should use 'min(sizeof(buf2) - 1, count)' expression. Use strscpy() function to simplify things and handle the error gracefully. Fixes: a135dfb5de15 ("ALSA: led control - add sysfs kcontrol LED marking layer") Reported-by: yang.yang29@zte.com.cn Link: https://lore.kernel.org/alsa-devel/202301091945513559977@zte.com.cn/ Cc: Signed-off-by: Jaroslav Kysela Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/core/control_led.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sound/core/control_led.c b/sound/core/control_led.c index f975cc85772b..3cadd40100f3 100644 --- a/sound/core/control_led.c +++ b/sound/core/control_led.c @@ -530,12 +530,11 @@ static ssize_t set_led_id(struct snd_ctl_led_card *led_card, const char *buf, si bool attach) { char buf2[256], *s, *os; - size_t len = max(sizeof(s) - 1, count); struct snd_ctl_elem_id id; int err; - strncpy(buf2, buf, len); - buf2[len] = '\0'; + if (strscpy(buf2, buf, sizeof(buf2)) < 0) + return -E2BIG; memset(&id, 0, sizeof(id)); id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; s = buf2; From e1e0a181aea375edfae2f9a59070f95d904980d1 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Mon, 9 Jan 2023 15:11:33 +0100 Subject: [PATCH 004/177] ALSA: usb-audio: Always initialize fixed_rate in snd_usb_find_implicit_fb_sync_format() commit 291e9da91403e0e628d7692b5ed505100e7b7706 upstream. Handle the fallback code path, too. Fixes: fd28941cff1c ("ALSA: usb-audio: Add new quirk FIXED_RATE for JBL Quantum810 Wireless") BugLink: https://lore.kernel.org/alsa-devel/Y7frf3N%2FxzvESEsN@kili/ Reported-by: Dan Carpenter Cc: Signed-off-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20230109141133.335543-1-perex@perex.cz Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/implicit.c | 3 ++- sound/usb/pcm.c | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/usb/implicit.c b/sound/usb/implicit.c index 41ac7185b42b..4727043fd745 100644 --- a/sound/usb/implicit.c +++ b/sound/usb/implicit.c @@ -471,7 +471,7 @@ snd_usb_find_implicit_fb_sync_format(struct snd_usb_audio *chip, subs = find_matching_substream(chip, stream, target->sync_ep, target->fmt_type); if (!subs) - return sync_fmt; + goto end; high_score = 0; list_for_each_entry(fp, &subs->fmt_list, list) { @@ -485,6 +485,7 @@ snd_usb_find_implicit_fb_sync_format(struct snd_usb_audio *chip, } } + end: if (fixed_rate) *fixed_rate = snd_usb_pcm_has_fixed_rate(subs); return sync_fmt; diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 99a66d0ef5b2..d2c652aa1385 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -163,6 +163,8 @@ bool snd_usb_pcm_has_fixed_rate(struct snd_usb_substream *subs) struct snd_usb_audio *chip = subs->stream->chip; int rate = -1; + if (!subs) + return false; if (!(chip->quirk_flags & QUIRK_FLAG_FIXED_RATE)) return false; list_for_each_entry(fp, &subs->fmt_list, list) { From 779c7c7ea8e011f8ba404dbc73964ce75ef421ba Mon Sep 17 00:00:00 2001 From: Yuchi Yang Date: Fri, 30 Dec 2022 15:22:25 +0800 Subject: [PATCH 005/177] ALSA: hda/realtek - Turn on power early commit 1f680609bf1beac20e2a31ddcb1b88874123c39f upstream. Turn on power early to avoid wrong state for power relation register. This can earlier update JD state when resume back. Signed-off-by: Yuchi Yang Cc: Link: https://lore.kernel.org/r/e35d8f4fa18f4448a2315cc7d4a3715f@realtek.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 764eb07bbaff..8362eb4642d8 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3564,6 +3564,15 @@ static void alc256_init(struct hda_codec *codec) hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp_pin_sense; + if (spec->ultra_low_power) { + alc_update_coef_idx(codec, 0x03, 1<<1, 1<<1); + alc_update_coef_idx(codec, 0x08, 3<<2, 3<<2); + alc_update_coef_idx(codec, 0x08, 7<<4, 0); + alc_update_coef_idx(codec, 0x3b, 1<<15, 0); + alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6); + msleep(30); + } + if (!hp_pin) hp_pin = 0x21; @@ -3575,14 +3584,6 @@ static void alc256_init(struct hda_codec *codec) msleep(2); alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x1); /* Low power */ - if (spec->ultra_low_power) { - alc_update_coef_idx(codec, 0x03, 1<<1, 1<<1); - alc_update_coef_idx(codec, 0x08, 3<<2, 3<<2); - alc_update_coef_idx(codec, 0x08, 7<<4, 0); - alc_update_coef_idx(codec, 0x3b, 1<<15, 0); - alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6); - msleep(30); - } snd_hda_codec_write(codec, hp_pin, 0, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE); @@ -3713,6 +3714,13 @@ static void alc225_init(struct hda_codec *codec) hda_nid_t hp_pin = alc_get_hp_pin(spec); bool hp1_pin_sense, hp2_pin_sense; + if (spec->ultra_low_power) { + alc_update_coef_idx(codec, 0x08, 0x0f << 2, 3<<2); + alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6); + alc_update_coef_idx(codec, 0x33, 1<<11, 0); + msleep(30); + } + if (spec->codec_variant != ALC269_TYPE_ALC287 && spec->codec_variant != ALC269_TYPE_ALC245) /* required only at boot or S3 and S4 resume time */ @@ -3734,12 +3742,6 @@ static void alc225_init(struct hda_codec *codec) msleep(2); alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x1); /* Low power */ - if (spec->ultra_low_power) { - alc_update_coef_idx(codec, 0x08, 0x0f << 2, 3<<2); - alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6); - alc_update_coef_idx(codec, 0x33, 1<<11, 0); - msleep(30); - } if (hp1_pin_sense || spec->ultra_low_power) snd_hda_codec_write(codec, hp_pin, 0, From 82972d60f46f3241e3aa04a12c05dca0c2a1e166 Mon Sep 17 00:00:00 2001 From: Luka Guzenko Date: Tue, 10 Jan 2023 21:25:14 +0100 Subject: [PATCH 006/177] ALSA: hda/realtek: Enable mute/micmute LEDs on HP Spectre x360 13-aw0xxx commit ca88eeb308a221c2dcd4a64031d2e5fcd3db9eaa upstream. The HP Spectre x360 13-aw0xxx devices use the ALC285 codec with GPIO 0x04 controlling the micmute LED and COEF 0x0b index 8 controlling the mute LED. A quirk was added to make these work as well as a fixup. Signed-off-by: Luka Guzenko Cc: Link: https://lore.kernel.org/r/20230110202514.2792-1-l.guzenko@web.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8362eb4642d8..6fab7c8fc19a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4646,6 +4646,16 @@ static void alc285_fixup_hp_coef_micmute_led(struct hda_codec *codec, } } +static void alc285_fixup_hp_gpio_micmute_led(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) + spec->micmute_led_polarity = 1; + alc_fixup_hp_gpio_led(codec, action, 0, 0x04); +} + static void alc236_fixup_hp_coef_micmute_led(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -4667,6 +4677,13 @@ static void alc285_fixup_hp_mute_led(struct hda_codec *codec, alc285_fixup_hp_coef_micmute_led(codec, fix, action); } +static void alc285_fixup_hp_spectre_x360_mute_led(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + alc285_fixup_hp_mute_led_coefbit(codec, fix, action); + alc285_fixup_hp_gpio_micmute_led(codec, fix, action); +} + static void alc236_fixup_hp_mute_led(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -7108,6 +7125,7 @@ enum { ALC285_FIXUP_ASUS_G533Z_PINS, ALC285_FIXUP_HP_GPIO_LED, ALC285_FIXUP_HP_MUTE_LED, + ALC285_FIXUP_HP_SPECTRE_X360_MUTE_LED, ALC236_FIXUP_HP_GPIO_LED, ALC236_FIXUP_HP_MUTE_LED, ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF, @@ -8488,6 +8506,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_hp_mute_led, }, + [ALC285_FIXUP_HP_SPECTRE_X360_MUTE_LED] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_hp_spectre_x360_mute_led, + }, [ALC236_FIXUP_HP_GPIO_LED] = { .type = HDA_FIXUP_FUNC, .v.func = alc236_fixup_hp_gpio_led, @@ -9330,6 +9352,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO), SND_PCI_QUIRK(0x103c, 0x86e7, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), SND_PCI_QUIRK(0x103c, 0x86e8, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), + SND_PCI_QUIRK(0x103c, 0x86f9, "HP Spectre x360 13-aw0xxx", ALC285_FIXUP_HP_SPECTRE_X360_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8716, "HP Elite Dragonfly G2 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8720, "HP EliteBook x360 1040 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8724, "HP EliteBook 850 G7", ALC285_FIXUP_HP_GPIO_LED), From d778e68faa6a8f42a0bf19c1d9dd9d19b21d5c0a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 22 Oct 2022 04:17:53 -0400 Subject: [PATCH 007/177] KVM: x86: Do not return host topology information from KVM_GET_SUPPORTED_CPUID commit 45e966fcca03ecdcccac7cb236e16eea38cc18af upstream. Passing the host topology to the guest is almost certainly wrong and will confuse the scheduler. In addition, several fields of these CPUID leaves vary on each processor; it is simply impossible to return the right values from KVM_GET_SUPPORTED_CPUID in such a way that they can be passed to KVM_SET_CPUID2. The values that will most likely prevent confusion are all zeroes. Userspace will have to override it anyway if it wishes to present a specific topology to the guest. Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- Documentation/virt/kvm/api.rst | 14 ++++++++++++++ arch/x86/kvm/cpuid.c | 34 +++++++++++++++++----------------- 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 896914e3a847..b8ec88ef2efa 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -8248,6 +8248,20 @@ CPU[EAX=1]:ECX[24] (TSC_DEADLINE) is not reported by ``KVM_GET_SUPPORTED_CPUID`` It can be enabled if ``KVM_CAP_TSC_DEADLINE_TIMER`` is present and the kernel has enabled in-kernel emulation of the local APIC. +CPU topology +~~~~~~~~~~~~ + +Several CPUID values include topology information for the host CPU: +0x0b and 0x1f for Intel systems, 0x8000001e for AMD systems. Different +versions of KVM return different values for this information and userspace +should not rely on it. Currently they return all zeroes. + +If userspace wishes to set up a guest topology, it should be careful that +the values of these three leaves differ for each CPU. In particular, +the APIC ID is found in EDX for all subleaves of 0x0b and 0x1f, and in EAX +for 0x8000001e; the latter also encodes the core id and node id in bits +7:0 of EBX and ECX respectively. + Obsolete ioctls and capabilities ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 62bc7a01cecc..6047dbe04880 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -759,15 +759,21 @@ struct kvm_cpuid_array { int nent; }; -static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array, - u32 function, u32 index) +static struct kvm_cpuid_entry2 *get_next_cpuid(struct kvm_cpuid_array *array) { - struct kvm_cpuid_entry2 *entry; - if (array->nent >= array->maxnent) return NULL; - entry = &array->entries[array->nent++]; + return &array->entries[array->nent++]; +} + +static struct kvm_cpuid_entry2 *do_host_cpuid(struct kvm_cpuid_array *array, + u32 function, u32 index) +{ + struct kvm_cpuid_entry2 *entry = get_next_cpuid(array); + + if (!entry) + return NULL; memset(entry, 0, sizeof(*entry)); entry->function = function; @@ -945,22 +951,13 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->edx = edx.full; break; } - /* - * Per Intel's SDM, the 0x1f is a superset of 0xb, - * thus they can be handled by common code. - */ case 0x1f: case 0xb: /* - * Populate entries until the level type (ECX[15:8]) of the - * previous entry is zero. Note, CPUID EAX.{0x1f,0xb}.0 is - * the starting entry, filled by the primary do_host_cpuid(). + * No topology; a valid topology is indicated by the presence + * of subleaf 1. */ - for (i = 1; entry->ecx & 0xff00; ++i) { - entry = do_host_cpuid(array, function, i); - if (!entry) - goto out; - } + entry->eax = entry->ebx = entry->ecx = 0; break; case 0xd: { u64 permitted_xcr0 = kvm_caps.supported_xcr0 & xstate_get_guest_group_perm(); @@ -1193,6 +1190,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) entry->ebx = entry->ecx = entry->edx = 0; break; case 0x8000001e: + /* Do not return host topology information. */ + entry->eax = entry->ebx = entry->ecx = 0; + entry->edx = 0; /* reserved */ break; case 0x8000001F: if (!kvm_cpu_cap_has(X86_FEATURE_SEV)) { From 9a1195c584321d49dd11aad90ce855994b2f4455 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 20 Dec 2022 14:03:52 +0000 Subject: [PATCH 008/177] KVM: arm64: Fix S1PTW handling on RO memslots commit 406504c7b0405d74d74c15a667cd4c4620c3e7a9 upstream. A recent development on the EFI front has resulted in guests having their page tables baked in the firmware binary, and mapped into the IPA space as part of a read-only memslot. Not only is this legitimate, but it also results in added security, so thumbs up. It is possible to take an S1PTW translation fault if the S1 PTs are unmapped at stage-2. However, KVM unconditionally treats S1PTW as a write to correctly handle hardware AF/DB updates to the S1 PTs. Furthermore, KVM injects an exception into the guest for S1PTW writes. In the aforementioned case this results in the guest taking an abort it won't recover from, as the S1 PTs mapping the vectors suffer from the same problem. So clearly our handling is... wrong. Instead, switch to a two-pronged approach: - On S1PTW translation fault, handle the fault as a read - On S1PTW permission fault, handle the fault as a write This is of no consequence to SW that *writes* to its PTs (the write will trigger a non-S1PTW fault), and SW that uses RO PTs will not use HW-assisted AF/DB anyway, as that'd be wrong. Only in the case described in c4ad98e4b72c ("KVM: arm64: Assume write fault on S1PTW permission fault on instruction fetch") do we end-up with two back-to-back faults (page being evicted and faulted back). I don't think this is a case worth optimising for. Fixes: c4ad98e4b72c ("KVM: arm64: Assume write fault on S1PTW permission fault on instruction fetch") Reviewed-by: Oliver Upton Reviewed-by: Ard Biesheuvel Regression-tested-by: Ard Biesheuvel Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/kvm_emulate.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 9bdba47f7e14..0d40c48d8132 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -373,8 +373,26 @@ static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu) static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu) { - if (kvm_vcpu_abt_iss1tw(vcpu)) - return true; + if (kvm_vcpu_abt_iss1tw(vcpu)) { + /* + * Only a permission fault on a S1PTW should be + * considered as a write. Otherwise, page tables baked + * in a read-only memslot will result in an exception + * being delivered in the guest. + * + * The drawback is that we end-up faulting twice if the + * guest is using any of HW AF/DB: a translation fault + * to map the page containing the PT (read only at + * first), then a permission fault to allow the flags + * to be set. + */ + switch (kvm_vcpu_trap_get_fault_type(vcpu)) { + case ESR_ELx_FSC_PERM: + return true; + default: + return false; + } + } if (kvm_vcpu_trap_is_iabt(vcpu)) return false; From 5b66b27b9deee212e2404bf4da801b94f4a84589 Mon Sep 17 00:00:00 2001 From: Ding Hui Date: Tue, 27 Dec 2022 23:09:36 +0800 Subject: [PATCH 009/177] efi: fix userspace infinite retry read efivars after EFI runtime services page fault commit e006ac3003080177cf0b673441a4241f77aaecce upstream. After [1][2], if we catch exceptions due to EFI runtime service, we will clear EFI_RUNTIME_SERVICES bit to disable EFI runtime service, then the subsequent routine which invoke the EFI runtime service should fail. But the userspace cat efivars through /sys/firmware/efi/efivars/ will stuck and infinite loop calling read() due to efivarfs_file_read() return -EINTR. The -EINTR is converted from EFI_ABORTED by efi_status_to_err(), and is an improper return value in this situation, so let virt_efi_xxx() return EFI_DEVICE_ERROR and converted to -EIO to invoker. Cc: Fixes: 3425d934fc03 ("efi/x86: Handle page faults occurring while running EFI runtime services") Fixes: 23715a26c8d8 ("arm64: efi: Recover from synchronous exceptions occurring in firmware") Signed-off-by: Ding Hui Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/efi/runtime-wrappers.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index f3e54f6616f0..60075e0e4943 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -62,6 +62,7 @@ struct efi_runtime_work efi_rts_work; \ if (!efi_enabled(EFI_RUNTIME_SERVICES)) { \ pr_warn_once("EFI Runtime Services are disabled!\n"); \ + efi_rts_work.status = EFI_DEVICE_ERROR; \ goto exit; \ } \ \ From e2cee489f24feaf922cdffdbe56605337dfc8f1a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 9 Jan 2023 10:44:31 +0100 Subject: [PATCH 010/177] efi: tpm: Avoid READ_ONCE() for accessing the event log commit d3f450533bbcb6dd4d7d59cadc9b61b7321e4ac1 upstream. Nathan reports that recent kernels built with LTO will crash when doing EFI boot using Fedora's GRUB and SHIM. The culprit turns out to be a misaligned load from the TPM event log, which is annotated with READ_ONCE(), and under LTO, this gets translated into a LDAR instruction which does not tolerate misaligned accesses. Interestingly, this does not happen when booting the same kernel straight from the UEFI shell, and so the fact that the event log may appear misaligned in memory may be caused by a bug in GRUB or SHIM. However, using READ_ONCE() to access firmware tables is slightly unusual in any case, and here, we only need to ensure that 'event' is not dereferenced again after it gets unmapped, but this is already taken care of by the implicit barrier() semantics of the early_memunmap() call. Cc: Cc: Peter Jones Cc: Jarkko Sakkinen Cc: Matthew Garrett Reported-by: Nathan Chancellor Tested-by: Nathan Chancellor Link: https://github.com/ClangBuiltLinux/linux/issues/1782 Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- include/linux/tpm_eventlog.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h index 20c0ff54b7a0..7d68a5cc5881 100644 --- a/include/linux/tpm_eventlog.h +++ b/include/linux/tpm_eventlog.h @@ -198,8 +198,8 @@ static __always_inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *ev * The loop below will unmap these fields if the log is larger than * one page, so save them here for reference: */ - count = READ_ONCE(event->count); - event_type = READ_ONCE(event->event_type); + count = event->count; + event_type = event->event_type; /* Verify that it's the log header */ if (event_header->pcr_idx != 0 || From 78c8b66a697abed34c34a37eaa7fc3865b4f3ec4 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 4 Jan 2023 10:47:39 -0700 Subject: [PATCH 011/177] docs: Fix the docs build with Sphinx 6.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0283189e8f3d0917e2ac399688df85211f48447b upstream. Sphinx 6.0 removed the execfile_() function, which we use as part of the configuration process. They *did* warn us... Just open-code the functionality as is done in Sphinx itself. Tested (using SPHINX_CONF, since this code is only executed with an alternative config file) on various Sphinx versions from 2.5 through 6.0. Reported-by: Martin Liška Cc: stable@vger.kernel.org Signed-off-by: Jonathan Corbet Signed-off-by: Greg Kroah-Hartman --- Documentation/sphinx/load_config.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/sphinx/load_config.py b/Documentation/sphinx/load_config.py index eeb394b39e2c..8b416bfd75ac 100644 --- a/Documentation/sphinx/load_config.py +++ b/Documentation/sphinx/load_config.py @@ -3,7 +3,7 @@ import os import sys -from sphinx.util.pycompat import execfile_ +from sphinx.util.osutil import fs_encoding # ------------------------------------------------------------------------------ def loadConfig(namespace): @@ -48,7 +48,9 @@ def loadConfig(namespace): sys.stdout.write("load additional sphinx-config: %s\n" % config_file) config = namespace.copy() config['__file__'] = config_file - execfile_(config_file, config) + with open(config_file, 'rb') as f: + code = compile(f.read(), fs_encoding, 'exec') + exec(code, config) del config['__file__'] namespace.update(config) else: From 4ad6c063541665c407d17e1faf2fe4f04e947dcc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 9 Jan 2023 14:46:10 -0700 Subject: [PATCH 012/177] io_uring/poll: add hash if ready poll request can't complete inline commit febb985c06cb6f5fac63598c0bffd4fd823d110d upstream. If we don't, then we may lose access to it completely, leading to a request leak. This will eventually stall the ring exit process as well. Cc: stable@vger.kernel.org Fixes: 49f1c68e048f ("io_uring: optimise submission side poll_refs") Reported-and-tested-by: syzbot+6c95df01470a47fc3af4@syzkaller.appspotmail.com Link: https://lore.kernel.org/io-uring/0000000000009f829805f1ce87b2@google.com/ Suggested-by: Pavel Begunkov Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/poll.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/io_uring/poll.c b/io_uring/poll.c index fded1445a803..df42fd8a6ab0 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -549,6 +549,14 @@ static bool io_poll_can_finish_inline(struct io_kiocb *req, return pt->owning || io_poll_get_ownership(req); } +static void io_poll_add_hash(struct io_kiocb *req) +{ + if (req->flags & REQ_F_HASH_LOCKED) + io_poll_req_insert_locked(req); + else + io_poll_req_insert(req); +} + /* * Returns 0 when it's handed over for polling. The caller owns the requests if * it returns non-zero, but otherwise should not touch it. Negative values @@ -607,18 +615,17 @@ static int __io_arm_poll_handler(struct io_kiocb *req, if (mask && ((poll->events & (EPOLLET|EPOLLONESHOT)) == (EPOLLET|EPOLLONESHOT))) { - if (!io_poll_can_finish_inline(req, ipt)) + if (!io_poll_can_finish_inline(req, ipt)) { + io_poll_add_hash(req); return 0; + } io_poll_remove_entries(req); ipt->result_mask = mask; /* no one else has access to the req, forget about the ref */ return 1; } - if (req->flags & REQ_F_HASH_LOCKED) - io_poll_req_insert_locked(req); - else - io_poll_req_insert(req); + io_poll_add_hash(req); if (mask && (poll->events & EPOLLET) && io_poll_can_finish_inline(req, ipt)) { From 87b3a402bbc535dfdd86f02944ee2aaa4c62597c Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 22 Dec 2022 18:12:49 +0000 Subject: [PATCH 013/177] arm64: mte: Fix double-freeing of the temporary tag storage during coredump commit 736eedc974eaafbf4360e0ea85fc892cea72a223 upstream. Commit 16decce22efa ("arm64: mte: Fix the stack frame size warning in mte_dump_tag_range()") moved the temporary tag storage array from the stack to slab but it also introduced an error in double freeing this object. Remove the in-loop freeing. Fixes: 16decce22efa ("arm64: mte: Fix the stack frame size warning in mte_dump_tag_range()") Cc: # 5.18.x Signed-off-by: Catalin Marinas Reported-by: Seth Jenkins Cc: Will Deacon Link: https://lore.kernel.org/r/20221222181251.1345752-2-catalin.marinas@arm.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/elfcore.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c index 27ef7ad3ffd2..4e3f84799669 100644 --- a/arch/arm64/kernel/elfcore.c +++ b/arch/arm64/kernel/elfcore.c @@ -65,7 +65,6 @@ static int mte_dump_tag_range(struct coredump_params *cprm, mte_save_page_tags(page_address(page), tags); put_page(page); if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE)) { - mte_free_tag_storage(tags); ret = 0; break; } From f5731a7924866282efc133c2b2f9d2a97a3a3dd5 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 22 Dec 2022 18:12:51 +0000 Subject: [PATCH 014/177] arm64: mte: Avoid the racy walk of the vma list during core dump commit 4f4c549feb4ecca95ae9abb88887b941d196f83a upstream. The MTE coredump code in arch/arm64/kernel/elfcore.c iterates over the vma list without the mmap_lock held. This can race with another process or userfaultfd concurrently modifying the vma list. Change the for_each_mte_vma macro and its callers to instead use the vma snapshot taken by dump_vma_snapshot() and stored in the cprm object. Fixes: 6dd8b1a0b6cb ("arm64: mte: Dump the MTE tags in the core file") Cc: # 5.18.x Signed-off-by: Catalin Marinas Reported-by: Seth Jenkins Suggested-by: Seth Jenkins Cc: Will Deacon Link: https://lore.kernel.org/r/20221222181251.1345752-4-catalin.marinas@arm.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/elfcore.c | 56 +++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 30 deletions(-) diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c index 4e3f84799669..4077a625342b 100644 --- a/arch/arm64/kernel/elfcore.c +++ b/arch/arm64/kernel/elfcore.c @@ -8,28 +8,27 @@ #include #include -#define for_each_mte_vma(vmi, vma) \ +#define for_each_mte_vma(cprm, i, m) \ if (system_supports_mte()) \ - for_each_vma(vmi, vma) \ - if (vma->vm_flags & VM_MTE) + for (i = 0, m = cprm->vma_meta; \ + i < cprm->vma_count; \ + i++, m = cprm->vma_meta + i) \ + if (m->flags & VM_MTE) -static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma) +static unsigned long mte_vma_tag_dump_size(struct core_vma_metadata *m) { - if (vma->vm_flags & VM_DONTDUMP) - return 0; - - return vma_pages(vma) * MTE_PAGE_TAG_STORAGE; + return (m->dump_size >> PAGE_SHIFT) * MTE_PAGE_TAG_STORAGE; } /* Derived from dump_user_range(); start/end must be page-aligned */ static int mte_dump_tag_range(struct coredump_params *cprm, - unsigned long start, unsigned long end) + unsigned long start, unsigned long len) { int ret = 1; unsigned long addr; void *tags = NULL; - for (addr = start; addr < end; addr += PAGE_SIZE) { + for (addr = start; addr < start + len; addr += PAGE_SIZE) { struct page *page = get_dump_page(addr); /* @@ -78,11 +77,11 @@ static int mte_dump_tag_range(struct coredump_params *cprm, Elf_Half elf_core_extra_phdrs(void) { - struct vm_area_struct *vma; + int i; + struct core_vma_metadata *m; int vma_count = 0; - VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(vmi, vma) + for_each_mte_vma(cprm, i, m) vma_count++; return vma_count; @@ -90,18 +89,18 @@ Elf_Half elf_core_extra_phdrs(void) int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) { - struct vm_area_struct *vma; - VMA_ITERATOR(vmi, current->mm, 0); + int i; + struct core_vma_metadata *m; - for_each_mte_vma(vmi, vma) { + for_each_mte_vma(cprm, i, m) { struct elf_phdr phdr; phdr.p_type = PT_AARCH64_MEMTAG_MTE; phdr.p_offset = offset; - phdr.p_vaddr = vma->vm_start; + phdr.p_vaddr = m->start; phdr.p_paddr = 0; - phdr.p_filesz = mte_vma_tag_dump_size(vma); - phdr.p_memsz = vma->vm_end - vma->vm_start; + phdr.p_filesz = mte_vma_tag_dump_size(m); + phdr.p_memsz = m->end - m->start; offset += phdr.p_filesz; phdr.p_flags = 0; phdr.p_align = 0; @@ -115,26 +114,23 @@ int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) size_t elf_core_extra_data_size(void) { - struct vm_area_struct *vma; + int i; + struct core_vma_metadata *m; size_t data_size = 0; - VMA_ITERATOR(vmi, current->mm, 0); - for_each_mte_vma(vmi, vma) - data_size += mte_vma_tag_dump_size(vma); + for_each_mte_vma(cprm, i, m) + data_size += mte_vma_tag_dump_size(m); return data_size; } int elf_core_write_extra_data(struct coredump_params *cprm) { - struct vm_area_struct *vma; - VMA_ITERATOR(vmi, current->mm, 0); + int i; + struct core_vma_metadata *m; - for_each_mte_vma(vmi, vma) { - if (vma->vm_flags & VM_DONTDUMP) - continue; - - if (!mte_dump_tag_range(cprm, vma->vm_start, vma->vm_end)) + for_each_mte_vma(cprm, i, m) { + if (!mte_dump_tag_range(cprm, m->start, m->dump_size)) return 0; } From 8f282a84f31c9f70350ce809eda132fbe99cf5d5 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 4 Jan 2023 15:16:26 +0000 Subject: [PATCH 015/177] arm64: cmpxchg_double*: hazard against entire exchange variable commit 031af50045ea97ed4386eb3751ca2c134d0fc911 upstream. The inline assembly for arm64's cmpxchg_double*() implementations use a +Q constraint to hazard against other accesses to the memory location being exchanged. However, the pointer passed to the constraint is a pointer to unsigned long, and thus the hazard only applies to the first 8 bytes of the location. GCC can take advantage of this, assuming that other portions of the location are unchanged, leading to a number of potential problems. This is similar to what we fixed back in commit: fee960bed5e857eb ("arm64: xchg: hazard against entire exchange variable") ... but we forgot to adjust cmpxchg_double*() similarly at the same time. The same problem applies, as demonstrated with the following test: | struct big { | u64 lo, hi; | } __aligned(128); | | unsigned long foo(struct big *b) | { | u64 hi_old, hi_new; | | hi_old = b->hi; | cmpxchg_double_local(&b->lo, &b->hi, 0x12, 0x34, 0x56, 0x78); | hi_new = b->hi; | | return hi_old ^ hi_new; | } ... which GCC 12.1.0 compiles as: | 0000000000000000 : | 0: d503233f paciasp | 4: aa0003e4 mov x4, x0 | 8: 1400000e b 40 | c: d2800240 mov x0, #0x12 // #18 | 10: d2800681 mov x1, #0x34 // #52 | 14: aa0003e5 mov x5, x0 | 18: aa0103e6 mov x6, x1 | 1c: d2800ac2 mov x2, #0x56 // #86 | 20: d2800f03 mov x3, #0x78 // #120 | 24: 48207c82 casp x0, x1, x2, x3, [x4] | 28: ca050000 eor x0, x0, x5 | 2c: ca060021 eor x1, x1, x6 | 30: aa010000 orr x0, x0, x1 | 34: d2800000 mov x0, #0x0 // #0 <--- BANG | 38: d50323bf autiasp | 3c: d65f03c0 ret | 40: d2800240 mov x0, #0x12 // #18 | 44: d2800681 mov x1, #0x34 // #52 | 48: d2800ac2 mov x2, #0x56 // #86 | 4c: d2800f03 mov x3, #0x78 // #120 | 50: f9800091 prfm pstl1strm, [x4] | 54: c87f1885 ldxp x5, x6, [x4] | 58: ca0000a5 eor x5, x5, x0 | 5c: ca0100c6 eor x6, x6, x1 | 60: aa0600a6 orr x6, x5, x6 | 64: b5000066 cbnz x6, 70 | 68: c8250c82 stxp w5, x2, x3, [x4] | 6c: 35ffff45 cbnz w5, 54 | 70: d2800000 mov x0, #0x0 // #0 <--- BANG | 74: d50323bf autiasp | 78: d65f03c0 ret Notice that at the lines with "BANG" comments, GCC has assumed that the higher 8 bytes are unchanged by the cmpxchg_double() call, and that `hi_old ^ hi_new` can be reduced to a constant zero, for both LSE and LL/SC versions of cmpxchg_double(). This patch fixes the issue by passing a pointer to __uint128_t into the +Q constraint, ensuring that the compiler hazards against the entire 16 bytes being modified. With this change, GCC 12.1.0 compiles the above test as: | 0000000000000000 : | 0: f9400407 ldr x7, [x0, #8] | 4: d503233f paciasp | 8: aa0003e4 mov x4, x0 | c: 1400000f b 48 | 10: d2800240 mov x0, #0x12 // #18 | 14: d2800681 mov x1, #0x34 // #52 | 18: aa0003e5 mov x5, x0 | 1c: aa0103e6 mov x6, x1 | 20: d2800ac2 mov x2, #0x56 // #86 | 24: d2800f03 mov x3, #0x78 // #120 | 28: 48207c82 casp x0, x1, x2, x3, [x4] | 2c: ca050000 eor x0, x0, x5 | 30: ca060021 eor x1, x1, x6 | 34: aa010000 orr x0, x0, x1 | 38: f9400480 ldr x0, [x4, #8] | 3c: d50323bf autiasp | 40: ca0000e0 eor x0, x7, x0 | 44: d65f03c0 ret | 48: d2800240 mov x0, #0x12 // #18 | 4c: d2800681 mov x1, #0x34 // #52 | 50: d2800ac2 mov x2, #0x56 // #86 | 54: d2800f03 mov x3, #0x78 // #120 | 58: f9800091 prfm pstl1strm, [x4] | 5c: c87f1885 ldxp x5, x6, [x4] | 60: ca0000a5 eor x5, x5, x0 | 64: ca0100c6 eor x6, x6, x1 | 68: aa0600a6 orr x6, x5, x6 | 6c: b5000066 cbnz x6, 78 | 70: c8250c82 stxp w5, x2, x3, [x4] | 74: 35ffff45 cbnz w5, 5c | 78: f9400480 ldr x0, [x4, #8] | 7c: d50323bf autiasp | 80: ca0000e0 eor x0, x7, x0 | 84: d65f03c0 ret ... sampling the high 8 bytes before and after the cmpxchg, and performing an EOR, as we'd expect. For backporting, I've tested this atop linux-4.9.y with GCC 5.5.0. Note that linux-4.9.y is oldest currently supported stable release, and mandates GCC 5.1+. Unfortunately I couldn't get a GCC 5.1 binary to run on my machines due to library incompatibilities. I've also used a standalone test to check that we can use a __uint128_t pointer in a +Q constraint at least as far back as GCC 4.8.5 and LLVM 3.9.1. Fixes: 5284e1b4bc8a ("arm64: xchg: Implement cmpxchg_double") Fixes: e9a4b795652f ("arm64: cmpxchg_dbl: patch in lse instructions when supported by the CPU") Reported-by: Boqun Feng Link: https://lore.kernel.org/lkml/Y6DEfQXymYVgL3oJ@boqun-archlinux/ Reported-by: Peter Zijlstra Link: https://lore.kernel.org/lkml/Y6GXoO4qmH9OIZ5Q@hirez.programming.kicks-ass.net/ Signed-off-by: Mark Rutland Cc: stable@vger.kernel.org Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Steve Capper Cc: Will Deacon Link: https://lore.kernel.org/r/20230104151626.3262137-1-mark.rutland@arm.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/atomic_ll_sc.h | 2 +- arch/arm64/include/asm/atomic_lse.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index 0890e4f568fb..cbb3d961123b 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -315,7 +315,7 @@ __ll_sc__cmpxchg_double##name(unsigned long old1, \ " cbnz %w0, 1b\n" \ " " #mb "\n" \ "2:" \ - : "=&r" (tmp), "=&r" (ret), "+Q" (*(unsigned long *)ptr) \ + : "=&r" (tmp), "=&r" (ret), "+Q" (*(__uint128_t *)ptr) \ : "r" (old1), "r" (old2), "r" (new1), "r" (new2) \ : cl); \ \ diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 52075e93de6c..a94d6dacc029 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -311,7 +311,7 @@ __lse__cmpxchg_double##name(unsigned long old1, \ " eor %[old2], %[old2], %[oldval2]\n" \ " orr %[old1], %[old1], %[old2]" \ : [old1] "+&r" (x0), [old2] "+&r" (x1), \ - [v] "+Q" (*(unsigned long *)ptr) \ + [v] "+Q" (*(__uint128_t *)ptr) \ : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \ [oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \ : cl); \ From 0bb6742bf81a09fb7c9c23a6f59dc0d3242eaac6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 10 Jan 2023 16:30:28 +0100 Subject: [PATCH 016/177] ACPI: Fix selecting wrong ACPI fwnode for the iGPU on some Dell laptops commit f64e4275ef7407d5c3eca20436519bbd1f796e40 upstream. The Dell Latitude E6430 both with and without the optional NVidia dGPU has a bug in its ACPI tables which is causing Linux to assign the wrong ACPI fwnode / companion to the pci_device for the i915 iGPU. Specifically under the PCI root bridge there are these 2 ACPI Device()s : Scope (_SB.PCI0) { Device (GFX0) { Name (_ADR, 0x00020000) // _ADR: Address } ... Device (VID) { Name (_ADR, 0x00020000) // _ADR: Address ... Method (_DOS, 1, NotSerialized) // _DOS: Disable Output Switching { VDP8 = Arg0 VDP1 (One, VDP8) } Method (_DOD, 0, NotSerialized) // _DOD: Display Output Devices { ... } ... } } The non-functional GFX0 ACPI device is a problem, because this gets returned as ACPI companion-device by acpi_find_child_device() for the iGPU. This is a long standing problem and the i915 driver does use the ACPI companion for some things, but works fine without it. However since commit 63f534b8bad9 ("ACPI: PCI: Rework acpi_get_pci_dev()") acpi_get_pci_dev() relies on the physical-node pointer in the acpi_device and that is set on the wrong acpi_device because of the wrong acpi_find_child_device() return. This breaks the ACPI video code, leading to non working backlight control in some cases. Add a type.backlight flag, mark ACPI video bus devices with this and make find_child_checks() return a higher score for children with this flag set, so that it picks the right companion-device. Fixes: 63f534b8bad9 ("ACPI: PCI: Rework acpi_get_pci_dev()") Co-developed-by: Rafael J. Wysocki Signed-off-by: Hans de Goede Cc: 6.1+ # 6.1+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/glue.c | 14 ++++++++++++-- drivers/acpi/scan.c | 7 +++++-- include/acpi/acpi_bus.h | 3 ++- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 204fe94c7e45..a194f30876c5 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -75,7 +75,8 @@ static struct acpi_bus_type *acpi_get_bus_type(struct device *dev) } #define FIND_CHILD_MIN_SCORE 1 -#define FIND_CHILD_MAX_SCORE 2 +#define FIND_CHILD_MID_SCORE 2 +#define FIND_CHILD_MAX_SCORE 3 static int match_any(struct acpi_device *adev, void *not_used) { @@ -96,8 +97,17 @@ static int find_child_checks(struct acpi_device *adev, bool check_children) return -ENODEV; status = acpi_evaluate_integer(adev->handle, "_STA", NULL, &sta); - if (status == AE_NOT_FOUND) + if (status == AE_NOT_FOUND) { + /* + * Special case: backlight device objects without _STA are + * preferred to other objects with the same _ADR value, because + * it is more likely that they are actually useful. + */ + if (adev->pnp.type.backlight) + return FIND_CHILD_MID_SCORE; + return FIND_CHILD_MIN_SCORE; + } if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED)) return -ENODEV; diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index b47e93a24a9a..dbfa58e799e2 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1370,9 +1370,12 @@ static void acpi_set_pnp_ids(acpi_handle handle, struct acpi_device_pnp *pnp, * Some devices don't reliably have _HIDs & _CIDs, so add * synthetic HIDs to make sure drivers can find them. */ - if (acpi_is_video_device(handle)) + if (acpi_is_video_device(handle)) { acpi_add_id(pnp, ACPI_VIDEO_HID); - else if (acpi_bay_match(handle)) + pnp->type.backlight = 1; + break; + } + if (acpi_bay_match(handle)) acpi_add_id(pnp, ACPI_BAY_HID); else if (acpi_dock_match(handle)) acpi_add_id(pnp, ACPI_DOCK_HID); diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index c09d72986968..ab2d6266038a 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -230,7 +230,8 @@ struct acpi_pnp_type { u32 hardware_id:1; u32 bus_address:1; u32 platform_id:1; - u32 reserved:29; + u32 backlight:1; + u32 reserved:28; }; struct acpi_device_pnp { From 2da1ba6f56f66202769761cf9308c0b664431474 Mon Sep 17 00:00:00 2001 From: Noor Azura Ahmad Tarmizi Date: Wed, 11 Jan 2023 13:02:00 +0800 Subject: [PATCH 017/177] net: stmmac: add aux timestamps fifo clearance wait commit ae9dcb91c6069e20b3b9505d79cbc89fd6e086f5 upstream. Add timeout polling wait for auxiliary timestamps snapshot FIFO clear bit (ATSFC) to clear. This is to ensure no residue fifo value is being read erroneously. Fixes: f4da56529da6 ("net: stmmac: Add support for external trigger timestamping") Cc: # 5.10.x Signed-off-by: Noor Azura Ahmad Tarmizi Link: https://lore.kernel.org/r/20230111050200.2130-1-noor.azura.ahmad.tarmizi@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 4d11980dcd64..9c91a3dc8e38 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -219,7 +219,10 @@ static int stmmac_enable(struct ptp_clock_info *ptp, } writel(acr_value, ptpaddr + PTP_ACR); mutex_unlock(&priv->aux_ts_lock); - ret = 0; + /* wait for auxts fifo clear to finish */ + ret = readl_poll_timeout(ptpaddr + PTP_ACR, acr_value, + !(acr_value & PTP_ACR_ATSFC), + 10, 10000); break; default: From 90e585b6ab23d047e8c09716487a7de0d3f4b3d4 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 10 Jan 2023 20:56:59 +0200 Subject: [PATCH 018/177] perf auxtrace: Fix address filter duplicate symbol selection commit cf129830ee820f7fc90b98df193cd49d49344d09 upstream. When a match has been made to the nth duplicate symbol, return success not error. Example: Before: $ cat file.c cat: file.c: No such file or directory $ cat file1.c #include static void func(void) { printf("First func\n"); } void other(void); int main() { func(); other(); return 0; } $ cat file2.c #include static void func(void) { printf("Second func\n"); } void other(void) { func(); } $ gcc -Wall -Wextra -o test file1.c file2.c $ perf record -e intel_pt//u --filter 'filter func @ ./test' -- ./test Multiple symbols with name 'func' #1 0x1149 l func which is near main #2 0x1179 l func which is near other Disambiguate symbol name by inserting #n after the name e.g. func #2 Or select a global symbol by inserting #0 or #g or #G Failed to parse address filter: 'filter func @ ./test' Filter format is: filter|start|stop|tracestop [/ ] [@] Where multiple filters are separated by space or comma. $ perf record -e intel_pt//u --filter 'filter func #2 @ ./test' -- ./test Failed to parse address filter: 'filter func #2 @ ./test' Filter format is: filter|start|stop|tracestop [/ ] [@] Where multiple filters are separated by space or comma. After: $ perf record -e intel_pt//u --filter 'filter func #2 @ ./test' -- ./test First func Second func [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.016 MB perf.data ] $ perf script --itrace=b -Ftime,flags,ip,sym,addr --ns 1231062.526977619: tr strt 0 [unknown] => 558495708179 func 1231062.526977619: tr end call 558495708188 func => 558495708050 _init 1231062.526979286: tr strt 0 [unknown] => 55849570818d func 1231062.526979286: tr end return 55849570818f func => 55849570819d other Fixes: 1b36c03e356936d6 ("perf record: Add support for using symbols in address filters") Reported-by: Dmitrii Dolgov <9erthalion6@gmail.com> Signed-off-by: Adrian Hunter Tested-by: Dmitry Dolgov <9erthalion6@gmail.com> Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20230110185659.15979-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/auxtrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 46ada5ec3f9a..47062f459ccd 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -2610,7 +2610,7 @@ static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start, *size = sym->start - *start; if (idx > 0) { if (*size) - return 1; + return 0; } else if (dso_sym_match(sym, sym_name, &cnt, idx)) { print_duplicate_syms(dso, sym_name); return -EINVAL; From ab4b5a2154bc008db41c21fda63d1fc17d716271 Mon Sep 17 00:00:00 2001 From: Alexander Egorenkov Date: Mon, 14 Nov 2022 11:40:08 +0100 Subject: [PATCH 019/177] s390/kexec: fix ipl report address for kdump commit c2337a40e04dde1692b5b0a46ecc59f89aaba8a1 upstream. This commit addresses the following erroneous situation with file-based kdump executed on a system with a valid IPL report. On s390, a kdump kernel, its initrd and IPL report if present are loaded into a special and reserved on boot memory region - crashkernel. When a system crashes and kdump was activated before, the purgatory code is entered first which swaps the crashkernel and [0 - crashkernel size] memory regions. Only after that the kdump kernel is entered. For this reason, the pointer to an IPL report in lowcore must point to the IPL report after the swap and not to the address of the IPL report that was located in crashkernel memory region before the swap. Failing to do so, makes the kdump's decompressor try to read memory from the crashkernel memory region which already contains the production's kernel memory. The situation described above caused spontaneous kdump failures/hangs on systems where the Secure IPL is activated because on such systems an IPL report is always present. In that case kdump's decompressor tried to parse an IPL report which frequently lead to illegal memory accesses because an IPL report contains addresses to various data. Cc: Fixes: 99feaa717e55 ("s390/kexec_file: Create ipl report and pass to next kernel") Reviewed-by: Vasily Gorbik Signed-off-by: Alexander Egorenkov Signed-off-by: Heiko Carstens Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/machine_kexec_file.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index fc6d5f58debe..2df94d32140c 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -187,8 +187,6 @@ static int kexec_file_add_ipl_report(struct kimage *image, data->memsz = ALIGN(data->memsz, PAGE_SIZE); buf.mem = data->memsz; - if (image->type == KEXEC_TYPE_CRASH) - buf.mem += crashk_res.start; ptr = (void *)ipl_cert_list_addr; end = ptr + ipl_cert_list_size; @@ -225,6 +223,9 @@ static int kexec_file_add_ipl_report(struct kimage *image, data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr); *lc_ipl_parmblock_ptr = (__u32)buf.mem; + if (image->type == KEXEC_TYPE_CRASH) + buf.mem += crashk_res.start; + ret = kexec_add_buffer(&buf); out: return ret; From 20fa7855af45e533cddab564203921e2a6e981ef Mon Sep 17 00:00:00 2001 From: "Ivan T. Ivanov" Date: Fri, 6 Jan 2023 15:19:05 +0200 Subject: [PATCH 020/177] brcmfmac: Prefer DT board type over DMI board type commit a5a36720c3f650f859f5e9535dd62d06f13f4f3b upstream. The introduction of support for Apple board types inadvertently changed the precedence order, causing hybrid SMBIOS+DT platforms to look up the firmware using the DMI information instead of the device tree compatible to generate the board type. Revert back to the old behavior, as affected platforms use firmwares named after the DT compatible. Fixes: 7682de8b3351 ("wifi: brcmfmac: of: Fetch Apple properties") [1] https://bugzilla.opensuse.org/show_bug.cgi?id=1206697#c13 Cc: stable@vger.kernel.org Signed-off-by: Ivan T. Ivanov Reviewed-by: Hector Martin Reviewed-by: Arend van Spriel Tested-by: Peter Robinson Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c index a83699de01ec..fdd0c9abc1a1 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c @@ -79,7 +79,8 @@ void brcmf_of_probe(struct device *dev, enum brcmf_bus_type bus_type, /* Apple ARM64 platforms have their own idea of board type, passed in * via the device tree. They also have an antenna SKU parameter */ - if (!of_property_read_string(np, "brcm,board-type", &prop)) + err = of_property_read_string(np, "brcm,board-type", &prop); + if (!err) settings->board_type = prop; if (!of_property_read_string(np, "apple,antenna-sku", &prop)) @@ -87,7 +88,7 @@ void brcmf_of_probe(struct device *dev, enum brcmf_bus_type bus_type, /* Set board-type to the first string of the machine compatible prop */ root = of_find_node_by_path("/"); - if (root && !settings->board_type) { + if (root && err) { char *board_type; const char *tmp; From 5fc5cdee8ef2db8e89451a4310be10c17fae1d4d Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Fri, 30 Dec 2022 22:15:45 -0800 Subject: [PATCH 021/177] ASoC: qcom: lpass-cpu: Fix fallback SD line index handling commit 000bca8d706d1bf7cca01af75787247c5a2fdedf upstream. These indices should reference the ID placed within the dai_driver array, not the indices of the array itself. This fixes commit 4ff028f6c108 ("ASoC: qcom: lpass-cpu: Make I2S SD lines configurable"), which among others, broke IPQ8064 audio (sound/soc/qcom/lpass-ipq806x.c) because it uses ID 4 but we'd stop initializing the mi2s_playback_sd_mode and mi2s_capture_sd_mode arrays at ID 0. Fixes: 4ff028f6c108 ("ASoC: qcom: lpass-cpu: Make I2S SD lines configurable") Cc: Signed-off-by: Brian Norris Reviewed-by: Stephan Gerhold Link: https://lore.kernel.org/r/20221231061545.2110253-1-computersforpeace@gmail.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/qcom/lpass-cpu.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/soc/qcom/lpass-cpu.c b/sound/soc/qcom/lpass-cpu.c index 54353842dc07..dbdaaa85ce48 100644 --- a/sound/soc/qcom/lpass-cpu.c +++ b/sound/soc/qcom/lpass-cpu.c @@ -1037,10 +1037,11 @@ static void of_lpass_cpu_parse_dai_data(struct device *dev, struct lpass_data *data) { struct device_node *node; - int ret, id; + int ret, i, id; /* Allow all channels by default for backwards compatibility */ - for (id = 0; id < data->variant->num_dai; id++) { + for (i = 0; i < data->variant->num_dai; i++) { + id = data->variant->dai_driver[i].id; data->mi2s_playback_sd_mode[id] = LPAIF_I2SCTL_MODE_8CH; data->mi2s_capture_sd_mode[id] = LPAIF_I2SCTL_MODE_8CH; } From 80a82f6eb3b75c8bb02ada8f7f55efca08b74eec Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 22 Dec 2022 18:12:50 +0000 Subject: [PATCH 022/177] elfcore: Add a cprm parameter to elf_core_extra_{phdrs,data_size} commit 19e183b54528f11fafeca60fc6d0821e29ff281e upstream. A subsequent fix for arm64 will use this parameter to parse the vma information from the snapshot created by dump_vma_snapshot() rather than traversing the vma list without the mmap_lock. Fixes: 6dd8b1a0b6cb ("arm64: mte: Dump the MTE tags in the core file") Cc: # 5.18.x Signed-off-by: Catalin Marinas Reported-by: Seth Jenkins Suggested-by: Seth Jenkins Cc: Will Deacon Cc: Eric Biederman Cc: Kees Cook Link: https://lore.kernel.org/r/20221222181251.1345752-3-catalin.marinas@arm.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/elfcore.c | 4 ++-- arch/ia64/kernel/elfcore.c | 4 ++-- arch/x86/um/elfcore.c | 4 ++-- fs/binfmt_elf.c | 4 ++-- fs/binfmt_elf_fdpic.c | 4 ++-- include/linux/elfcore.h | 8 ++++---- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c index 4077a625342b..662a61e5e75e 100644 --- a/arch/arm64/kernel/elfcore.c +++ b/arch/arm64/kernel/elfcore.c @@ -75,7 +75,7 @@ static int mte_dump_tag_range(struct coredump_params *cprm, return ret; } -Elf_Half elf_core_extra_phdrs(void) +Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm) { int i; struct core_vma_metadata *m; @@ -112,7 +112,7 @@ int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) return 1; } -size_t elf_core_extra_data_size(void) +size_t elf_core_extra_data_size(struct coredump_params *cprm) { int i; struct core_vma_metadata *m; diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c index 94680521fbf9..8895df121540 100644 --- a/arch/ia64/kernel/elfcore.c +++ b/arch/ia64/kernel/elfcore.c @@ -7,7 +7,7 @@ #include -Elf64_Half elf_core_extra_phdrs(void) +Elf64_Half elf_core_extra_phdrs(struct coredump_params *cprm) { return GATE_EHDR->e_phnum; } @@ -60,7 +60,7 @@ int elf_core_write_extra_data(struct coredump_params *cprm) return 1; } -size_t elf_core_extra_data_size(void) +size_t elf_core_extra_data_size(struct coredump_params *cprm) { const struct elf_phdr *const gate_phdrs = (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); diff --git a/arch/x86/um/elfcore.c b/arch/x86/um/elfcore.c index 48a3eb09d951..650cdbbdaf45 100644 --- a/arch/x86/um/elfcore.c +++ b/arch/x86/um/elfcore.c @@ -7,7 +7,7 @@ #include -Elf32_Half elf_core_extra_phdrs(void) +Elf32_Half elf_core_extra_phdrs(struct coredump_params *cprm) { return vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0; } @@ -60,7 +60,7 @@ int elf_core_write_extra_data(struct coredump_params *cprm) return 1; } -size_t elf_core_extra_data_size(void) +size_t elf_core_extra_data_size(struct coredump_params *cprm) { if ( vsyscall_ehdr ) { const struct elfhdr *const ehdrp = diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 6a11025e5850..444302afc673 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -2209,7 +2209,7 @@ static int elf_core_dump(struct coredump_params *cprm) * The number of segs are recored into ELF header as 16bit value. * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here. */ - segs = cprm->vma_count + elf_core_extra_phdrs(); + segs = cprm->vma_count + elf_core_extra_phdrs(cprm); /* for notes section */ segs++; @@ -2249,7 +2249,7 @@ static int elf_core_dump(struct coredump_params *cprm) dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); offset += cprm->vma_data_size; - offset += elf_core_extra_data_size(); + offset += elf_core_extra_data_size(cprm); e_shoff = offset; if (e_phnum == PN_XNUM) { diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 9ce5e1f41c26..069f12cc7634 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1509,7 +1509,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) tmp->next = thread_list; thread_list = tmp; - segs = cprm->vma_count + elf_core_extra_phdrs(); + segs = cprm->vma_count + elf_core_extra_phdrs(cprm); /* for notes section */ segs++; @@ -1555,7 +1555,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm) dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE); offset += cprm->vma_data_size; - offset += elf_core_extra_data_size(); + offset += elf_core_extra_data_size(cprm); e_shoff = offset; if (e_phnum == PN_XNUM) { diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 346a8b56cdc8..79e26b18bf0e 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -114,14 +114,14 @@ static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_reg * Dumping its extra ELF program headers includes all the other information * a debugger needs to easily find how the gate DSO was being used. */ -extern Elf_Half elf_core_extra_phdrs(void); +extern Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm); extern int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset); extern int elf_core_write_extra_data(struct coredump_params *cprm); -extern size_t elf_core_extra_data_size(void); +extern size_t elf_core_extra_data_size(struct coredump_params *cprm); #else -static inline Elf_Half elf_core_extra_phdrs(void) +static inline Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm) { return 0; } @@ -136,7 +136,7 @@ static inline int elf_core_write_extra_data(struct coredump_params *cprm) return 1; } -static inline size_t elf_core_extra_data_size(void) +static inline size_t elf_core_extra_data_size(struct coredump_params *cprm) { return 0; } From ba518c5b6429ed7273e26bff318b92bd27857e4b Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Tue, 10 Jan 2023 23:10:29 +0800 Subject: [PATCH 023/177] cpufreq: amd-pstate: fix kernel hang issue while amd-pstate unregistering commit 4f3085f87b51a551a0647f218d4f324796ecb703 upstream. In the amd_pstate_adjust_perf(), there is one cpufreq_cpu_get() call to increase increments the kobject reference count of policy and make it as busy. Therefore, a corresponding call to cpufreq_cpu_put() is needed to decrement the kobject reference count back, it will resolve the kernel hang issue when unregistering the amd-pstate driver and register the `amd_pstate_epp` driver instance. Fixes: 1d215f0319 ("cpufreq: amd-pstate: Add fast switch function for AMD P-State") Acked-by: Huang Rui Reviewed-by: Mario Limonciello Tested-by: Wyes Karny Signed-off-by: Perry Yuan Cc: 5.17+ # 5.17+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/amd-pstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 204e39006dda..c17bd845f5fc 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -307,6 +307,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu, max_perf = min_perf; amd_pstate_update(cpudata, min_perf, des_perf, max_perf, true); + cpufreq_cpu_put(policy); } static int amd_get_min_freq(struct amd_cpudata *cpudata) From 45a584f139e881366b04072ad86a961673047792 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 5 Jan 2023 15:44:20 +0100 Subject: [PATCH 024/177] s390/cpum_sf: add READ_ONCE() semantics to compare and swap loops commit 82d3edb50a11bf3c5ef63294d5358ba230181413 upstream. The current cmpxchg_double() loops within the perf hw sampling code do not have READ_ONCE() semantics to read the old value from memory. This allows the compiler to generate code which reads the "old" value several times from memory, which again allows for inconsistencies. For example: /* Reset trailer (using compare-double-and-swap) */ do { te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK; te_flags |= SDB_TE_ALERT_REQ_MASK; } while (!cmpxchg_double(&te->flags, &te->overflow, te->flags, te->overflow, te_flags, 0ULL)); The compiler could generate code where te->flags used within the cmpxchg_double() call may be refetched from memory and which is not necessarily identical to the previous read version which was used to generate te_flags. Which in turn means that an incorrect update could happen. Fix this by adding READ_ONCE() semantics to all cmpxchg_double() loops. Given that READ_ONCE() cannot generate code on s390 which atomically reads 16 bytes, use a private compare-and-swap-double implementation to achieve that. Also replace cmpxchg_double() with the private implementation to be able to re-use the old value within the loops. As a side effect this converts the whole code to only use bit fields to read and modify bits within the hws trailer header. Reported-by: Alexander Gordeev Acked-by: Alexander Gordeev Acked-by: Hendrik Brueckner Reviewed-by: Thomas Richter Cc: Link: https://lore.kernel.org/linux-s390/Y71QJBhNTIatvxUT@osiris/T/#ma14e2a5f7aa8ed4b94b6f9576799b3ad9c60f333 Signed-off-by: Heiko Carstens Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/cpu_mf.h | 31 +++++----- arch/s390/kernel/perf_cpum_sf.c | 101 ++++++++++++++++++++------------ 2 files changed, 77 insertions(+), 55 deletions(-) diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index feaba12dbecb..efa103b52a1a 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -131,19 +131,21 @@ struct hws_combined_entry { struct hws_diag_entry diag; /* Diagnostic-sampling data entry */ } __packed; -struct hws_trailer_entry { - union { - struct { - unsigned int f:1; /* 0 - Block Full Indicator */ - unsigned int a:1; /* 1 - Alert request control */ - unsigned int t:1; /* 2 - Timestamp format */ - unsigned int :29; /* 3 - 31: Reserved */ - unsigned int bsdes:16; /* 32-47: size of basic SDE */ - unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */ - }; - unsigned long long flags; /* 0 - 63: All indicators */ +union hws_trailer_header { + struct { + unsigned int f:1; /* 0 - Block Full Indicator */ + unsigned int a:1; /* 1 - Alert request control */ + unsigned int t:1; /* 2 - Timestamp format */ + unsigned int :29; /* 3 - 31: Reserved */ + unsigned int bsdes:16; /* 32-47: size of basic SDE */ + unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */ + unsigned long long overflow; /* 64 - Overflow Count */ }; - unsigned long long overflow; /* 64 - sample Overflow count */ + __uint128_t val; +}; + +struct hws_trailer_entry { + union hws_trailer_header header; /* 0 - 15 Flags + Overflow Count */ unsigned char timestamp[16]; /* 16 - 31 timestamp */ unsigned long long reserved1; /* 32 -Reserved */ unsigned long long reserved2; /* */ @@ -290,14 +292,11 @@ static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi, return USEC_PER_SEC * qsi->cpu_speed / rate; } -#define SDB_TE_ALERT_REQ_MASK 0x4000000000000000UL -#define SDB_TE_BUFFER_FULL_MASK 0x8000000000000000UL - /* Return TOD timestamp contained in an trailer entry */ static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te) { /* TOD in STCKE format */ - if (te->t) + if (te->header.t) return *((unsigned long long *) &te->timestamp[1]); /* TOD in STCK format */ diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 332a49965130..ce886a03545a 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -163,14 +163,15 @@ static void free_sampling_buffer(struct sf_buffer *sfb) static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags) { - unsigned long sdb, *trailer; + struct hws_trailer_entry *te; + unsigned long sdb; /* Allocate and initialize sample-data-block */ sdb = get_zeroed_page(gfp_flags); if (!sdb) return -ENOMEM; - trailer = trailer_entry_ptr(sdb); - *trailer = SDB_TE_ALERT_REQ_MASK; + te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb); + te->header.a = 1; /* Link SDB into the sample-data-block-table */ *sdbt = sdb; @@ -1206,7 +1207,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, "%s: Found unknown" " sampling data entry: te->f %i" " basic.def %#4x (%p)\n", __func__, - te->f, sample->def, sample); + te->header.f, sample->def, sample); /* Sample slot is not yet written or other record. * * This condition can occur if the buffer was reused @@ -1217,7 +1218,7 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, * that are not full. Stop processing if the first * invalid format was detected. */ - if (!te->f) + if (!te->header.f) break; } @@ -1227,6 +1228,16 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, } } +static inline __uint128_t __cdsg(__uint128_t *ptr, __uint128_t old, __uint128_t new) +{ + asm volatile( + " cdsg %[old],%[new],%[ptr]\n" + : [old] "+d" (old), [ptr] "+QS" (*ptr) + : [new] "d" (new) + : "memory", "cc"); + return old; +} + /* hw_perf_event_update() - Process sampling buffer * @event: The perf event * @flush_all: Flag to also flush partially filled sample-data-blocks @@ -1243,10 +1254,11 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, */ static void hw_perf_event_update(struct perf_event *event, int flush_all) { + unsigned long long event_overflow, sampl_overflow, num_sdb; + union hws_trailer_header old, prev, new; struct hw_perf_event *hwc = &event->hw; struct hws_trailer_entry *te; unsigned long *sdbt; - unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags; int done; /* @@ -1266,25 +1278,25 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); /* Leave loop if no more work to do (block full indicator) */ - if (!te->f) { + if (!te->header.f) { done = 1; if (!flush_all) break; } /* Check the sample overflow count */ - if (te->overflow) + if (te->header.overflow) /* Account sample overflows and, if a particular limit * is reached, extend the sampling buffer. * For details, see sfb_account_overflows(). */ - sampl_overflow += te->overflow; + sampl_overflow += te->header.overflow; /* Timestamps are valid for full sample-data-blocks only */ debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx " "overflow %llu timestamp %#llx\n", - __func__, (unsigned long)sdbt, te->overflow, - (te->f) ? trailer_timestamp(te) : 0ULL); + __func__, (unsigned long)sdbt, te->header.overflow, + (te->header.f) ? trailer_timestamp(te) : 0ULL); /* Collect all samples from a single sample-data-block and * flag if an (perf) event overflow happened. If so, the PMU @@ -1294,12 +1306,16 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) num_sdb++; /* Reset trailer (using compare-double-and-swap) */ + /* READ_ONCE() 16 byte header */ + prev.val = __cdsg(&te->header.val, 0, 0); do { - te_flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK; - te_flags |= SDB_TE_ALERT_REQ_MASK; - } while (!cmpxchg_double(&te->flags, &te->overflow, - te->flags, te->overflow, - te_flags, 0ULL)); + old.val = prev.val; + new.val = prev.val; + new.f = 0; + new.a = 1; + new.overflow = 0; + prev.val = __cdsg(&te->header.val, old.val, new.val); + } while (prev.val != old.val); /* Advance to next sample-data-block */ sdbt++; @@ -1384,7 +1400,7 @@ static void aux_output_end(struct perf_output_handle *handle) range_scan = AUX_SDB_NUM_ALERT(aux); for (i = 0, idx = aux->head; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); - if (!(te->flags & SDB_TE_BUFFER_FULL_MASK)) + if (!te->header.f) break; } /* i is num of SDBs which are full */ @@ -1392,7 +1408,7 @@ static void aux_output_end(struct perf_output_handle *handle) /* Remove alert indicators in the buffer */ te = aux_sdb_trailer(aux, aux->alert_mark); - te->flags &= ~SDB_TE_ALERT_REQ_MASK; + te->header.a = 0; debug_sprintf_event(sfdbg, 6, "%s: SDBs %ld range %ld head %ld\n", __func__, i, range_scan, aux->head); @@ -1437,9 +1453,9 @@ static int aux_output_begin(struct perf_output_handle *handle, idx = aux->empty_mark + 1; for (i = 0; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); - te->flags &= ~(SDB_TE_BUFFER_FULL_MASK | - SDB_TE_ALERT_REQ_MASK); - te->overflow = 0; + te->header.f = 0; + te->header.a = 0; + te->header.overflow = 0; } /* Save the position of empty SDBs */ aux->empty_mark = aux->head + range - 1; @@ -1448,7 +1464,7 @@ static int aux_output_begin(struct perf_output_handle *handle, /* Set alert indicator */ aux->alert_mark = aux->head + range/2 - 1; te = aux_sdb_trailer(aux, aux->alert_mark); - te->flags = te->flags | SDB_TE_ALERT_REQ_MASK; + te->header.a = 1; /* Reset hardware buffer head */ head = AUX_SDB_INDEX(aux, aux->head); @@ -1475,14 +1491,17 @@ static int aux_output_begin(struct perf_output_handle *handle, static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, unsigned long long *overflow) { - unsigned long long orig_overflow, orig_flags, new_flags; + union hws_trailer_header old, prev, new; struct hws_trailer_entry *te; te = aux_sdb_trailer(aux, alert_index); + /* READ_ONCE() 16 byte header */ + prev.val = __cdsg(&te->header.val, 0, 0); do { - orig_flags = te->flags; - *overflow = orig_overflow = te->overflow; - if (orig_flags & SDB_TE_BUFFER_FULL_MASK) { + old.val = prev.val; + new.val = prev.val; + *overflow = old.overflow; + if (old.f) { /* * SDB is already set by hardware. * Abort and try to set somewhere @@ -1490,10 +1509,10 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, */ return false; } - new_flags = orig_flags | SDB_TE_ALERT_REQ_MASK; - } while (!cmpxchg_double(&te->flags, &te->overflow, - orig_flags, orig_overflow, - new_flags, 0ULL)); + new.a = 1; + new.overflow = 0; + prev.val = __cdsg(&te->header.val, old.val, new.val); + } while (prev.val != old.val); return true; } @@ -1522,8 +1541,9 @@ static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, unsigned long long *overflow) { - unsigned long long orig_overflow, orig_flags, new_flags; unsigned long i, range_scan, idx, idx_old; + union hws_trailer_header old, prev, new; + unsigned long long orig_overflow; struct hws_trailer_entry *te; debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld " @@ -1554,17 +1574,20 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, idx_old = idx = aux->empty_mark + 1; for (i = 0; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); + /* READ_ONCE() 16 byte header */ + prev.val = __cdsg(&te->header.val, 0, 0); do { - orig_flags = te->flags; - orig_overflow = te->overflow; - new_flags = orig_flags & ~SDB_TE_BUFFER_FULL_MASK; + old.val = prev.val; + new.val = prev.val; + orig_overflow = old.overflow; + new.f = 0; + new.overflow = 0; if (idx == aux->alert_mark) - new_flags |= SDB_TE_ALERT_REQ_MASK; + new.a = 1; else - new_flags &= ~SDB_TE_ALERT_REQ_MASK; - } while (!cmpxchg_double(&te->flags, &te->overflow, - orig_flags, orig_overflow, - new_flags, 0ULL)); + new.a = 0; + prev.val = __cdsg(&te->header.val, old.val, new.val); + } while (prev.val != old.val); *overflow += orig_overflow; } From b6ac9ded42aeafb1d307ffc9da8e1965117e9060 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 9 Jan 2023 11:51:20 +0100 Subject: [PATCH 025/177] s390/percpu: add READ_ONCE() to arch_this_cpu_to_op_simple() commit e3f360db08d55a14112bd27454e616a24296a8b0 upstream. Make sure that *ptr__ within arch_this_cpu_to_op_simple() is only dereferenced once by using READ_ONCE(). Otherwise the compiler could generate incorrect code. Cc: Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Greg Kroah-Hartman --- arch/s390/include/asm/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index cb5fc0690435..081837b391e3 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -31,7 +31,7 @@ pcp_op_T__ *ptr__; \ preempt_disable_notrace(); \ ptr__ = raw_cpu_ptr(&(pcp)); \ - prev__ = *ptr__; \ + prev__ = READ_ONCE(*ptr__); \ do { \ old__ = prev__; \ new__ = old__ op (val); \ From adc48e5e408afbb01d261bd303fd9fbbbaa3e317 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Fri, 16 Dec 2022 15:33:55 -0800 Subject: [PATCH 026/177] drm/virtio: Fix GEM handle creation UAF commit 52531258318ed59a2dc5a43df2eaf0eb1d65438e upstream. Userspace can guess the handle value and try to race GEM object creation with handle close, resulting in a use-after-free if we dereference the object after dropping the handle's reference. For that reason, dropping the handle's reference must be done *after* we are done dereferencing the object. Signed-off-by: Rob Clark Reviewed-by: Chia-I Wu Fixes: 62fb7a5e1096 ("virtio-gpu: add 3d/virgl support") Cc: stable@vger.kernel.org Signed-off-by: Dmitry Osipenko Link: https://patchwork.freedesktop.org/patch/msgid/20221216233355.542197-2-robdclark@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/virtio/virtgpu_ioctl.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index 5d05093014ac..9f4a90493aea 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -358,10 +358,18 @@ static int virtio_gpu_resource_create_ioctl(struct drm_device *dev, void *data, drm_gem_object_release(obj); return ret; } - drm_gem_object_put(obj); rc->res_handle = qobj->hw_res_handle; /* similiar to a VM address */ rc->bo_handle = handle; + + /* + * The handle owns the reference now. But we must drop our + * remaining reference *after* we no longer need to dereference + * the obj. Otherwise userspace could guess the handle and + * race closing it from another thread. + */ + drm_gem_object_put(obj); + return 0; } @@ -723,11 +731,18 @@ static int virtio_gpu_resource_create_blob_ioctl(struct drm_device *dev, drm_gem_object_release(obj); return ret; } - drm_gem_object_put(obj); rc_blob->res_handle = bo->hw_res_handle; rc_blob->bo_handle = handle; + /* + * The handle owns the reference now. But we must drop our + * remaining reference *after* we no longer need to dereference + * the obj. Otherwise userspace could guess the handle and + * race closing it from another thread. + */ + drm_gem_object_put(obj); + return 0; } From b8e6fc445990dce368950bd9553b31f46b50285e Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Tue, 10 Jan 2023 11:33:44 +0800 Subject: [PATCH 027/177] drm/amd/pm/smu13: BACO is supported when it's in BACO state commit 972fb53d3605eb6cdf0d6ae9a52e910626a91ff7 upstream. This leverages the logic in smu11. No need to talk to SMU to check BACO enablement as it's in BACO state already. Signed-off-by: Guchun Chen Reviewed-by: Kenneth Feng Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0, 6.1 Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 8e4830a311bd..9b96902adad3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -2249,6 +2249,10 @@ bool smu_v13_0_baco_is_support(struct smu_context *smu) !smu_baco->platform_support) return false; + /* return true if ASIC is in BACO state already */ + if (smu_v13_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER) + return true; + if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) && !smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT)) return false; From 42526442fe3ed9c2487a2a475cb4a6f463ce2eaf Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Thu, 12 Jan 2023 04:00:27 -0800 Subject: [PATCH 028/177] drm: Optimize drm buddy top-down allocation method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 5640e81607152d7f2d2558227c0f6cb78b8f39cf upstream. We are observing performance drop in many usecases which include games, 3D benchmark applications,etc.. To solve this problem, We are strictly not allowing top down flag enabled allocations to steal the memory space from cpu visible region. The idea is, we are sorting each order list entries in ascending order and compare the last entry of each order list in the freelist and return the max block. This patch improves the 3D benchmark scores and solves fragmentation issues. All drm buddy selftests are verfied. drm_buddy: pass:6 fail:0 skip:0 total:6 Signed-off-by: Arunpravin Paneer Selvam Acked-by: Christian König Acked-by: Alex Deucher Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20230112120027.3072-1-Arunpravin.PaneerSelvam@amd.com Signed-off-by: Christian König CC: Cc: stable@vger.kernel.org # 5.18+ Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/drm_buddy.c | 81 ++++++++++++++++++++++++------------- 1 file changed, 54 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index 11bb59399471..3d1f50f481cf 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -38,6 +38,25 @@ static void drm_block_free(struct drm_buddy *mm, kmem_cache_free(slab_blocks, block); } +static void list_insert_sorted(struct drm_buddy *mm, + struct drm_buddy_block *block) +{ + struct drm_buddy_block *node; + struct list_head *head; + + head = &mm->free_list[drm_buddy_block_order(block)]; + if (list_empty(head)) { + list_add(&block->link, head); + return; + } + + list_for_each_entry(node, head, link) + if (drm_buddy_block_offset(block) < drm_buddy_block_offset(node)) + break; + + __list_add(&block->link, node->link.prev, &node->link); +} + static void mark_allocated(struct drm_buddy_block *block) { block->header &= ~DRM_BUDDY_HEADER_STATE; @@ -52,8 +71,7 @@ static void mark_free(struct drm_buddy *mm, block->header &= ~DRM_BUDDY_HEADER_STATE; block->header |= DRM_BUDDY_FREE; - list_add(&block->link, - &mm->free_list[drm_buddy_block_order(block)]); + list_insert_sorted(mm, block); } static void mark_split(struct drm_buddy_block *block) @@ -387,20 +405,26 @@ err_undo: } static struct drm_buddy_block * -get_maxblock(struct list_head *head) +get_maxblock(struct drm_buddy *mm, unsigned int order) { struct drm_buddy_block *max_block = NULL, *node; + unsigned int i; - max_block = list_first_entry_or_null(head, - struct drm_buddy_block, - link); - if (!max_block) - return NULL; + for (i = order; i <= mm->max_order; ++i) { + if (!list_empty(&mm->free_list[i])) { + node = list_last_entry(&mm->free_list[i], + struct drm_buddy_block, + link); + if (!max_block) { + max_block = node; + continue; + } - list_for_each_entry(node, head, link) { - if (drm_buddy_block_offset(node) > - drm_buddy_block_offset(max_block)) - max_block = node; + if (drm_buddy_block_offset(node) > + drm_buddy_block_offset(max_block)) { + max_block = node; + } + } } return max_block; @@ -412,20 +436,23 @@ alloc_from_freelist(struct drm_buddy *mm, unsigned long flags) { struct drm_buddy_block *block = NULL; - unsigned int i; + unsigned int tmp; int err; - for (i = order; i <= mm->max_order; ++i) { - if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) { - block = get_maxblock(&mm->free_list[i]); - if (block) - break; - } else { - block = list_first_entry_or_null(&mm->free_list[i], - struct drm_buddy_block, - link); - if (block) - break; + if (flags & DRM_BUDDY_TOPDOWN_ALLOCATION) { + block = get_maxblock(mm, order); + if (block) + /* Store the obtained block order */ + tmp = drm_buddy_block_order(block); + } else { + for (tmp = order; tmp <= mm->max_order; ++tmp) { + if (!list_empty(&mm->free_list[tmp])) { + block = list_last_entry(&mm->free_list[tmp], + struct drm_buddy_block, + link); + if (block) + break; + } } } @@ -434,18 +461,18 @@ alloc_from_freelist(struct drm_buddy *mm, BUG_ON(!drm_buddy_block_is_free(block)); - while (i != order) { + while (tmp != order) { err = split_block(mm, block); if (unlikely(err)) goto err_undo; block = block->right; - i--; + tmp--; } return block; err_undo: - if (i != order) + if (tmp != order) __drm_buddy_free(mm, block); return ERR_PTR(err); } From 4009502c091c1543ae8708a12d1a97583ae411ac Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 12 Dec 2022 17:13:38 +0100 Subject: [PATCH 029/177] drm/i915/gt: Reset twice commit d3de5616d36462a646f5b360ba82d3b09ff668eb upstream. After applying an engine reset, on some platforms like Jasperlake, we occasionally detect that the engine state is not cleared until shortly after the resume. As we try to resume the engine with volatile internal state, the first request fails with a spurious CS event (it looks like it reports a lite-restore to the hung context, instead of the expected idle->active context switch). Signed-off-by: Chris Wilson Cc: stable@vger.kernel.org Cc: Mika Kuoppala Signed-off-by: Andi Shyti Reviewed-by: Gwan-gyeong Mun Link: https://patchwork.freedesktop.org/patch/msgid/20221212161338.1007659-1-andi.shyti@linux.intel.com (cherry picked from commit 3db9d590557da3aa2c952f2fecd3e9b703dad790) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gt/intel_reset.c | 34 ++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index b36674356986..10b930eaa8cb 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -278,6 +278,7 @@ out: static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) { struct intel_uncore *uncore = gt->uncore; + int loops = 2; int err; /* @@ -285,18 +286,39 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) * for fifo space for the write or forcewake the chip for * the read */ - intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask); + do { + intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask); - /* Wait for the device to ack the reset requests */ - err = __intel_wait_for_register_fw(uncore, - GEN6_GDRST, hw_domain_mask, 0, - 500, 0, - NULL); + /* + * Wait for the device to ack the reset requests. + * + * On some platforms, e.g. Jasperlake, we see that the + * engine register state is not cleared until shortly after + * GDRST reports completion, causing a failure as we try + * to immediately resume while the internal state is still + * in flux. If we immediately repeat the reset, the second + * reset appears to serialise with the first, and since + * it is a no-op, the registers should retain their reset + * value. However, there is still a concern that upon + * leaving the second reset, the internal engine state + * is still in flux and not ready for resuming. + */ + err = __intel_wait_for_register_fw(uncore, GEN6_GDRST, + hw_domain_mask, 0, + 2000, 0, + NULL); + } while (err == 0 && --loops); if (err) GT_TRACE(gt, "Wait for 0x%08x engines reset failed\n", hw_domain_mask); + /* + * As we have observed that the engine state is still volatile + * after GDRST is acked, impose a small delay to let everything settle. + */ + udelay(50); + return err; } From 8874730ecefe295931a681a0ae749cda53653078 Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Fri, 23 Dec 2022 10:20:11 +0100 Subject: [PATCH 030/177] drm/i915: Reserve enough fence slot for i915_vma_unbind_async MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 476fdcdaaae7b06c780cdfc234c704107f16c529 upstream. A nested dma_resv_reserve_fences(1) will not reserve slot from the 2nd call onwards and folowing dma_resv_add_fence() might hit the "BUG_ON(fobj->num_fences >= fobj->max_fences)" check. I915 hit above nested dma_resv case in ttm_bo_handle_move_mem() with async unbind: dma_resv_reserve_fences() from --> ttm_bo_handle_move_mem() dma_resv_reserve_fences() from --> i915_vma_unbind_async() dma_resv_add_fence() from --> i915_vma_unbind_async() dma_resv_add_fence() from -->ttm_bo_move_accel_cleanup() Resolve this by adding an extra fence in i915_vma_unbind_async(). Suggested-by: Thomas Hellström Fixes: 2f6b90da9192 ("drm/i915: Use vma resources for async unbinding") Cc: # v5.18+ Signed-off-by: Nirmoy Das Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20221223092011.11657-1-nirmoy.das@intel.com (cherry picked from commit 4f0755c2faf7388616109717facc5bbde6850e60) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/i915_vma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 4d06875de14a..c8ad8f37e5cf 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -2114,7 +2114,7 @@ int i915_vma_unbind_async(struct i915_vma *vma, bool trylock_vm) if (!obj->mm.rsgt) return -EBUSY; - err = dma_resv_reserve_fences(obj->base.resv, 1); + err = dma_resv_reserve_fences(obj->base.resv, 2); if (err) return -EBUSY; From b696c627b3f56e173f7f70b8487d66da8ff22506 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 3 Jan 2023 15:49:46 -0800 Subject: [PATCH 031/177] drm/i915: Fix potential context UAFs commit afce71ff6daa9c0f852df0727fe32c6fb107f0fa upstream. gem_context_register() makes the context visible to userspace, and which point a separate thread can trigger the I915_GEM_CONTEXT_DESTROY ioctl. So we need to ensure that nothing uses the ctx ptr after this. And we need to ensure that adding the ctx to the xarray is the *last* thing that gem_context_register() does with the ctx pointer. Signed-off-by: Rob Clark Fixes: eb4dedae920a ("drm/i915/gem: Delay tracking the GEM context until it is registered") Fixes: a4c1cdd34e2c ("drm/i915/gem: Delay context creation (v3)") Fixes: 49bd54b390c2 ("drm/i915: Track all user contexts per client") Cc: # v5.10+ Reviewed-by: Tvrtko Ursulin Reviewed-by: Andi Shyti [tursulin: Stable and fixes tags add/tidy.] Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20230103234948.1218393-1-robdclark@gmail.com (cherry picked from commit bed4b455cf5374e68879be56971c1da563bcd90c) Signed-off-by: Rodrigo Vivi Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 24 +++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 1e29b1e6d186..2353723ca1bd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1688,6 +1688,10 @@ void i915_gem_init__contexts(struct drm_i915_private *i915) init_contexts(&i915->gem.contexts); } +/* + * Note that this implicitly consumes the ctx reference, by placing + * the ctx in the context_xa. + */ static void gem_context_register(struct i915_gem_context *ctx, struct drm_i915_file_private *fpriv, u32 id) @@ -1703,10 +1707,6 @@ static void gem_context_register(struct i915_gem_context *ctx, snprintf(ctx->name, sizeof(ctx->name), "%s[%d]", current->comm, pid_nr(ctx->pid)); - /* And finally expose ourselves to userspace via the idr */ - old = xa_store(&fpriv->context_xa, id, ctx, GFP_KERNEL); - WARN_ON(old); - spin_lock(&ctx->client->ctx_lock); list_add_tail_rcu(&ctx->client_link, &ctx->client->ctx_list); spin_unlock(&ctx->client->ctx_lock); @@ -1714,6 +1714,10 @@ static void gem_context_register(struct i915_gem_context *ctx, spin_lock(&i915->gem.contexts.lock); list_add_tail(&ctx->link, &i915->gem.contexts.list); spin_unlock(&i915->gem.contexts.lock); + + /* And finally expose ourselves to userspace via the idr */ + old = xa_store(&fpriv->context_xa, id, ctx, GFP_KERNEL); + WARN_ON(old); } int i915_gem_context_open(struct drm_i915_private *i915, @@ -2199,14 +2203,22 @@ finalize_create_context_locked(struct drm_i915_file_private *file_priv, if (IS_ERR(ctx)) return ctx; + /* + * One for the xarray and one for the caller. We need to grab + * the reference *prior* to making the ctx visble to userspace + * in gem_context_register(), as at any point after that + * userspace can try to race us with another thread destroying + * the context under our feet. + */ + i915_gem_context_get(ctx); + gem_context_register(ctx, file_priv, id); old = xa_erase(&file_priv->proto_context_xa, id); GEM_BUG_ON(old != pc); proto_context_close(file_priv->dev_priv, pc); - /* One for the xarray and one for the caller */ - return i915_gem_context_get(ctx); + return ctx; } struct i915_gem_context * From dcfeba477b3e3df526e0f543b58fa71c045dff8b Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 27 Dec 2022 15:49:17 -0600 Subject: [PATCH 032/177] drm/amd: Delay removal of the firmware framebuffer commit 1923bc5a56daeeabd7e9093bad2febcd6af2416a upstream. Removing the firmware framebuffer from the driver means that even if the driver doesn't support the IP blocks in a GPU it will no longer be functional after the driver fails to initialize. This change will ensure that unsupported IP blocks at least cause the driver to work with the EFI framebuffer. Cc: stable@vger.kernel.org Suggested-by: Alex Deucher Reviewed-by: Alex Deucher Reviewed-by: Lijo Lazar Signed-off-by: Mario Limonciello Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 6 ------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 0be85d19a6f3..8f83d5b6ceaa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -36,6 +36,7 @@ #include #include +#include #include #include #include @@ -89,6 +90,8 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); #define AMDGPU_MAX_RETRY_LIMIT 2 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL) +static const struct drm_driver amdgpu_kms_driver; + const char *amdgpu_asic_name[] = { "TAHITI", "PITCAIRN", @@ -3677,6 +3680,11 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) return r; + /* Get rid of things like offb */ + r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver); + if (r) + return r; + /* Enable TMZ based on IP_VERSION */ amdgpu_gmc_tmz_set(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b59466972ed7..2e5d78b6635c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -23,7 +23,6 @@ */ #include -#include #include #include #include @@ -2123,11 +2122,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, } #endif - /* Get rid of things like offb */ - ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &amdgpu_kms_driver); - if (ret) - return ret; - adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, typeof(*adev), ddev); if (IS_ERR(adev)) return PTR_ERR(adev); From 9196eb7c52e55749a332974f0081f77d53d60199 Mon Sep 17 00:00:00 2001 From: YiPeng Chai Date: Fri, 6 Jan 2023 14:04:15 +0800 Subject: [PATCH 033/177] drm/amdgpu: Fixed bug on error when unloading amdgpu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 99f1a36c90a7524972be5a028424c57fa17753ee upstream. Fixed bug on error when unloading amdgpu. The error message is as follows: [ 377.706202] kernel BUG at drivers/gpu/drm/drm_buddy.c:278! [ 377.706215] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [ 377.706222] CPU: 4 PID: 8610 Comm: modprobe Tainted: G IOE 6.0.0-thomas #1 [ 377.706231] Hardware name: ASUS System Product Name/PRIME Z390-A, BIOS 2004 11/02/2021 [ 377.706238] RIP: 0010:drm_buddy_free_block+0x26/0x30 [drm_buddy] [ 377.706264] Code: 00 00 00 90 0f 1f 44 00 00 48 8b 0e 89 c8 25 00 0c 00 00 3d 00 04 00 00 75 10 48 8b 47 18 48 d3 e0 48 01 47 28 e9 fa fe ff ff <0f> 0b 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 41 54 55 48 89 f5 53 [ 377.706282] RSP: 0018:ffffad2dc4683cb8 EFLAGS: 00010287 [ 377.706289] RAX: 0000000000000000 RBX: ffff8b1743bd5138 RCX: 0000000000000000 [ 377.706297] RDX: ffff8b1743bd5160 RSI: ffff8b1743bd5c78 RDI: ffff8b16d1b25f70 [ 377.706304] RBP: ffff8b1743bd59e0 R08: 0000000000000001 R09: 0000000000000001 [ 377.706311] R10: ffff8b16c8572400 R11: ffffad2dc4683cf0 R12: ffff8b16d1b25f70 [ 377.706318] R13: ffff8b16d1b25fd0 R14: ffff8b1743bd59c0 R15: ffff8b16d1b25f70 [ 377.706325] FS: 00007fec56c72c40(0000) GS:ffff8b1836500000(0000) knlGS:0000000000000000 [ 377.706334] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 377.706340] CR2: 00007f9b88c1ba50 CR3: 0000000110450004 CR4: 00000000003706e0 [ 377.706347] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 377.706354] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 377.706361] Call Trace: [ 377.706365] [ 377.706369] drm_buddy_free_list+0x2a/0x60 [drm_buddy] [ 377.706376] amdgpu_vram_mgr_fini+0xea/0x180 [amdgpu] [ 377.706572] amdgpu_ttm_fini+0x12e/0x1a0 [amdgpu] [ 377.706650] amdgpu_bo_fini+0x22/0x90 [amdgpu] [ 377.706727] gmc_v11_0_sw_fini+0x26/0x30 [amdgpu] [ 377.706821] amdgpu_device_fini_sw+0xa1/0x3c0 [amdgpu] [ 377.706897] amdgpu_driver_release_kms+0x12/0x30 [amdgpu] [ 377.706975] drm_dev_release+0x20/0x40 [drm] [ 377.707006] release_nodes+0x35/0xb0 [ 377.707014] devres_release_all+0x8b/0xc0 [ 377.707020] device_unbind_cleanup+0xe/0x70 [ 377.707027] device_release_driver_internal+0xee/0x160 [ 377.707033] driver_detach+0x44/0x90 [ 377.707039] bus_remove_driver+0x55/0xe0 [ 377.707045] pci_unregister_driver+0x3b/0x90 [ 377.707052] amdgpu_exit+0x11/0x6c [amdgpu] [ 377.707194] __x64_sys_delete_module+0x142/0x2b0 [ 377.707201] ? fpregs_assert_state_consistent+0x22/0x50 [ 377.707208] ? exit_to_user_mode_prepare+0x3e/0x190 [ 377.707215] do_syscall_64+0x38/0x90 [ 377.707221] entry_SYSCALL_64_after_hwframe+0x63/0xcd Signed-off-by: YiPeng Chai Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 80dd1343594c..75c80c557b6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -882,7 +882,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) kfree(rsv); list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) { - drm_buddy_free_list(&mgr->mm, &rsv->blocks); + drm_buddy_free_list(&mgr->mm, &rsv->allocated); kfree(rsv); } drm_buddy_fini(&mgr->mm); From b435f68e415ca5e17e3425893ea991720a43b595 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 16 Dec 2022 17:12:53 +0800 Subject: [PATCH 034/177] drm/amd/pm: correct the reference clock for fan speed(rpm) calculation commit 6fea87637bf36bd285227f490132e83582ab7513 upstream. Correct the reference clock as 25Mhz for SMU13 fan speed calculation. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x, 6.1.x Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 9b96902adad3..cfb7f4475c82 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -1258,7 +1258,8 @@ int smu_v13_0_set_fan_speed_rpm(struct smu_context *smu, uint32_t speed) { struct amdgpu_device *adev = smu->adev; - uint32_t tach_period, crystal_clock_freq; + uint32_t crystal_clock_freq = 2500; + uint32_t tach_period; int ret; if (!speed) @@ -1268,7 +1269,6 @@ int smu_v13_0_set_fan_speed_rpm(struct smu_context *smu, if (ret) return ret; - crystal_clock_freq = amdgpu_asic_get_xclk(adev); tach_period = 60 * crystal_clock_freq * 10000 / (8 * speed); WREG32_SOC15(THM, 0, regCG_TACH_CTRL, REG_SET_FIELD(RREG32_SOC15(THM, 0, regCG_TACH_CTRL), From 363781f72d46c3762d7c0fd8fc13f27dce47f5cf Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Wed, 4 Jan 2023 10:45:01 +0800 Subject: [PATCH 035/177] drm/amd/pm: add the missing mapping for PPT feature on SMU13.0.0 and 13.0.7 commit 318ca20893c19ead02845a08204c3f9249bb74cd upstream. Then we are able to set a new ppt limit via the hwmon interface(power1_cap). Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x, 6.1.x Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 1 + drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index b8430601304f..be43de9dd496 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -189,6 +189,7 @@ static struct cmn2asic_mapping smu_v13_0_0_feature_mask_map[SMU_FEATURE_COUNT] = FEA_MAP(SOC_PCC), [SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, [SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, + [SMU_FEATURE_PPT_BIT] = {1, FEATURE_THROTTLERS_BIT}, }; static struct cmn2asic_mapping smu_v13_0_0_table_map[SMU_TABLE_COUNT] = { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 222924363a68..31deec2ce4b3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -191,6 +191,7 @@ static struct cmn2asic_mapping smu_v13_0_7_feature_mask_map[SMU_FEATURE_COUNT] = FEA_MAP(SOC_PCC), [SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, [SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT}, + [SMU_FEATURE_PPT_BIT] = {1, FEATURE_THROTTLERS_BIT}, }; static struct cmn2asic_mapping smu_v13_0_7_table_map[SMU_TABLE_COUNT] = { From 69a75087cd1f10d5be07527f7ed7ed38db429d7e Mon Sep 17 00:00:00 2001 From: Ao Zhong Date: Tue, 25 Oct 2022 23:17:49 +0200 Subject: [PATCH 036/177] drm/amd/display: move remaining FPU code to dml folder commit 58ddbecb14c792b7fe0d92ae5e25c9179d62ff25 upstream. pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; these two operations in dcn32/dcn32_resource.c still need to use FPU, This will cause compilation to fail on ARM64 platforms because -mgeneral-regs-only is enabled by default to disable the hardware FPU. Therefore, imitate the dcn31_zero_pipe_dcc_fraction function in dml/dcn31/dcn31_fpu.c, declare the dcn32_zero_pipe_dcc_fraction function in dcn32_fpu.c, and move above two operations into this function. Reviewed-by: Rodrigo Siqueira Signed-off-by: Ao Zhong Signed-off-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 5 +++-- drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 8 ++++++++ drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 3 +++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c index 33ab6fdc3617..9919c39f7ea0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c @@ -1919,8 +1919,9 @@ int dcn32_populate_dml_pipes_from_context( timing = &pipe->stream->timing; pipes[pipe_cnt].pipe.src.gpuvm = true; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; + DC_FP_START(); + dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt); + DC_FP_END(); pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch; pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c index d1bf49d207de..d90216d2fe3a 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c @@ -2546,3 +2546,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa } } +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes, + int pipe_cnt) +{ + dc_assert_fp_enabled(); + + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h index 3a3dc2ce4c73..ab010e7e840b 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h @@ -73,4 +73,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc, void dcn32_patch_dpm_table(struct clk_bw_params *bw_params); +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes, + int pipe_cnt); + #endif From f905e03c8ff65d80c24f42d8b93df3cec5a7ab12 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 13 Jan 2023 14:03:02 -0500 Subject: [PATCH 037/177] Revert "drm/amdgpu: Revert "drm/amdgpu: getting fan speed pwm for vega10 properly"" This reverts commit 9ccd11718d76b95c69aa773f2abedef560776037 The original commit 16fb4dca95daa ("drm/amdgpu: getting fan speed pwm for vega10 properly") was reverted in commit 4545ae2ed3f2 ("drm/amdgpu: Revert "drm/amdgpu: getting fan speed pwm for vega10 properly""). but the test that resulted in the revert was wrong and was fixed so the revert was reverted in commit 30b8e7b8ee3b ("Revert "drm/amdgpu: Revert "drm/amdgpu: getting fan speed pwm for vega10 properly"""). That should have been the end of it, but then Sasha picked up the original revert again and it was committed as 9ccd11718d76. So drop that commit so we get back to where we need to be. Signed-off-by: Alex Deucher Cc: Sasha Levin Cc: stable@vger.kernel.org # 6.1.x Cc: Yury Zhuravlev Cc: Guchun Chen Cc: Asher Song Signed-off-by: Greg Kroah-Hartman --- .../amd/pm/powerplay/hwmgr/vega10_thermal.c | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c index dad3e3741a4e..190af79f3236 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c @@ -67,22 +67,21 @@ int vega10_fan_ctrl_get_fan_speed_info(struct pp_hwmgr *hwmgr, int vega10_fan_ctrl_get_fan_speed_pwm(struct pp_hwmgr *hwmgr, uint32_t *speed) { - uint32_t current_rpm; - uint32_t percent = 0; + struct amdgpu_device *adev = hwmgr->adev; + uint32_t duty100, duty; + uint64_t tmp64; - if (hwmgr->thermal_controller.fanInfo.bNoFan) - return 0; + duty100 = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL1), + CG_FDO_CTRL1, FMAX_DUTY100); + duty = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_THERMAL_STATUS), + CG_THERMAL_STATUS, FDO_PWM_DUTY); - if (vega10_get_current_rpm(hwmgr, ¤t_rpm)) - return -1; + if (!duty100) + return -EINVAL; - if (hwmgr->thermal_controller. - advanceFanControlParameters.usMaxFanRPM != 0) - percent = current_rpm * 255 / - hwmgr->thermal_controller. - advanceFanControlParameters.usMaxFanRPM; - - *speed = MIN(percent, 255); + tmp64 = (uint64_t)duty * 255; + do_div(tmp64, duty100); + *speed = MIN((uint32_t)tmp64, 255); return 0; } From d4b7ff9edebc6f7498873493e5b50f50a2ba6d49 Mon Sep 17 00:00:00 2001 From: Volker Lendecke Date: Wed, 11 Jan 2023 12:37:58 +0100 Subject: [PATCH 038/177] cifs: Fix uninitialized memory read for smb311 posix symlink create commit a152d05ae4a71d802d50cf9177dba34e8bb09f68 upstream. If smb311 posix is enabled, we send the intended mode for file creation in the posix create context. Instead of using what's there on the stack, create the mfsymlink file with 0644. Fixes: ce558b0e17f8a ("smb3: Add posix create context for smb3.11 posix mounts") Cc: stable@vger.kernel.org Signed-off-by: Volker Lendecke Reviewed-by: Tom Talpey Reviewed-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/link.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/link.c b/fs/cifs/link.c index bd374feeccaa..a5a097a69983 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -428,6 +428,7 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, oparms.disposition = FILE_CREATE; oparms.fid = &fid; oparms.reconnect = false; + oparms.mode = 0644; rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL, NULL, NULL); From 5a574327d1c5f7036af84ebe99cddba88f7d2038 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 6 Jan 2023 20:28:30 -0300 Subject: [PATCH 039/177] cifs: fix file info setting in cifs_query_path_info() commit 29cf28235e3e57e0af01ae29db57a75f87a2ada8 upstream. We missed to set file info when CIFSSMBQPathInfo() returned 0, thus leaving cifs_open_info_data::fi unset. Fix this by setting cifs_open_info_data::fi when either CIFSSMBQPathInfo() or SMBQueryInformation() succeed. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216881 Fixes: 76894f3e2f71 ("cifs: improve symlink handling for smb2+") Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb1ops.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 50480751e521..5fe2c2f8ef41 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -562,17 +562,20 @@ static int cifs_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, if ((rc == -EOPNOTSUPP) || (rc == -EINVAL)) { rc = SMBQueryInformation(xid, tcon, full_path, &fi, cifs_sb->local_nls, cifs_remap(cifs_sb)); - if (!rc) - move_cifs_info_to_smb2(&data->fi, &fi); *adjustTZ = true; } - if (!rc && (le32_to_cpu(fi.Attributes) & ATTR_REPARSE)) { + if (!rc) { int tmprc; int oplock = 0; struct cifs_fid fid; struct cifs_open_parms oparms; + move_cifs_info_to_smb2(&data->fi, &fi); + + if (!(le32_to_cpu(fi.Attributes) & ATTR_REPARSE)) + return 0; + oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = FILE_READ_ATTRIBUTES; From d54a3ef8b8bdb9e12dc5ff97d44b9827416eed54 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 6 Jan 2023 13:34:36 -0300 Subject: [PATCH 040/177] cifs: fix file info setting in cifs_open_file() commit ba5d4c1596cada37793d405dd18d695cd3508902 upstream. In cifs_open_file(), @buf must hold a pointer to a cifs_open_info_data structure which is passed by cifs_nt_open(), so assigning @buf directly to @fi was obviously wrong. Fix this by passing a valid FILE_ALL_INFO structure to SMBLegacyOpen() and CIFS_open(), and then copy the set structure to the corresponding cifs_open_info_data::fi field with move_cifs_info_to_smb2() helper. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216889 Fixes: 76894f3e2f71 ("cifs: improve symlink handling for smb2+") Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb1ops.c | 54 ++++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 5fe2c2f8ef41..4cb364454e13 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -719,17 +719,25 @@ cifs_mkdir_setinfo(struct inode *inode, const char *full_path, static int cifs_open_file(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock, void *buf) { - FILE_ALL_INFO *fi = buf; + struct cifs_open_info_data *data = buf; + FILE_ALL_INFO fi = {}; + int rc; if (!(oparms->tcon->ses->capabilities & CAP_NT_SMBS)) - return SMBLegacyOpen(xid, oparms->tcon, oparms->path, - oparms->disposition, - oparms->desired_access, - oparms->create_options, - &oparms->fid->netfid, oplock, fi, - oparms->cifs_sb->local_nls, - cifs_remap(oparms->cifs_sb)); - return CIFS_open(xid, oparms, oplock, fi); + rc = SMBLegacyOpen(xid, oparms->tcon, oparms->path, + oparms->disposition, + oparms->desired_access, + oparms->create_options, + &oparms->fid->netfid, oplock, &fi, + oparms->cifs_sb->local_nls, + cifs_remap(oparms->cifs_sb)); + else + rc = CIFS_open(xid, oparms, oplock, &fi); + + if (!rc && data) + move_cifs_info_to_smb2(&data->fi, &fi); + + return rc; } static void @@ -1053,7 +1061,7 @@ cifs_make_node(unsigned int xid, struct inode *inode, struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct inode *newinode = NULL; int rc = -EPERM; - FILE_ALL_INFO *buf = NULL; + struct cifs_open_info_data buf = {}; struct cifs_io_parms io_parms; __u32 oplock = 0; struct cifs_fid fid; @@ -1085,14 +1093,14 @@ cifs_make_node(unsigned int xid, struct inode *inode, cifs_sb->local_nls, cifs_remap(cifs_sb)); if (rc) - goto out; + return rc; rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb, xid); if (rc == 0) d_instantiate(dentry, newinode); - goto out; + return rc; } /* @@ -1100,19 +1108,13 @@ cifs_make_node(unsigned int xid, struct inode *inode, * support block and char device (no socket & fifo) */ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) - goto out; + return rc; if (!S_ISCHR(mode) && !S_ISBLK(mode)) - goto out; + return rc; cifs_dbg(FYI, "sfu compat create special file\n"); - buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); - if (buf == NULL) { - rc = -ENOMEM; - goto out; - } - oparms.tcon = tcon; oparms.cifs_sb = cifs_sb; oparms.desired_access = GENERIC_WRITE; @@ -1127,21 +1129,21 @@ cifs_make_node(unsigned int xid, struct inode *inode, oplock = REQ_OPLOCK; else oplock = 0; - rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, buf); + rc = tcon->ses->server->ops->open(xid, &oparms, &oplock, &buf); if (rc) - goto out; + return rc; /* * BB Do not bother to decode buf since no local inode yet to put * timestamps in, but we can reuse it safely. */ - pdev = (struct win_dev *)buf; + pdev = (struct win_dev *)&buf.fi; io_parms.pid = current->tgid; io_parms.tcon = tcon; io_parms.offset = 0; io_parms.length = sizeof(struct win_dev); - iov[1].iov_base = buf; + iov[1].iov_base = &buf.fi; iov[1].iov_len = sizeof(struct win_dev); if (S_ISCHR(mode)) { memcpy(pdev->type, "IntxCHR", 8); @@ -1160,8 +1162,8 @@ cifs_make_node(unsigned int xid, struct inode *inode, d_drop(dentry); /* FIXME: add code here to set EAs */ -out: - kfree(buf); + + cifs_free_open_info(&buf); return rc; } From 888c060bc327e6a231304aefc0f30e97b1640338 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 10 Jan 2023 19:23:21 -0300 Subject: [PATCH 041/177] cifs: do not query ifaces on smb1 mounts commit 22aeb01db7080e18c6aeb4361cc2556c9887099a upstream. Users have reported the following error on every 600 seconds (SMB_INTERFACE_POLL_INTERVAL) when mounting SMB1 shares: CIFS: VFS: \\srv\share error -5 on ioctl to get interface list It's supported only by SMB2+, so do not query network interfaces on SMB1 mounts. Fixes: 6e1c1c08cdf3 ("cifs: periodically query network interfaces from server") Reviewed-by: Shyam Prasad N Reviewed-by: Tom Talpey Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/connect.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 7e7f712f97fd..fde1c371605a 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2609,11 +2609,14 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) INIT_LIST_HEAD(&tcon->pending_opens); tcon->status = TID_GOOD; - /* schedule query interfaces poll */ INIT_DELAYED_WORK(&tcon->query_interfaces, smb2_query_server_interfaces); - queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, - (SMB_INTERFACE_POLL_INTERVAL * HZ)); + if (ses->server->dialect >= SMB30_PROT_ID && + (ses->server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) { + /* schedule query interfaces poll */ + queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, + (SMB_INTERFACE_POLL_INTERVAL * HZ)); + } spin_lock(&cifs_tcp_ses_lock); list_add(&tcon->tcon_list, &ses->tcon_list); From f0f326dc9794cc5820e0804d8702d5a10fb84aca Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 10 Jan 2023 17:55:20 -0300 Subject: [PATCH 042/177] cifs: fix double free on failed kerberos auth commit 39e8db3c860e2678ce5a7d74193925876507c9eb upstream. If session setup failed with kerberos auth, we ended up freeing cifs_ses::auth_key.response twice in SMB2_auth_kerberos() and sesInfoFree(). Fix this by zeroing out cifs_ses::auth_key.response after freeing it in SMB2_auth_kerberos(). Fixes: a4e430c8c8ba ("cifs: replace kfree() with kfree_sensitive() for sensitive data") Signed-off-by: Paulo Alcantara (SUSE) Acked-by: Ronnie Sahlberg Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/cifs/smb2pdu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index a5695748a89b..4ac5b1bfaf78 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1479,8 +1479,11 @@ SMB2_auth_kerberos(struct SMB2_sess_data *sess_data) out_put_spnego_key: key_invalidate(spnego_key); key_put(spnego_key); - if (rc) + if (rc) { kfree_sensitive(ses->auth_key.response); + ses->auth_key.response = NULL; + ses->auth_key.len = 0; + } out: sess_data->result = rc; sess_data->func = NULL; From 617401df46051dfb5dddfc74e06a2ffd93c8e376 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 10 Jan 2023 10:24:52 -0700 Subject: [PATCH 043/177] io_uring/fdinfo: include locked hash table in fdinfo output commit ea97cbebaf861d99c3e892275147e6fca6d2c1ca upstream. A previous commit split the hash table for polled requests into two parts, but didn't get the fdinfo output updated. This means that it's less useful for debugging, as we may think a given request is not pending poll. Fix this up by dumping the locked hash table contents too. Fixes: 9ca9fb24d5fe ("io_uring: mutex locked poll hashing") Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/fdinfo.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index 2e04850a657b..882bd56b01ed 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -170,12 +170,11 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, xa_for_each(&ctx->personalities, index, cred) io_uring_show_cred(m, index, cred); } - if (has_lock) - mutex_unlock(&ctx->uring_lock); seq_puts(m, "PollList:\n"); for (i = 0; i < (1U << ctx->cancel_table.hash_bits); i++) { struct io_hash_bucket *hb = &ctx->cancel_table.hbs[i]; + struct io_hash_bucket *hbl = &ctx->cancel_table_locked.hbs[i]; struct io_kiocb *req; spin_lock(&hb->lock); @@ -183,8 +182,17 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, seq_printf(m, " op=%d, task_works=%d\n", req->opcode, task_work_pending(req->task)); spin_unlock(&hb->lock); + + if (!has_lock) + continue; + hlist_for_each_entry(req, &hbl->list, hash_node) + seq_printf(m, " op=%d, task_works=%d\n", req->opcode, + task_work_pending(req->task)); } + if (has_lock) + mutex_unlock(&ctx->uring_lock); + seq_puts(m, "CqOverflowList:\n"); spin_lock(&ctx->completion_lock); list_for_each_entry(ocqe, &ctx->cq_overflow_list, list) { From 462ed8a2bebae3d7a039187d8d7a06094a43ff2e Mon Sep 17 00:00:00 2001 From: ChiYuan Huang Date: Thu, 29 Dec 2022 16:03:53 +0800 Subject: [PATCH 044/177] ASoC: rt9120: Make dev PM runtime bind AsoC component PM commit 7161bd540eebebae2bbe8c79de25d8caf12dbf78 upstream. RT9120 uses PM runtime autosuspend to decrease the frequently on/off spent time. This exists one case, when pcm is closed and dev PM is waiting for autosuspend time expired to enter runtime suspend state. At the mean time, system is going to enter suspend, dev PM runtime suspend won't be called. It makes the rt9120 suspend consumption current not as expected. This patch can fix the rt9120 dev PM issue during runtime autosuspend and system suspend by binding dev PM runtime and ASoC component PM. Fixes: 80b949f332e3 ("ASoC: rt9120: Use pm_runtime and regcache to optimize 'pwdnn' logic") Signed-off-by: ChiYuan Huang Link: https://lore.kernel.org/r/1672301033-3675-1-git-send-email-u0084500@gmail.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/codecs/rt9120.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/soc/codecs/rt9120.c b/sound/soc/codecs/rt9120.c index 644300e88b4c..fcf4fbaed3c7 100644 --- a/sound/soc/codecs/rt9120.c +++ b/sound/soc/codecs/rt9120.c @@ -177,8 +177,20 @@ static int rt9120_codec_probe(struct snd_soc_component *comp) return 0; } +static int rt9120_codec_suspend(struct snd_soc_component *comp) +{ + return pm_runtime_force_suspend(comp->dev); +} + +static int rt9120_codec_resume(struct snd_soc_component *comp) +{ + return pm_runtime_force_resume(comp->dev); +} + static const struct snd_soc_component_driver rt9120_component_driver = { .probe = rt9120_codec_probe, + .suspend = rt9120_codec_suspend, + .resume = rt9120_codec_resume, .controls = rt9120_snd_controls, .num_controls = ARRAY_SIZE(rt9120_snd_controls), .dapm_widgets = rt9120_dapm_widgets, From 1f4a9408024e268fe0519bfa3d2ed8c86ab1d4a4 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 9 Jan 2023 20:18:11 +0100 Subject: [PATCH 045/177] ACPI: video: Allow selecting NVidia-WMI-EC or Apple GMUX backlight from the cmdline commit 420a1116aef0e8e12c305508f45ce73e5ae30a09 upstream. The patches adding NVidia-WMI-EC and Apple GMUX backlight detection support to acpi_video_get_backlight_type(), forgot to update acpi_video_parse_cmdline() to allow manually selecting these from the commandline. Add support for these to acpi_video_parse_cmdline(). Fixes: fe7aebb40d42 ("ACPI: video: Add Nvidia WMI EC brightness control detection (v3)") Fixes: 21245df307cb ("ACPI: video: Add Apple GMUX brightness control detection") Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/video_detect.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 76b7e7f8894e..1db8e68cd8bc 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -50,6 +50,10 @@ static void acpi_video_parse_cmdline(void) acpi_backlight_cmdline = acpi_backlight_video; if (!strcmp("native", acpi_video_backlight_string)) acpi_backlight_cmdline = acpi_backlight_native; + if (!strcmp("nvidia_wmi_ec", acpi_video_backlight_string)) + acpi_backlight_cmdline = acpi_backlight_nvidia_wmi_ec; + if (!strcmp("apple_gmux", acpi_video_backlight_string)) + acpi_backlight_cmdline = acpi_backlight_apple_gmux; if (!strcmp("none", acpi_video_backlight_string)) acpi_backlight_cmdline = acpi_backlight_none; } From 3a79bad876164ac34f566c66074503124adf9336 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 21 Dec 2022 23:07:24 +0100 Subject: [PATCH 046/177] platform/x86: dell-privacy: Only register SW_CAMERA_LENS_COVER if present commit 6dc485f9940df8105ea729cbeb7a7d18d409dde5 upstream. Unlike keys where userspace only reacts to keypresses, userspace may act on switches in both (0 and 1) of their positions. For example if a SW_TABLET_MODE switch is registered then GNOME will not automatically show the onscreen keyboard when a text field gets focus on touchscreen devices when SW_TABLET_MODE reports 0 and when SW_TABLET_MODE reports 1 libinput will block (filter out) builtin keyboard and touchpad events. So to avoid unwanted side-effects EV_SW type inputs should only be registered if they are actually present, only register SW_CAMERA_LENS_COVER if it is actually there. Fixes: 8af9fa37b8a3 ("platform/x86: dell-privacy: Add support for Dell hardware privacy") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20221221220724.119594-2-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/dell/dell-wmi-privacy.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/dell/dell-wmi-privacy.c b/drivers/platform/x86/dell/dell-wmi-privacy.c index c82b3d6867c5..9fbd974b723e 100644 --- a/drivers/platform/x86/dell/dell-wmi-privacy.c +++ b/drivers/platform/x86/dell/dell-wmi-privacy.c @@ -292,7 +292,7 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context) { struct privacy_wmi_data *priv; struct key_entry *keymap; - int ret, i; + int ret, i, j; ret = wmi_has_guid(DELL_PRIVACY_GUID); if (!ret) @@ -318,9 +318,20 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context) /* remap the keymap code with Dell privacy key type 0x12 as prefix * KEY_MICMUTE scancode will be reported as 0x120001 */ - for (i = 0; i < ARRAY_SIZE(dell_wmi_keymap_type_0012); i++) { - keymap[i] = dell_wmi_keymap_type_0012[i]; - keymap[i].code |= (0x0012 << 16); + for (i = 0, j = 0; i < ARRAY_SIZE(dell_wmi_keymap_type_0012); i++) { + /* + * Unlike keys where only presses matter, userspace may act + * on switches in both of their positions. Only register + * SW_CAMERA_LENS_COVER if it is actually there. + */ + if (dell_wmi_keymap_type_0012[i].type == KE_VSW && + dell_wmi_keymap_type_0012[i].sw.code == SW_CAMERA_LENS_COVER && + !(priv->features_present & BIT(DELL_PRIVACY_TYPE_CAMERA))) + continue; + + keymap[j] = dell_wmi_keymap_type_0012[i]; + keymap[j].code |= (0x0012 << 16); + j++; } ret = sparse_keymap_setup(priv->input_dev, keymap, NULL); kfree(keymap); From 7221c2a7be6480cbaf2eb3012e564fe07beb55ad Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Fri, 2 Dec 2022 23:33:19 +0100 Subject: [PATCH 047/177] platform/surface: aggregator: Ignore command messages not intended for us commit ae0fa0a3126a86c801c3220fcd8eefe03aa39f3e upstream. It is possible that we (the host/kernel driver) receive command messages that are not intended for us. Ignore those for now. The whole story is a bit more complicated: It is possible to enable debug output on SAM, which is sent via SSH command messages. By default this output is sent to a debug connector, with its own target ID (TID=0x03). It is possible to override the target of the debug output and set it to the host/kernel driver. This, however, does not change the original target ID of the message. Meaning, we receive messages with TID=0x03 (debug) but expect to only receive messages with TID=0x00 (host). The problem is that the different target ID also comes with a different scope of request IDs. In particular, these do not follow the standard event rules (i.e. do not fall into a set of small reserved values). Therefore, current message handling interprets them as responses to pending requests and tries to match them up via the request ID. However, these debug output messages are not in fact responses, and therefore this will at best fail to find the request and at worst pass on the wrong data as response for a request. Therefore ignore any command messages not intended for us (host) for now. We can implement support for the debug messages once we have a better understanding of them. Note that this may also provide a bit more stability and avoid some driver confusion in case any other targets want to talk to us in the future, since we don't yet know what to do with those as well. A warning for the dropped messages should suffice for now and also give us a chance of discovering new targets if they come along without any potential for bugs/instabilities. Fixes: c167b9c7e3d6 ("platform/surface: Add Surface Aggregator subsystem") Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20221202223327.690880-2-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- .../surface/aggregator/ssh_request_layer.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/platform/surface/aggregator/ssh_request_layer.c b/drivers/platform/surface/aggregator/ssh_request_layer.c index f5565570f16c..69132976d297 100644 --- a/drivers/platform/surface/aggregator/ssh_request_layer.c +++ b/drivers/platform/surface/aggregator/ssh_request_layer.c @@ -916,6 +916,20 @@ static void ssh_rtl_rx_command(struct ssh_ptl *p, const struct ssam_span *data) if (sshp_parse_command(dev, data, &command, &command_data)) return; + /* + * Check if the message was intended for us. If not, drop it. + * + * Note: We will need to change this to handle debug messages. On newer + * generation devices, these seem to be sent to tid_out=0x03. We as + * host can still receive them as they can be forwarded via an override + * option on SAM, but doing so does not change tid_out=0x00. + */ + if (command->tid_out != 0x00) { + rtl_warn(rtl, "rtl: dropping message not intended for us (tid = %#04x)\n", + command->tid_out); + return; + } + if (ssh_rqid_is_event(get_unaligned_le16(&command->rqid))) ssh_rtl_rx_event(rtl, command, &command_data); else From c7acfd933d965cbeeb5f2493feea8d6dacde1917 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 11 Jan 2023 21:14:26 +0100 Subject: [PATCH 048/177] platform/x86: int3472/discrete: Ensure the clk/power enable pins are in output mode commit cf5ac2d45f6e4d11ad78e7b10ae9a4121ba5e995 upstream. acpi_get_and_request_gpiod() does not take a gpio_lookup_flags argument specifying that the pins direction should be initialized to a specific value. This means that in some cases the pins might be left in input mode, causing the gpiod_set() calls made to enable the clk / regulator to not work. One example of this problem is the clk-enable GPIO for the ov01a1s sensor on a Dell Latitude 9420 being left in input mode causing the clk to never get enabled. Explicitly set the direction of the pins to output to fix this. Fixes: 5de691bffe57 ("platform/x86: Add intel_skl_int3472 driver") Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Reviewed-by: Daniel Scally Reviewed-by: Sakari Ailus Link: https://lore.kernel.org/r/20230111201426.947853-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/intel/int3472/clk_and_regulator.c | 3 +++ drivers/platform/x86/intel/int3472/discrete.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/drivers/platform/x86/intel/int3472/clk_and_regulator.c b/drivers/platform/x86/intel/int3472/clk_and_regulator.c index b2342b3d78c7..74dc2cff799e 100644 --- a/drivers/platform/x86/intel/int3472/clk_and_regulator.c +++ b/drivers/platform/x86/intel/int3472/clk_and_regulator.c @@ -181,6 +181,9 @@ int skl_int3472_register_regulator(struct int3472_discrete_device *int3472, return PTR_ERR(int3472->regulator.gpio); } + /* Ensure the pin is in output mode and non-active state */ + gpiod_direction_output(int3472->regulator.gpio, 0); + cfg.dev = &int3472->adev->dev; cfg.init_data = &init_data; cfg.ena_gpiod = int3472->regulator.gpio; diff --git a/drivers/platform/x86/intel/int3472/discrete.c b/drivers/platform/x86/intel/int3472/discrete.c index 974a132db651..c42c3faa2c32 100644 --- a/drivers/platform/x86/intel/int3472/discrete.c +++ b/drivers/platform/x86/intel/int3472/discrete.c @@ -168,6 +168,8 @@ static int skl_int3472_map_gpio_to_clk(struct int3472_discrete_device *int3472, return (PTR_ERR(gpio)); int3472->clock.ena_gpio = gpio; + /* Ensure the pin is in output mode and non-active state */ + gpiod_direction_output(int3472->clock.ena_gpio, 0); break; case INT3472_GPIO_TYPE_PRIVACY_LED: gpio = acpi_get_and_request_gpiod(path, pin, "int3472,privacy-led"); @@ -175,6 +177,8 @@ static int skl_int3472_map_gpio_to_clk(struct int3472_discrete_device *int3472, return (PTR_ERR(gpio)); int3472->clock.led_gpio = gpio; + /* Ensure the pin is in output mode and non-active state */ + gpiod_direction_output(int3472->clock.led_gpio, 0); break; default: dev_err(int3472->dev, "Invalid GPIO type 0x%02x for clock\n", type); From 127bf1d6bfe3add22136c9dd45f1b7ccf4442bc4 Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Thu, 12 Jan 2023 17:12:28 -0500 Subject: [PATCH 049/177] platform/x86: thinkpad_acpi: Fix profile mode display in AMT mode commit fde5f74ccfc771941b018b5415fa9664426e10ad upstream. Recently AMT mode was enabled (somewhat unexpectedly) on the Lenovo Z13 platform. The FW is advertising it is available and the driver tries to use it - unfortunately it reports the profile mode incorrectly. Note, there is also some extra work needed to enable the dynamic aspect of AMT support that I will be following up with; but more testing is needed first. This patch just fixes things so the profiles are reported correctly. Link: https://gitlab.freedesktop.org/hadess/power-profiles-daemon/-/issues/115 Fixes: 46dcbc61b739 ("platform/x86: thinkpad-acpi: Add support for automatic mode transitions") Reviewed-by: Mario Limonciello Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20230112221228.490946-1-mpearson-lenovo@squebb.ca Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/thinkpad_acpi.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index a1d91736a03b..4e95d2243161 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -10315,9 +10315,11 @@ static DEFINE_MUTEX(dytc_mutex); static int dytc_capabilities; static bool dytc_mmc_get_available; -static int convert_dytc_to_profile(int dytcmode, enum platform_profile_option *profile) +static int convert_dytc_to_profile(int funcmode, int dytcmode, + enum platform_profile_option *profile) { - if (dytc_capabilities & BIT(DYTC_FC_MMC)) { + switch (funcmode) { + case DYTC_FUNCTION_MMC: switch (dytcmode) { case DYTC_MODE_MMC_LOWPOWER: *profile = PLATFORM_PROFILE_LOW_POWER; @@ -10333,8 +10335,7 @@ static int convert_dytc_to_profile(int dytcmode, enum platform_profile_option *p return -EINVAL; } return 0; - } - if (dytc_capabilities & BIT(DYTC_FC_PSC)) { + case DYTC_FUNCTION_PSC: switch (dytcmode) { case DYTC_MODE_PSC_LOWPOWER: *profile = PLATFORM_PROFILE_LOW_POWER; @@ -10348,6 +10349,14 @@ static int convert_dytc_to_profile(int dytcmode, enum platform_profile_option *p default: /* Unknown mode */ return -EINVAL; } + return 0; + case DYTC_FUNCTION_AMT: + /* For now return balanced. It's the closest we have to 'auto' */ + *profile = PLATFORM_PROFILE_BALANCED; + return 0; + default: + /* Unknown function */ + return -EOPNOTSUPP; } return 0; } @@ -10496,6 +10505,7 @@ static int dytc_profile_set(struct platform_profile_handler *pprof, err = dytc_command(DYTC_SET_COMMAND(DYTC_FUNCTION_PSC, perfmode, 1), &output); if (err) goto unlock; + /* system supports AMT, activate it when on balanced */ if (dytc_capabilities & BIT(DYTC_FC_AMT)) dytc_control_amt(profile == PLATFORM_PROFILE_BALANCED); @@ -10511,7 +10521,7 @@ static void dytc_profile_refresh(void) { enum platform_profile_option profile; int output, err = 0; - int perfmode; + int perfmode, funcmode; mutex_lock(&dytc_mutex); if (dytc_capabilities & BIT(DYTC_FC_MMC)) { @@ -10526,8 +10536,9 @@ static void dytc_profile_refresh(void) if (err) return; + funcmode = (output >> DYTC_GET_FUNCTION_BIT) & 0xF; perfmode = (output >> DYTC_GET_MODE_BIT) & 0xF; - convert_dytc_to_profile(perfmode, &profile); + convert_dytc_to_profile(funcmode, perfmode, &profile); if (profile != dytc_current_profile) { dytc_current_profile = profile; platform_profile_notify(); From 0e082105895c448ea8b20daf881c3e2759ed7e15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 21 Dec 2022 17:59:51 +0000 Subject: [PATCH 050/177] platform/x86: asus-wmi: Don't load fan curves without fan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 01fd7e7851ba2275662f771ee17d1f80e7bbfa52 upstream. If we do not have a fan it does not make sense to load curves for it. This removes the following warnings from the kernel log: asus_wmi: fan_curve_get_factory_default (0x00110024) failed: -19 asus_wmi: fan_curve_get_factory_default (0x00110025) failed: -19 Fixes: a2bdf10ce96e ("platform/x86: asus-wmi: Increase FAN_CURVE_BUF_LEN to 32") Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20221221-asus-fan-v1-3-e07f3949725b@weissschuh.net Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/asus-wmi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 872efc1d5b36..f051b21653d6 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -2436,6 +2436,9 @@ static int fan_curve_check_present(struct asus_wmi *asus, bool *available, *available = false; + if (asus->fan_type == FAN_TYPE_NONE) + return 0; + err = fan_curve_get_factory_default(asus, fan_dev); if (err) { return 0; From dfa4ff480fe6d7b401a8d2f707b0228c4664de13 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 21 Dec 2022 23:07:23 +0100 Subject: [PATCH 051/177] platform/x86: dell-privacy: Fix SW_CAMERA_LENS_COVER reporting commit 1af7fef0d9d3fa075bf4e850f705df1fe97d33ce upstream. Use KE_VSW instead of KE_SW for the SW_CAMERA_LENS_COVER key_entry and get the value of the switch from the status field when handling SW_CAMERA_LENS_COVER events, instead of always reporting 0. Also correctly set the initial SW_CAMERA_LENS_COVER value. Fixes: 8af9fa37b8a3 ("platform/x86: dell-privacy: Add support for Dell hardware privacy") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20221221220724.119594-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/dell/dell-wmi-privacy.c | 22 ++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/platform/x86/dell/dell-wmi-privacy.c b/drivers/platform/x86/dell/dell-wmi-privacy.c index 9fbd974b723e..c517bd45dd32 100644 --- a/drivers/platform/x86/dell/dell-wmi-privacy.c +++ b/drivers/platform/x86/dell/dell-wmi-privacy.c @@ -61,7 +61,7 @@ static const struct key_entry dell_wmi_keymap_type_0012[] = { /* privacy mic mute */ { KE_KEY, 0x0001, { KEY_MICMUTE } }, /* privacy camera mute */ - { KE_SW, 0x0002, { SW_CAMERA_LENS_COVER } }, + { KE_VSW, 0x0002, { SW_CAMERA_LENS_COVER } }, { KE_END, 0}, }; @@ -115,11 +115,15 @@ bool dell_privacy_process_event(int type, int code, int status) switch (code) { case DELL_PRIVACY_AUDIO_EVENT: /* Mic mute */ - case DELL_PRIVACY_CAMERA_EVENT: /* Camera mute */ priv->last_status = status; sparse_keymap_report_entry(priv->input_dev, key, 1, true); ret = true; break; + case DELL_PRIVACY_CAMERA_EVENT: /* Camera mute */ + priv->last_status = status; + sparse_keymap_report_entry(priv->input_dev, key, !(status & CAMERA_STATUS), false); + ret = true; + break; default: dev_dbg(&priv->wdev->dev, "unknown event type 0x%04x 0x%04x\n", type, code); } @@ -304,6 +308,11 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context) dev_set_drvdata(&wdev->dev, priv); priv->wdev = wdev; + + ret = get_current_status(priv->wdev); + if (ret) + return ret; + /* create evdev passing interface */ priv->input_dev = devm_input_allocate_device(&wdev->dev); if (!priv->input_dev) @@ -342,11 +351,12 @@ static int dell_privacy_wmi_probe(struct wmi_device *wdev, const void *context) priv->input_dev->name = "Dell Privacy Driver"; priv->input_dev->id.bustype = BUS_HOST; - ret = input_register_device(priv->input_dev); - if (ret) - return ret; + /* Report initial camera-cover status */ + if (priv->features_present & BIT(DELL_PRIVACY_TYPE_CAMERA)) + input_report_switch(priv->input_dev, SW_CAMERA_LENS_COVER, + !(priv->last_status & CAMERA_STATUS)); - ret = get_current_status(priv->wdev); + ret = input_register_device(priv->input_dev); if (ret) return ret; From 96ec2f82aea0b2e51b10a58b2bedd2ac3ffac5ac Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Fri, 23 Dec 2022 02:10:08 +0000 Subject: [PATCH 052/177] dt-bindings: msm: dsi-controller-main: Fix operating-points-v2 constraint commit cdf64343f91a1225e9e3d4ce4261962cd41b4ddd upstream. The existing msm8916.dtsi does not depend on nor require operating points. Fixes: 4dbe55c97741 ("dt-bindings: msm: dsi: add yaml schemas for DSI bindings") Reviewed-by: Dmitry Baryshkov Acked-by: Krzysztof Kozlowski Signed-off-by: Bryan O'Donoghue Patchwork: https://patchwork.freedesktop.org/patch/515940/ Link: https://lore.kernel.org/r/20221223021025.1646636-2-bryan.odonoghue@linaro.org Signed-off-by: Abhinav Kumar Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/display/msm/dsi-controller-main.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml index 3b609c19e0bc..62849a4f64c6 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml @@ -135,7 +135,6 @@ required: - assigned-clocks - assigned-clock-parents - power-domains - - operating-points-v2 - ports additionalProperties: false From b107b08c41b3076a508113fbaaffe15ce1fe7f65 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 5 Jan 2023 03:47:43 +0200 Subject: [PATCH 053/177] drm/msm: another fix for the headless Adreno GPU commit 00dd060ab3cf95ca6ede7853bc14397014971b5e upstream. Fix another oops reproducible when rebooting the board with the Adreno GPU working in the headless mode (e.g. iMX platforms). Unable to handle kernel NULL pointer dereference at virtual address 00000000 when read [00000000] *pgd=74936831, *pte=00000000, *ppte=00000000 Internal error: Oops: 17 [#1] ARM CPU: 0 PID: 51 Comm: reboot Not tainted 6.2.0-rc1-dirty #11 Hardware name: Freescale i.MX53 (Device Tree Support) PC is at msm_atomic_commit_tail+0x50/0x970 LR is at commit_tail+0x9c/0x188 pc : [] lr : [] psr: 600e0013 sp : e0851d30 ip : ee4eb7eb fp : 00090acc r10: 00000058 r9 : c2193014 r8 : c4310000 r7 : c4759380 r6 : 07bef61d r5 : 00000000 r4 : 00000000 r3 : c44cc440 r2 : 00000000 r1 : 00000000 r0 : 00000000 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5387d Table: 74910019 DAC: 00000051 Register r0 information: NULL pointer Register r1 information: NULL pointer Register r2 information: NULL pointer Register r3 information: slab kmalloc-1k start c44cc400 pointer offset 64 size 1024 Register r4 information: NULL pointer Register r5 information: NULL pointer Register r6 information: non-paged memory Register r7 information: slab kmalloc-128 start c4759380 pointer offset 0 size 128 Register r8 information: slab kmalloc-2k start c4310000 pointer offset 0 size 2048 Register r9 information: non-slab/vmalloc memory Register r10 information: non-paged memory Register r11 information: non-paged memory Register r12 information: non-paged memory Process reboot (pid: 51, stack limit = 0xc80046d9) Stack: (0xe0851d30 to 0xe0852000) 1d20: c4759380 fbd77200 000005ff 002b9c70 1d40: c4759380 c4759380 00000000 07bef61d 00000600 c0d6fe7c c2193014 00000058 1d60: 00090acc c067a214 00000000 c4759380 c4310000 00000000 c44cc854 c067a89c 1d80: 00000000 00000000 00000000 c4310468 00000000 c4759380 c4310000 c4310468 1da0: c4310470 c0643258 c4759380 00000000 00000000 c0c4ee24 00000000 c44cc810 1dc0: 00000000 c0c4ee24 00000000 c44cc810 00000000 0347d2a8 e0851e00 e0851e00 1de0: c4759380 c067ad20 c4310000 00000000 c44cc810 c27f8718 c44cc854 c067adb8 1e00: c4933000 00000002 00000001 00000000 00000000 c2130850 00000000 c2130854 1e20: c25fc488 00000000 c0ff162c 00000000 00000001 00000002 00000000 00000000 1e40: c43102c0 c43102c0 00000000 0347d2a8 c44cc810 c44cc814 c2133da8 c06d1a60 1e60: 00000000 00000000 00079028 c2012f24 fee1dead c4933000 00000058 c01431e4 1e80: 01234567 c0143a20 00000000 00000000 00000000 00000000 00000000 00000000 1ea0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1ec0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1ee0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1f00: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1f20: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1f40: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1f60: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 1f80: 00000000 00000000 00000000 0347d2a8 00000002 00000004 00000078 00000058 1fa0: c010028c c0100060 00000002 00000004 fee1dead 28121969 01234567 00079028 1fc0: 00000002 00000004 00000078 00000058 0002fdc5 00000000 00000000 00090acc 1fe0: 00000058 becc9c64 b6e97e05 b6e0e5f6 600e0030 fee1dead 00000000 00000000 msm_atomic_commit_tail from commit_tail+0x9c/0x188 commit_tail from drm_atomic_helper_commit+0x160/0x188 drm_atomic_helper_commit from drm_atomic_commit+0xac/0xe0 drm_atomic_commit from drm_atomic_helper_disable_all+0x1b0/0x1c0 drm_atomic_helper_disable_all from drm_atomic_helper_shutdown+0x88/0x140 drm_atomic_helper_shutdown from device_shutdown+0x16c/0x240 device_shutdown from kernel_restart+0x38/0x90 kernel_restart from __do_sys_reboot+0x174/0x224 __do_sys_reboot from ret_fast_syscall+0x0/0x1c Exception stack(0xe0851fa8 to 0xe0851ff0) 1fa0: 00000002 00000004 fee1dead 28121969 01234567 00079028 1fc0: 00000002 00000004 00000078 00000058 0002fdc5 00000000 00000000 00090acc 1fe0: 00000058 becc9c64 b6e97e05 b6e0e5f6 Code: 15922088 1184421c e1500003 1afffff8 (e5953000) ---[ end trace 0000000000000000 ]--- Fixes: 0a58d2ae572a ("drm/msm: Make .remove and .shutdown HW shutdown consistent") Reported-by: kernel test robot Signed-off-by: Dmitry Baryshkov Reviewed-by: Rob Clark Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/516909/ Link: https://lore.kernel.org/r/20230105014743.1478110-1-dmitry.baryshkov@linaro.org Signed-off-by: Abhinav Kumar Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/msm/msm_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 105b5b48e828..681c1b889b31 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -1271,7 +1271,7 @@ void msm_drv_shutdown(struct platform_device *pdev) * msm_drm_init, drm_dev->registered is used as an indicator that the * shutdown will be successful. */ - if (drm && drm->registered) + if (drm && drm->registered && priv->kms) drm_atomic_helper_shutdown(drm); } From a30aafcfba8f9ec2a9f58769c5cdc9a2f3816008 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 25 Nov 2022 10:18:26 +0000 Subject: [PATCH 054/177] firmware/psci: Fix MEM_PROTECT_RANGE function numbers commit f3dc61cde80d48751999c4cb46daf3b2185e6895 upstream. PSCI v1.1 offers 32-bit and 64-bit variants of the MEM_PROTECT_RANGE call using function identifier 20. Fix the incorrect definitions of the MEM_PROTECT_CHECK_RANGE calls in the PSCI UAPI header. Cc: Dmitry Baryshkov Cc: Lorenzo Pieralisi Cc: Arnd Bergmann Fixes: 3137f2e60098 ("firmware/psci: Add debugfs support to ease debugging") Acked-by: Marc Zyngier Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20221125101826.22404-1-will@kernel.org Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/psci.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h index 3511095c2702..42a40ad3fb62 100644 --- a/include/uapi/linux/psci.h +++ b/include/uapi/linux/psci.h @@ -58,7 +58,7 @@ #define PSCI_1_1_FN_SYSTEM_RESET2 PSCI_0_2_FN(18) #define PSCI_1_1_FN_MEM_PROTECT PSCI_0_2_FN(19) -#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN(19) +#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN(20) #define PSCI_1_0_FN64_CPU_DEFAULT_SUSPEND PSCI_0_2_FN64(12) #define PSCI_1_0_FN64_NODE_HW_STATE PSCI_0_2_FN64(13) @@ -67,7 +67,7 @@ #define PSCI_1_0_FN64_STAT_COUNT PSCI_0_2_FN64(17) #define PSCI_1_1_FN64_SYSTEM_RESET2 PSCI_0_2_FN64(18) -#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN64(19) +#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN64(20) /* PSCI v0.2 power state encoding for CPU_SUSPEND function */ #define PSCI_0_2_POWER_STATE_ID_MASK 0xffff From d0f52562b89155bde80d640ff4ef63bc713fac53 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 5 Jan 2023 09:08:34 +0000 Subject: [PATCH 055/177] firmware/psci: Don't register with debugfs if PSCI isn't available commit cef139299fd86098c6e3dbd389d1d0b2462d7710 upstream. Contrary to popular belief, PSCI is not a universal property of an ARM/arm64 system. There is a garden variety of systems out there that don't (or even cannot) implement it. I'm the first one deplore such a situation, but hey... On such systems, a "cat /sys/kernel/debug/psci" results in fireworks, as no invocation callback is registered. Check for the invoke_psci_fn and psci_ops.get_version pointers before registering with the debugfs subsystem, avoiding the issue altogether. Fixes: 3137f2e60098 ("firmware/psci: Add debugfs support to ease debugging") Reported-by: Hector Martin Signed-off-by: Marc Zyngier Cc: Dmitry Baryshkov Cc: Mark Brown Cc: Ulf Hansson Cc: Arnd Bergmann Cc: Mark Rutland Cc: Lorenzo Pieralisi Reviewed-by: Hector Martin Acked-by: Dmitry Baryshkov Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20230105090834.630238-1-maz@kernel.org Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/psci/psci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index e7bcfca4159f..447ee4ea5c90 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -440,6 +440,9 @@ static const struct file_operations psci_debugfs_ops = { static int __init psci_debugfs_init(void) { + if (!invoke_psci_fn || !psci_ops.get_version) + return 0; + return PTR_ERR_OR_ZERO(debugfs_create_file("psci", 0444, NULL, NULL, &psci_debugfs_ops)); } From b72bd13bc50099e5e5f8a7a626a638b95da60548 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Mon, 2 Jan 2023 11:02:00 +0100 Subject: [PATCH 056/177] drm/msm/adreno: Make adreno quirks not overwrite each other commit 13ef096e342b00e30b95a90c6c13eee1f0bec4c5 upstream. So far the adreno quirks have all been assigned with an OR operator, which is problematic, because they were assigned consecutive integer values, which makes checking them with an AND operator kind of no bueno.. Switch to using BIT(n) so that only the quirks that the programmer chose are taken into account when evaluating info->quirks & ADRENO_QUIRK_... Fixes: 370063ee427a ("drm/msm/adreno: Add A540 support") Reviewed-by: Dmitry Baryshkov Reviewed-by: Marijn Suijten Reviewed-by: Rob Clark Signed-off-by: Konrad Dybcio Reviewed-by: Akhil P Oommen Patchwork: https://patchwork.freedesktop.org/patch/516456/ Link: https://lore.kernel.org/r/20230102100201.77286-1-konrad.dybcio@linaro.org Signed-off-by: Rob Clark Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/msm/adreno/adreno_gpu.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index e7adc5c632d0..3d78efb066b1 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -29,11 +29,9 @@ enum { ADRENO_FW_MAX, }; -enum adreno_quirks { - ADRENO_QUIRK_TWO_PASS_USE_WFI = 1, - ADRENO_QUIRK_FAULT_DETECT_MASK = 2, - ADRENO_QUIRK_LMLOADKILL_DISABLE = 3, -}; +#define ADRENO_QUIRK_TWO_PASS_USE_WFI BIT(0) +#define ADRENO_QUIRK_FAULT_DETECT_MASK BIT(1) +#define ADRENO_QUIRK_LMLOADKILL_DISABLE BIT(2) struct adreno_rev { uint8_t core; @@ -65,7 +63,7 @@ struct adreno_info { const char *name; const char *fw[ADRENO_FW_MAX]; uint32_t gmem; - enum adreno_quirks quirks; + u64 quirks; struct msm_gpu *(*init)(struct drm_device *dev); const char *zapfw; u32 inactive_period; From 4fa6b43ba44bd26004b558549f7ad448a6255a64 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 27 Dec 2022 17:12:06 +0000 Subject: [PATCH 057/177] arm64/signal: Always allocate SVE signal frames on SME only systems commit f26cd7372160da2eba31061d7943348ab9f2c01d upstream. Currently we only allocate space for SVE signal frames on systems that support SVE, meaning that SME only systems do not allocate a signal frame for streaming mode SVE state. Change the check so space is allocated if either feature is supported. Fixes: 85ed24dad290 ("arm64/sme: Implement streaming SVE signal handling") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20221223-arm64-fix-sme-only-v1-3-938d663f69e5@kernel.org Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 9ad911f1647c..5100893be6e4 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -729,7 +729,7 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, return err; } - if (system_supports_sve()) { + if (system_supports_sve() || system_supports_sme()) { unsigned int vq = 0; if (add_all || test_thread_flag(TIF_SVE) || From b5affe7e0b3a4e18cf98f86b9921f442c70c54a3 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Fri, 23 Dec 2022 02:10:09 +0000 Subject: [PATCH 058/177] dt-bindings: msm: dsi-controller-main: Fix power-domain constraint commit a6f033938beb31f893302a93f83ec0b6460c6cac upstream. power-domain is required for the sc7180 dispcc GDSC but not every qcom SoC has a similar dependency for example the apq8064. Most Qcom SoC's using mdss-dsi-ctrl seem to have the ability to power-collapse the MDP without collapsing DSI. For example the qcom vendor kernel commit for apq8084, msm8226, msm8916, msm8974. https://review.carbonrom.org/plugins/gitiles/CarbonROM/android_kernel_oneplus_msm8994/+/7b5c011a770daa2811778937ed646237a28a8694 "ARM: dts: msm: add mdss gdsc supply to dsi controller device It is possible for the DSI controller to be active when MDP is power collapsed. DSI controller needs to have it's own vote for mdss gdsc to ensure that gdsc remains on in such cases." This however doesn't appear to be the case for the apq8064 so we shouldn't be marking power-domain as required in yaml checks. Fixes: 4dbe55c97741 ("dt-bindings: msm: dsi: add yaml schemas for DSI bindings") Reviewed-by: Dmitry Baryshkov Acked-by: Krzysztof Kozlowski Signed-off-by: Bryan O'Donoghue Patchwork: https://patchwork.freedesktop.org/patch/515958/ Link: https://lore.kernel.org/r/20221223021025.1646636-3-bryan.odonoghue@linaro.org Signed-off-by: Abhinav Kumar Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/display/msm/dsi-controller-main.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml index 62849a4f64c6..ca6f71a923ac 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml @@ -134,7 +134,6 @@ required: - phy-names - assigned-clocks - assigned-clock-parents - - power-domains - ports additionalProperties: false From 4b855673cf3d699c1603ec07a87191caa9fd5aa9 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Fri, 23 Dec 2022 02:10:10 +0000 Subject: [PATCH 059/177] dt-bindings: msm: dsi-controller-main: Fix description of core clock commit 654ffe4b793b42ed6b5909daff0b91809916d94e upstream. There's a typo in describing the core clock as an 'escape' clock. The accurate description is 'core'. Fixes: 4dbe55c97741 ("dt-bindings: msm: dsi: add yaml schemas for DSI bindings") Reviewed-by: Dmitry Baryshkov Acked-by: Krzysztof Kozlowski Signed-off-by: Bryan O'Donoghue Patchwork: https://patchwork.freedesktop.org/patch/515938/ Link: https://lore.kernel.org/r/20221223021025.1646636-4-bryan.odonoghue@linaro.org Signed-off-by: Abhinav Kumar Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/display/msm/dsi-controller-main.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml index ca6f71a923ac..6c5b4783812a 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-controller-main.yaml @@ -32,7 +32,7 @@ properties: - description: Display byte clock - description: Display byte interface clock - description: Display pixel clock - - description: Display escape clock + - description: Display core clock - description: Display AHB clock - description: Display AXI clock From 05458ee3ff82b31a4564836a84ef20d6c233dfc7 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 27 Dec 2022 17:12:05 +0000 Subject: [PATCH 060/177] arm64/signal: Always accept SVE signal frames on SME only systems commit 7dde62f0687c8856b6c0660066c7ee83a6a6f033 upstream. Currently we reject an attempt to restore a SVE signal frame on a system with SME but not SVE supported. This means that it is not possible to disable streaming mode via signal return as this is configured via the flags in the SVE signal context. Instead accept the signal frame, we will require it to have a vector length of 0 specified and no payload since the task will have no SVE vector length configured. Fixes: 85ed24dad290 ("arm64/sme: Implement streaming SVE signal handling") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20221223-arm64-fix-sme-only-v1-2-938d663f69e5@kernel.org Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/signal.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 5100893be6e4..43adbfa5ead7 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -280,7 +280,12 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user) vl = task_get_sme_vl(current); } else { - if (!system_supports_sve()) + /* + * A SME only system use SVE for streaming mode so can + * have a SVE formatted context with a zero VL and no + * payload data. + */ + if (!system_supports_sve() && !system_supports_sme()) return -EINVAL; vl = task_get_sve_vl(current); From a006aaffd78ff70efcffa44e325633c0840b2bf2 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Tue, 22 Nov 2022 20:31:37 +0800 Subject: [PATCH 061/177] arm64/mm: add pud_user_exec() check in pud_user_accessible_page() commit 730a11f982e61aaef758ab552dfb7c30de79e99b upstream. Add check for the executable case in pud_user_accessible_page() too like what we did for pte and pmd. Fixes: 42b2547137f5 ("arm64/mm: enable ARCH_SUPPORTS_PAGE_TABLE_CHECK") Suggested-by: Will Deacon Signed-off-by: Liu Shixin Link: https://lore.kernel.org/r/20221122123137.429686-1-liushixin2@huawei.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index edf6625ce965..5582360209d5 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -682,7 +682,7 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) #define pud_leaf(pud) (pud_present(pud) && !pud_table(pud)) #define pud_valid(pud) pte_valid(pud_pte(pud)) #define pud_user(pud) pte_user(pud_pte(pud)) - +#define pud_user_exec(pud) pte_user_exec(pud_pte(pud)) static inline void set_pud(pud_t *pudp, pud_t pud) { @@ -868,7 +868,7 @@ static inline bool pmd_user_accessible_page(pmd_t pmd) static inline bool pud_user_accessible_page(pud_t pud) { - return pud_leaf(pud) && pud_user(pud); + return pud_leaf(pud) && (pud_user(pud) || pud_user_exec(pud)); } #endif From a9a6715272a7c0c51806f0cc686ad9f621ed2c12 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Thu, 29 Dec 2022 12:44:38 +0000 Subject: [PATCH 062/177] dt-bindings: msm: dsi-phy-28nm: Add missing qcom, dsi-phy-regulator-ldo-mode commit be79f805a1e1b95605c825f1c513bdd2c8b167ed upstream. Add in missing qcom,dsi-phy-regulator-ldo-mode to the 28nm DSI PHY. When converting from .txt to .yaml we missed this one. Fixes: 4dbe55c97741 ("dt-bindings: msm: dsi: add yaml schemas for DSI bindings") Reviewed-by: Dmitry Baryshkov Signed-off-by: Bryan O'Donoghue Patchwork: https://patchwork.freedesktop.org/patch/516205/ Link: https://lore.kernel.org/r/20221229124438.504770-2-bryan.odonoghue@linaro.org Signed-off-by: Abhinav Kumar Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/display/msm/dsi-phy-28nm.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml index 3d8540a06fe2..2f1fd140c87d 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-28nm.yaml @@ -34,6 +34,10 @@ properties: vddio-supply: description: Phandle to vdd-io regulator device node. + qcom,dsi-phy-regulator-ldo-mode: + type: boolean + description: Indicates if the LDO mode PHY regulator is wanted. + required: - compatible - reg From 06f8be16be51638c8373f148680d8ec0313c73e2 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Wed, 14 Dec 2022 21:59:43 +0800 Subject: [PATCH 063/177] arm64: ptrace: Use ARM64_SME to guard the SME register enumerations commit eb9a85261e297292c4cc44b628c1373c996cedc2 upstream. We currently guard REGSET_{SSVE, ZA} using ARM64_SVE for no good reason. Both enumerations would be pointless without ARM64_SME and create two empty entries in aarch64_regsets[] which would then become part of a process's native regset view (they should be ignored though). Switch to use ARM64_SME instead. Fixes: e12310a0d30f ("arm64/sme: Implement ptrace support for streaming mode SVE registers") Signed-off-by: Zenghui Yu Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20221214135943.379-1-yuzenghui@huawei.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index c2fb5755bbec..92bc9a2d702c 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1364,7 +1364,7 @@ enum aarch64_regset { #ifdef CONFIG_ARM64_SVE REGSET_SVE, #endif -#ifdef CONFIG_ARM64_SVE +#ifdef CONFIG_ARM64_SME REGSET_SSVE, REGSET_ZA, #endif From 21e5eca0ac9046da9918a919bc92b7b5a78d27e7 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Mon, 21 Nov 2022 15:36:08 +0800 Subject: [PATCH 064/177] arm64/mm: fix incorrect file_map_count for invalid pmd commit 74c2f81054510d45b813548cb0a1c4ebf87cdd5f upstream. The page table check trigger BUG_ON() unexpectedly when split hugepage: ------------[ cut here ]------------ kernel BUG at mm/page_table_check.c:119! Internal error: Oops - BUG: 00000000f2000800 [#1] SMP Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 7 PID: 210 Comm: transhuge-stres Not tainted 6.1.0-rc3+ #748 Hardware name: linux,dummy-virt (DT) pstate: 20000005 (nzCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : page_table_check_set.isra.0+0x398/0x468 lr : page_table_check_set.isra.0+0x1c0/0x468 [...] Call trace: page_table_check_set.isra.0+0x398/0x468 __page_table_check_pte_set+0x160/0x1c0 __split_huge_pmd_locked+0x900/0x1648 __split_huge_pmd+0x28c/0x3b8 unmap_page_range+0x428/0x858 unmap_single_vma+0xf4/0x1c8 zap_page_range+0x2b0/0x410 madvise_vma_behavior+0xc44/0xe78 do_madvise+0x280/0x698 __arm64_sys_madvise+0x90/0xe8 invoke_syscall.constprop.0+0xdc/0x1d8 do_el0_svc+0xf4/0x3f8 el0_svc+0x58/0x120 el0t_64_sync_handler+0xb8/0xc0 el0t_64_sync+0x19c/0x1a0 [...] On arm64, pmd_leaf() will return true even if the pmd is invalid due to pmd_present_invalid() check. So in pmdp_invalidate() the file_map_count will not only decrease once but also increase once. Then in set_pte_at(), the file_map_count increase again, and so trigger BUG_ON() unexpectedly. Add !pmd_present_invalid() check in pmd_user_accessible_page() to fix the problem. Fixes: 42b2547137f5 ("arm64/mm: enable ARCH_SUPPORTS_PAGE_TABLE_CHECK") Reported-by: Denys Vlasenko Signed-off-by: Liu Shixin Acked-by: Pasha Tatashin Acked-by: David Hildenbrand Reviewed-by: Kefeng Wang Acked-by: Will Deacon Link: https://lore.kernel.org/r/20221121073608.4183459-1-liushixin2@huawei.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 5582360209d5..f1cfc44ef52f 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -863,7 +863,7 @@ static inline bool pte_user_accessible_page(pte_t pte) static inline bool pmd_user_accessible_page(pmd_t pmd) { - return pmd_leaf(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); + return pmd_leaf(pmd) && !pmd_present_invalid(pmd) && (pmd_user(pmd) || pmd_user_exec(pmd)); } static inline bool pud_user_accessible_page(pud_t pud) From 4b5d9c95b437e43c788d4fed9c5cc23764ea029d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 15 Dec 2022 16:43:57 +0100 Subject: [PATCH 065/177] platform/x86: ideapad-laptop: Add Legion 5 15ARH05 DMI id to set_fn_lock_led_list[] commit f4b7f8febd4d9b615fbec2a06bf352b9c3729b11 upstream. The Lenovo Legion 5 15ARH05 needs ideapad-laptop to call SALS_FNLOCK_ON / SALS_FNLOCK_OFF on Fn-lock state change to get the LED in the Fn key to correctly reflect the Fn-lock state. Add a DMI match for the Legion 5 15ARH05 to the set_fn_lock_led_list[] table for this. Fixes: 81a5603a0f50 ("platform/x86: ideapad-laptop: Fix interrupt storm on fn-lock toggle on some Yoga laptops") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20221215154357.123876-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/ideapad-laptop.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index fc3d47a75944..4e28c55f0ea5 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1615,6 +1615,12 @@ static const struct dmi_system_id set_fn_lock_led_list[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Legion R7000P2020H"), } }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo Legion 5 15ARH05"), + } + }, {} }; From b7dcbca46db3c77fdb02c2a9d6239e5aa3b06a59 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Tue, 27 Dec 2022 18:16:24 -0800 Subject: [PATCH 066/177] drm/msm/dp: do not complete dp_aux_cmd_fifo_tx() if irq is not for aux transfer commit 1cba0d150fa102439114a91b3e215909efc9f169 upstream. There are 3 possible interrupt sources are handled by DP controller, HPDstatus, Controller state changes and Aux read/write transaction. At every irq, DP controller have to check isr status of every interrupt sources and service the interrupt if its isr status bits shows interrupts are pending. There is potential race condition may happen at current aux isr handler implementation since it is always complete dp_aux_cmd_fifo_tx() even irq is not for aux read or write transaction. This may cause aux read transaction return premature if host aux data read is in the middle of waiting for sink to complete transferring data to host while irq happen. This will cause host's receiving buffer contains unexpected data. This patch fixes this problem by checking aux isr and return immediately at aux isr handler if there are no any isr status bits set. Current there is a bug report regrading eDP edid corruption happen during system booting up. After lengthy debugging to found that VIDEO_READY interrupt was continuously firing during system booting up which cause dp_aux_isr() to complete dp_aux_cmd_fifo_tx() prematurely to retrieve data from aux hardware buffer which is not yet contains complete data transfer from sink. This cause edid corruption. Follows are the signature at kernel logs when problem happen, EDID has corrupt header panel-simple-dp-aux aux-aea0000.edp: Couldn't identify panel via EDID Changes in v2: -- do complete if (ret == IRQ_HANDLED) ay dp-aux_isr() -- add more commit text Changes in v3: -- add Stephen suggested -- dp_aux_isr() return IRQ_XXX back to caller -- dp_ctrl_isr() return IRQ_XXX back to caller Changes in v4: -- split into two patches Changes in v5: -- delete empty line between tags Changes in v6: -- remove extra "that" and fixed line more than 75 char at commit text Fixes: c943b4948b58 ("drm/msm/dp: add displayPort driver support") Signed-off-by: Kuogee Hsieh Tested-by: Douglas Anderson Reviewed-by: Abhinav Kumar Reviewed-by: Dmitry Baryshkov Patchwork: https://patchwork.freedesktop.org/patch/516121/ Link: https://lore.kernel.org/r/1672193785-11003-2-git-send-email-quic_khsieh@quicinc.com Signed-off-by: Abhinav Kumar Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/msm/dp/dp_aux.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/msm/dp/dp_aux.c b/drivers/gpu/drm/msm/dp/dp_aux.c index d030a93a08c3..cc3efed593aa 100644 --- a/drivers/gpu/drm/msm/dp/dp_aux.c +++ b/drivers/gpu/drm/msm/dp/dp_aux.c @@ -423,6 +423,10 @@ void dp_aux_isr(struct drm_dp_aux *dp_aux) isr = dp_catalog_aux_get_irq(aux->catalog); + /* no interrupts pending, return immediately */ + if (!isr) + return; + if (!aux->cmd_busy) return; From 3c0d5bb904a24e56eb27aa0e5fcd3fb34203c4f3 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 16 Nov 2022 17:32:18 +0100 Subject: [PATCH 067/177] dt-bindings: msm/dsi: Don't require vdds-supply on 10nm PHY commit ef11cb7a29c0e13031c968190ea8f86104e7fb6a upstream. On some SoCs (hello SM6350) vdds-supply is not wired to any smd-rpm or rpmh regulator, but instead powered by the VDD_MX/mx.lvl line, which is voted for in the DSI ctrl node. Signed-off-by: Konrad Dybcio Acked-by: Rob Herring Fixes: 8fc939e72ff8 ("dt-bindings: msm: dsi: add yaml schemas for DSI PHY bindings") Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/511889/ Link: https://lore.kernel.org/r/20221116163218.42449-1-konrad.dybcio@linaro.org Signed-off-by: Abhinav Kumar Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml index d9ad8b659f58..3ec466c3ab38 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-10nm.yaml @@ -69,7 +69,6 @@ required: - compatible - reg - reg-names - - vdds-supply unevaluatedProperties: false From 0f21e225ff48a6110d3c4b1159e2f6816759705d Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Wed, 30 Nov 2022 14:58:07 +0100 Subject: [PATCH 068/177] dt-bindings: msm/dsi: Don't require vcca-supply on 14nm PHY commit a2117773c839a8439a3771e0c040b5c505b083a7 upstream. On some SoCs (hello SM6115) vcca-supply is not wired to any smd-rpm or rpmh regulator, but instead powered by the VDD_MX line, which is voted for in the DSI ctrl node. Signed-off-by: Konrad Dybcio Acked-by: Krzysztof Kozlowski Reviewed-by: Dmitry Baryshkov Fixes: 8fc939e72ff8 ("dt-bindings: msm: dsi: add yaml schemas for DSI PHY bindings") Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/513555/ Link: https://lore.kernel.org/r/20221130135807.45028-1-konrad.dybcio@linaro.org Signed-off-by: Abhinav Kumar Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml b/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml index 1342d74ecfe0..0a7b5f110d88 100644 --- a/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml +++ b/Documentation/devicetree/bindings/display/msm/dsi-phy-14nm.yaml @@ -38,7 +38,6 @@ required: - compatible - reg - reg-names - - vcca-supply unevaluatedProperties: false From fb8534b7960fe4df3c1014fc2da4ac35a6605b54 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 13 Dec 2022 13:29:43 +0100 Subject: [PATCH 069/177] platform/x86: sony-laptop: Don't turn off 0x153 keyboard backlight during probe commit ad75bd85b1db69c97eefea07b375567821f6ef58 upstream. The 0x153 version of the kbd backlight control SNC handle has no separate address to probe if the backlight is there. This turns the probe call into a set keyboard backlight call with a value of 0 turning off the keyboard backlight. Skip probing when there is no separate probe address to avoid this. Link: https://bugzilla.redhat.com/show_bug.cgi?id=1583752 Fixes: 800f20170dcf ("Keyboard backlight control for some Vaio Fit models") Signed-off-by: Hans de Goede Reviewed-by: Mattia Dongili Link: https://lore.kernel.org/r/20221213122943.11123-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/sony-laptop.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 765fcaba4d12..5ff5aaf92b56 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -1888,14 +1888,21 @@ static int sony_nc_kbd_backlight_setup(struct platform_device *pd, break; } - ret = sony_call_snc_handle(handle, probe_base, &result); - if (ret) - return ret; + /* + * Only probe if there is a separate probe_base, otherwise the probe call + * is equivalent to __sony_nc_kbd_backlight_mode_set(0), resulting in + * the keyboard backlight being turned off. + */ + if (probe_base) { + ret = sony_call_snc_handle(handle, probe_base, &result); + if (ret) + return ret; - if ((handle == 0x0137 && !(result & 0x02)) || - !(result & 0x01)) { - dprintk("no backlight keyboard found\n"); - return 0; + if ((handle == 0x0137 && !(result & 0x02)) || + !(result & 0x01)) { + dprintk("no backlight keyboard found\n"); + return 0; + } } kbdbl_ctl = kzalloc(sizeof(*kbdbl_ctl), GFP_KERNEL); From c49996c6aa03590e4ef5add8772cb6068d99fd59 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Tue, 29 Nov 2022 09:57:48 +0800 Subject: [PATCH 070/177] ixgbe: fix pci device refcount leak commit b93fb4405fcb5112c5739c5349afb52ec7f15c07 upstream. As the comment of pci_get_domain_bus_and_slot() says, it returns a PCI device with refcount incremented, when finish using it, the caller must decrement the reference count by calling pci_dev_put(). In ixgbe_get_first_secondary_devfn() and ixgbe_x550em_a_has_mii(), pci_dev_put() is called to avoid leak. Fixes: 8fa10ef01260 ("ixgbe: register a mdiobus") Signed-off-by: Yang Yingliang Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index 24aa97f993ca..123dca9ce468 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -855,9 +855,11 @@ static struct pci_dev *ixgbe_get_first_secondary_devfn(unsigned int devfn) rp_pdev = pci_get_domain_bus_and_slot(0, 0, devfn); if (rp_pdev && rp_pdev->subordinate) { bus = rp_pdev->subordinate->number; + pci_dev_put(rp_pdev); return pci_get_domain_bus_and_slot(0, bus, 0); } + pci_dev_put(rp_pdev); return NULL; } @@ -874,6 +876,7 @@ static bool ixgbe_x550em_a_has_mii(struct ixgbe_hw *hw) struct ixgbe_adapter *adapter = hw->back; struct pci_dev *pdev = adapter->pdev; struct pci_dev *func0_pdev; + bool has_mii = false; /* For the C3000 family of SoCs (x550em_a) the internal ixgbe devices * are always downstream of root ports @ 0000:00:16.0 & 0000:00:17.0 @@ -884,15 +887,16 @@ static bool ixgbe_x550em_a_has_mii(struct ixgbe_hw *hw) func0_pdev = ixgbe_get_first_secondary_devfn(PCI_DEVFN(0x16, 0)); if (func0_pdev) { if (func0_pdev == pdev) - return true; - else - return false; + has_mii = true; + goto out; } func0_pdev = ixgbe_get_first_secondary_devfn(PCI_DEVFN(0x17, 0)); if (func0_pdev == pdev) - return true; + has_mii = true; - return false; +out: + pci_dev_put(func0_pdev); + return has_mii; } /** From 0afa5f0736584411771299074bbeca8c1f9706d4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 10 Jan 2023 08:59:06 +0800 Subject: [PATCH 071/177] ipv6: raw: Deduct extension header length in rawv6_push_pending_frames commit cb3e9864cdbe35ff6378966660edbcbac955fe17 upstream. The total cork length created by ip6_append_data includes extension headers, so we must exclude them when comparing them against the IPV6_CHECKSUM offset which does not include extension headers. Reported-by: Kyle Zeng Fixes: 357b40a18b04 ("[IPV6]: IPV6_CHECKSUM socket option can corrupt kernel memory") Signed-off-by: Herbert Xu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/raw.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 722de9dd0ff7..8ffeac745656 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -505,6 +505,7 @@ csum_copy_err: static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct raw6_sock *rp) { + struct ipv6_txoptions *opt; struct sk_buff *skb; int err = 0; int offset; @@ -522,6 +523,9 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, offset = rp->offset; total_len = inet_sk(sk)->cork.base.length; + opt = inet6_sk(sk)->cork.opt; + total_len -= opt ? opt->opt_flen : 0; + if (offset >= total_len - 1) { err = -EINVAL; ip6_flush_pending_frames(sk); From 5e4194d89ffef609f980af4f5bc0e7eca03497e7 Mon Sep 17 00:00:00 2001 From: Daniil Tatianin Date: Tue, 20 Dec 2022 09:32:46 +0300 Subject: [PATCH 072/177] iavf/iavf_main: actually log ->src mask when talking about it commit 6650c8e906ce58404bfdfceceeba7bd10d397d40 upstream. This fixes a copy-paste issue where dev_err would log the dst mask even though it is clearly talking about src. Found by Linux Verification Center (linuxtesting.org) with the SVACE static analysis tool. Fixes: 0075fa0fadd0 ("i40evf: Add support to apply cloud filters") Signed-off-by: Daniil Tatianin Reviewed-by: Michal Swiatkowski Signed-off-by: Tony Nguyen Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/iavf/iavf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index f71e132ede09..260c55951c28 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -3850,7 +3850,7 @@ static int iavf_parse_cls_flower(struct iavf_adapter *adapter, field_flags |= IAVF_CLOUD_FIELD_IIP; } else { dev_err(&adapter->pdev->dev, "Bad ip src mask 0x%08x\n", - be32_to_cpu(match.mask->dst)); + be32_to_cpu(match.mask->src)); return -EINVAL; } } From 5c855bcc730656c4b7d30aaddcd0eafc7003e112 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 15 Sep 2022 16:26:51 -0700 Subject: [PATCH 073/177] drm/i915/gt: Cleanup partial engine discovery failures [ Upstream commit 78a033433a5ae4fee85511ee075bc9a48312c79e ] If we abort driver initialisation in the middle of gt/engine discovery, some engines will be fully setup and some not. Those incompletely setup engines only have 'engine->release == NULL' and so will leak any of the common objects allocated. v2: - Drop the destroy_pinned_context() helper for now. It's not really worth it with just a single callsite at the moment. (Janusz) Signed-off-by: Chris Wilson Cc: Janusz Krzysztofik Signed-off-by: Matt Roper Reviewed-by: Janusz Krzysztofik Link: https://patchwork.freedesktop.org/patch/msgid/20220915232654.3283095-2-matthew.d.roper@intel.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 83bfeb872bda..fcbccd8d244e 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -1343,8 +1343,13 @@ int intel_engines_init(struct intel_gt *gt) return err; err = setup(engine); - if (err) + if (err) { + intel_engine_cleanup_common(engine); return err; + } + + /* The backend should now be responsible for cleanup */ + GEM_BUG_ON(engine->release == NULL); err = engine_init_common(engine); if (err) From 9bd180b6f9d0a03f66b2813358483ba89234904b Mon Sep 17 00:00:00 2001 From: Ferry Toth Date: Mon, 5 Dec 2022 21:15:26 +0100 Subject: [PATCH 074/177] usb: ulpi: defer ulpi_register on ulpi_read_id timeout [ Upstream commit 8a7b31d545d3a15f0e6f5984ae16f0ca4fd76aac ] Since commit 0f0101719138 ("usb: dwc3: Don't switch OTG -> peripheral if extcon is present") Dual Role support on Intel Merrifield platform broke due to rearranging the call to dwc3_get_extcon(). It appears to be caused by ulpi_read_id() on the first test write failing with -ETIMEDOUT. Currently ulpi_read_id() expects to discover the phy via DT when the test write fails and returns 0 in that case, even if DT does not provide the phy. As a result usb probe completes without phy. Make ulpi_read_id() return -ETIMEDOUT to its user if the first test write fails. The user should then handle it appropriately. A follow up patch will make dwc3_core_init() set -EPROBE_DEFER in this case and bail out. Fixes: ef6a7bcfb01c ("usb: ulpi: Support device discovery via DT") Cc: stable@vger.kernel.org Acked-by: Heikki Krogerus Signed-off-by: Ferry Toth Link: https://lore.kernel.org/r/20221205201527.13525-2-ftoth@exalondelft.nl Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/common/ulpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c index d7c8461976ce..60e8174686a1 100644 --- a/drivers/usb/common/ulpi.c +++ b/drivers/usb/common/ulpi.c @@ -207,7 +207,7 @@ static int ulpi_read_id(struct ulpi *ulpi) /* Test the interface */ ret = ulpi_write(ulpi, ULPI_SCRATCH, 0xaa); if (ret < 0) - goto err; + return ret; ret = ulpi_read(ulpi, ULPI_SCRATCH); if (ret < 0) From e59d46eed1d930318f36a90138898f7fa7730389 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Tue, 8 Nov 2022 08:30:36 +0800 Subject: [PATCH 075/177] drm/amd/pm: enable mode1 reset on smu_v13_0_10 [ Upstream commit 60cfad329ab877cb62975ea78ed442c2496990ba ] enable mode1 reset and prioritize debug port on smu_v13_0_10 as a more reliable message processing v2 - move mode1 reset callback to smu_v13_0_0_ppt.c Signed-off-by: Kenneth Feng Reviewed-by: Yang Wang Signed-off-by: Alex Deucher Stable-dep-of: 1794f6a9535b ("drm/amd/pm: enable GPO dynamic control support for SMU13.0.0") Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/soc21.c | 1 + drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 4 ++ .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 53 ++++++++++++++++++- drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 18 +++++++ drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h | 3 ++ 5 files changed, 77 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 8b297ade69a2..f1913d879811 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -322,6 +322,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev) switch (adev->ip_versions[MP1_HWIP][0]) { case IP_VERSION(13, 0, 0): case IP_VERSION(13, 0, 7): + case IP_VERSION(13, 0, 10): return AMD_RESET_METHOD_MODE1; case IP_VERSION(13, 0, 4): return AMD_RESET_METHOD_MODE2; diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index f816b1dd110e..44bbf17e4bef 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -568,6 +568,10 @@ struct smu_context u32 param_reg; u32 msg_reg; u32 resp_reg; + + u32 debug_param_reg; + u32 debug_msg_reg; + u32 debug_resp_reg; }; struct i2c_adapter; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index be43de9dd496..73bae7eaefa2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -70,6 +70,26 @@ #define MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE 0x4000 +#define mmMP1_SMN_C2PMSG_66 0x0282 +#define mmMP1_SMN_C2PMSG_66_BASE_IDX 0 + +#define mmMP1_SMN_C2PMSG_82 0x0292 +#define mmMP1_SMN_C2PMSG_82_BASE_IDX 0 + +#define mmMP1_SMN_C2PMSG_90 0x029a +#define mmMP1_SMN_C2PMSG_90_BASE_IDX 0 + +#define mmMP1_SMN_C2PMSG_75 0x028b +#define mmMP1_SMN_C2PMSG_75_BASE_IDX 0 + +#define mmMP1_SMN_C2PMSG_53 0x0275 +#define mmMP1_SMN_C2PMSG_53_BASE_IDX 0 + +#define mmMP1_SMN_C2PMSG_54 0x0276 +#define mmMP1_SMN_C2PMSG_54_BASE_IDX 0 + +#define DEBUGSMC_MSG_Mode1Reset 2 + static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1), MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1), @@ -1879,6 +1899,35 @@ static int smu_v13_0_0_set_df_cstate(struct smu_context *smu, NULL); } +static int smu_v13_0_0_mode1_reset(struct smu_context *smu) +{ + int ret; + struct amdgpu_device *adev = smu->adev; + + if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 10)) + ret = smu_cmn_send_debug_smc_msg(smu, DEBUGSMC_MSG_Mode1Reset); + else + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_Mode1Reset, NULL); + + if (!ret) + msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS); + + return ret; +} + +static void smu_v13_0_0_set_smu_mailbox_registers(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + + smu->param_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_82); + smu->msg_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_66); + smu->resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90); + + smu->debug_param_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_53); + smu->debug_msg_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_75); + smu->debug_resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_54); +} + static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_0_set_default_dpm_table, @@ -1946,7 +1995,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .baco_enter = smu_v13_0_0_baco_enter, .baco_exit = smu_v13_0_0_baco_exit, .mode1_reset_is_support = smu_v13_0_0_is_mode1_reset_supported, - .mode1_reset = smu_v13_0_mode1_reset, + .mode1_reset = smu_v13_0_0_mode1_reset, .set_mp1_state = smu_v13_0_0_set_mp1_state, .set_df_cstate = smu_v13_0_0_set_df_cstate, }; @@ -1960,5 +2009,5 @@ void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu) smu->table_map = smu_v13_0_0_table_map; smu->pwr_src_map = smu_v13_0_0_pwr_src_map; smu->workload_map = smu_v13_0_0_workload_map; - smu_v13_0_set_smu_mailbox_registers(smu); + smu_v13_0_0_set_smu_mailbox_registers(smu); } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index e4f8f90ac5aa..768b6e7dbd77 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -233,6 +233,18 @@ static void __smu_cmn_send_msg(struct smu_context *smu, WREG32(smu->msg_reg, msg); } +static int __smu_cmn_send_debug_msg(struct smu_context *smu, + u32 msg, + u32 param) +{ + struct amdgpu_device *adev = smu->adev; + + WREG32(smu->debug_param_reg, param); + WREG32(smu->debug_msg_reg, msg); + WREG32(smu->debug_resp_reg, 0); + + return 0; +} /** * smu_cmn_send_msg_without_waiting -- send the message; don't wait for status * @smu: pointer to an SMU context @@ -386,6 +398,12 @@ int smu_cmn_send_smc_msg(struct smu_context *smu, read_arg); } +int smu_cmn_send_debug_smc_msg(struct smu_context *smu, + uint32_t msg) +{ + return __smu_cmn_send_debug_msg(smu, msg, 0); +} + int smu_cmn_to_asic_specific_index(struct smu_context *smu, enum smu_cmn2asic_mapping_type type, uint32_t index) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h index 1526ce09c399..f82cf76dd3a4 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h @@ -42,6 +42,9 @@ int smu_cmn_send_smc_msg(struct smu_context *smu, enum smu_message_type msg, uint32_t *read_arg); +int smu_cmn_send_debug_smc_msg(struct smu_context *smu, + uint32_t msg); + int smu_cmn_wait_for_response(struct smu_context *smu); int smu_cmn_to_asic_specific_index(struct smu_context *smu, From 5e594dbf6e76cf0b75ab4cff2e5d42a3f63963a5 Mon Sep 17 00:00:00 2001 From: Candice Li Date: Thu, 17 Nov 2022 20:34:15 +0800 Subject: [PATCH 076/177] drm/amd/pm: Enable bad memory page/channel recording support for smu v13_0_0 [ Upstream commit 48aa62f07467c8fcd4b4ec7851e13c83e89a1558 ] Send message to SMU to update bad memory page and bad channel info. Signed-off-by: Candice Li Reviewed-by: Evan Quan Signed-off-by: Alex Deucher Stable-dep-of: 1794f6a9535b ("drm/amd/pm: enable GPO dynamic control support for SMU13.0.0") Signed-off-by: Sasha Levin --- .../pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h | 8 +++- drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 4 +- .../drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 39 +++++++++++++++++++ 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h index 9ebb8f39732a..8b8266890a10 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h @@ -131,7 +131,13 @@ #define PPSMC_MSG_EnableAudioStutterWA 0x44 #define PPSMC_MSG_PowerUpUmsch 0x45 #define PPSMC_MSG_PowerDownUmsch 0x46 -#define PPSMC_Message_Count 0x47 +#define PPSMC_MSG_SetDcsArch 0x47 +#define PPSMC_MSG_TriggerVFFLR 0x48 +#define PPSMC_MSG_SetNumBadMemoryPagesRetired 0x49 +#define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4A +#define PPSMC_MSG_SetPriorityDeltaGain 0x4B +#define PPSMC_MSG_AllowIHHostInterrupt 0x4C +#define PPSMC_Message_Count 0x4D //Debug Dump Message #define DEBUGSMC_MSG_TestMessage 0x1 diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index 58098b82df66..a4e3425b1027 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -239,7 +239,9 @@ __SMU_DUMMY_MAP(DriverMode2Reset), \ __SMU_DUMMY_MAP(GetGfxOffStatus), \ __SMU_DUMMY_MAP(GetGfxOffEntryCount), \ - __SMU_DUMMY_MAP(LogGfxOffResidency), + __SMU_DUMMY_MAP(LogGfxOffResidency), \ + __SMU_DUMMY_MAP(SetNumBadMemoryPagesRetired), \ + __SMU_DUMMY_MAP(SetBadMemoryPagesRetiredFlagsPerChannel), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 73bae7eaefa2..884d4176b412 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -141,6 +141,9 @@ static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareMp1ForUnload, 0), MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0), MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0), + MSG_MAP(SetNumBadMemoryPagesRetired, PPSMC_MSG_SetNumBadMemoryPagesRetired, 0), + MSG_MAP(SetBadMemoryPagesRetiredFlagsPerChannel, + PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel, 0), }; static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = { @@ -1928,6 +1931,40 @@ static void smu_v13_0_0_set_smu_mailbox_registers(struct smu_context *smu) smu->debug_resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_54); } +static int smu_v13_0_0_smu_send_bad_mem_page_num(struct smu_context *smu, + uint32_t size) +{ + int ret = 0; + + /* message SMU to update the bad page number on SMUBUS */ + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetNumBadMemoryPagesRetired, + size, NULL); + if (ret) + dev_err(smu->adev->dev, + "[%s] failed to message SMU to update bad memory pages number\n", + __func__); + + return ret; +} + +static int smu_v13_0_0_send_bad_mem_channel_flag(struct smu_context *smu, + uint32_t size) +{ + int ret = 0; + + /* message SMU to update the bad channel info on SMUBUS */ + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetBadMemoryPagesRetiredFlagsPerChannel, + size, NULL); + if (ret) + dev_err(smu->adev->dev, + "[%s] failed to message SMU to update bad memory pages channel info\n", + __func__); + + return ret; +} + static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask, .set_default_dpm_table = smu_v13_0_0_set_default_dpm_table, @@ -1998,6 +2035,8 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .mode1_reset = smu_v13_0_0_mode1_reset, .set_mp1_state = smu_v13_0_0_set_mp1_state, .set_df_cstate = smu_v13_0_0_set_df_cstate, + .send_hbm_bad_pages_num = smu_v13_0_0_smu_send_bad_mem_page_num, + .send_hbm_bad_channel_flag = smu_v13_0_0_send_bad_mem_channel_flag, }; void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu) From d167ce6c47db12de69d2c2e7bdde9e608183d2a1 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 2 Dec 2022 13:56:35 +0800 Subject: [PATCH 077/177] drm/amd/pm: enable GPO dynamic control support for SMU13.0.0 [ Upstream commit 1794f6a9535bb5234c2b747d1bc6dad03249245a ] To better support UMD pstate profilings, the GPO feature needs to be switched on/off accordingly. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h | 3 ++- drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h | 3 +++ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c | 15 +++++++++++++++ .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 2 ++ 4 files changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index a4e3425b1027..4180c71d930f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -241,7 +241,8 @@ __SMU_DUMMY_MAP(GetGfxOffEntryCount), \ __SMU_DUMMY_MAP(LogGfxOffResidency), \ __SMU_DUMMY_MAP(SetNumBadMemoryPagesRetired), \ - __SMU_DUMMY_MAP(SetBadMemoryPagesRetiredFlagsPerChannel), + __SMU_DUMMY_MAP(SetBadMemoryPagesRetiredFlagsPerChannel), \ + __SMU_DUMMY_MAP(AllowGpo), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h index a9122b3b1532..e8c6febb8b64 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h @@ -273,6 +273,9 @@ int smu_v13_0_init_pptable_microcode(struct smu_context *smu); int smu_v13_0_run_btc(struct smu_context *smu); +int smu_v13_0_gpo_control(struct smu_context *smu, + bool enablement); + int smu_v13_0_deep_sleep_control(struct smu_context *smu, bool enablement); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index cfb7f4475c82..9f9f64c5cdd8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -2148,6 +2148,21 @@ int smu_v13_0_run_btc(struct smu_context *smu) return res; } +int smu_v13_0_gpo_control(struct smu_context *smu, + bool enablement) +{ + int res; + + res = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_AllowGpo, + enablement ? 1 : 0, + NULL); + if (res) + dev_err(smu->adev->dev, "SetGpoAllow %d failed!\n", enablement); + + return res; +} + int smu_v13_0_deep_sleep_control(struct smu_context *smu, bool enablement) { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c index 884d4176b412..4c20d17e7416 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c @@ -144,6 +144,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(SetNumBadMemoryPagesRetired, PPSMC_MSG_SetNumBadMemoryPagesRetired, 0), MSG_MAP(SetBadMemoryPagesRetiredFlagsPerChannel, PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel, 0), + MSG_MAP(AllowGpo, PPSMC_MSG_SetGpoAllow, 0), }; static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = { @@ -2037,6 +2038,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = { .set_df_cstate = smu_v13_0_0_set_df_cstate, .send_hbm_bad_pages_num = smu_v13_0_0_smu_send_bad_mem_page_num, .send_hbm_bad_channel_flag = smu_v13_0_0_send_bad_mem_channel_flag, + .gpo_control = smu_v13_0_gpo_control, }; void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu) From d90de26bdc97a51a195116428fbb4776394f79a7 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Fri, 2 Dec 2022 14:03:45 +0800 Subject: [PATCH 078/177] drm/amd/pm: enable GPO dynamic control support for SMU13.0.7 [ Upstream commit 62b9f835a6c60171845642afec4ce4b44865f10f ] To better support UMD pstate profilings, the GPO feature needs to be switched on/off accordingly. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0.x Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index 31deec2ce4b3..eea06939e7da 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -123,6 +123,7 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] = MSG_MAP(SetMGpuFanBoostLimitRpm, PPSMC_MSG_SetMGpuFanBoostLimitRpm, 0), MSG_MAP(DFCstateControl, PPSMC_MSG_SetExternalClientDfCstateAllow, 0), MSG_MAP(ArmD3, PPSMC_MSG_ArmD3, 0), + MSG_MAP(AllowGpo, PPSMC_MSG_SetGpoAllow, 0), }; static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT] = { @@ -1712,6 +1713,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = { .mode1_reset = smu_v13_0_mode1_reset, .set_mp1_state = smu_v13_0_7_set_mp1_state, .set_df_cstate = smu_v13_0_7_set_df_cstate, + .gpo_control = smu_v13_0_gpo_control, }; void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu) From 6ef4543f1f3b21b108021f3dec9bd02bf9f200ee Mon Sep 17 00:00:00 2001 From: Yifan Zhang Date: Wed, 12 Oct 2022 12:54:52 +0800 Subject: [PATCH 079/177] drm/amdgpu: add soc21 common ip block support for GC 11.0.4 [ Upstream commit 311d52367d0a7985ee1132662bad46f09169eed2 ] Add common soc21 ip block support for GC 11.0.4. Signed-off-by: Yifan Zhang Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher Stable-dep-of: e1d900df63ad ("drm/amdgpu: enable VCN DPG for GC IP v11.0.4") Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/soc21.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index f1913d879811..26f1e4edb4d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -653,6 +653,12 @@ static int soc21_common_early_init(void *handle) } adev->external_rev_id = adev->rev_id + 0x20; break; + case IP_VERSION(11, 0, 4): + adev->cg_flags = 0; + adev->pg_flags = 0; + adev->external_rev_id = adev->rev_id + 0x1; + break; + default: /* FIXME: not supported yet */ return -EINVAL; From e30be0ada6e2a55e8581aa55860afc6cf5183300 Mon Sep 17 00:00:00 2001 From: Saleemkhan Jamadar Date: Tue, 8 Nov 2022 11:24:48 +0530 Subject: [PATCH 080/177] drm/amdgpu: Enable pg/cg flags on GC11_0_4 for VCN [ Upstream commit 2a0fe2ca6e9c9bf9c47a9f9f0d67c13281a13f8c ] This enable VCN PG, CG and JPEG PG, CG Signed-off-by: Saleemkhan Jamadar Reviewed-by: Leo Liu Signed-off-by: Yifan Zhang Reviewed-by: Aaron Liu Signed-off-by: Alex Deucher Stable-dep-of: e1d900df63ad ("drm/amdgpu: enable VCN DPG for GC IP v11.0.4") Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/soc21.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 26f1e4edb4d5..599ddc28d8e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -654,8 +654,11 @@ static int soc21_common_early_init(void *handle) adev->external_rev_id = adev->rev_id + 0x20; break; case IP_VERSION(11, 0, 4): - adev->cg_flags = 0; - adev->pg_flags = 0; + adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG; + adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_JPEG; adev->external_rev_id = adev->rev_id + 0x1; break; From 309278736f35f0937d6338d5986a6a4a48e90d2f Mon Sep 17 00:00:00 2001 From: Saleemkhan Jamadar Date: Tue, 20 Dec 2022 13:21:44 +0530 Subject: [PATCH 081/177] drm/amdgpu: enable VCN DPG for GC IP v11.0.4 [ Upstream commit e1d900df63adcb748905131dd6258e570e11aed1 ] Enable VCN Dynamic Power Gating control for GC IP v11.0.4. Signed-off-by: Saleemkhan Jamadar Reviewed-by: Veerabadhran Gopalakrishnan Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org # 6.0, 6.1 Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/soc21.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 599ddc28d8e1..909cf9f220c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -657,6 +657,7 @@ static int soc21_common_early_init(void *handle) adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG; adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_JPEG; adev->external_rev_id = adev->rev_id + 0x1; From 68a6f7dbf8a4fd4be9a99892ee3a5809c99778a7 Mon Sep 17 00:00:00 2001 From: Aaron Thompson Date: Fri, 6 Jan 2023 22:22:44 +0000 Subject: [PATCH 082/177] mm: Always release pages to the buddy allocator in memblock_free_late(). commit 115d9d77bb0f9152c60b6e8646369fa7f6167593 upstream. If CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled, memblock_free_pages() only releases pages to the buddy allocator if they are not in the deferred range. This is correct for free pages (as defined by for_each_free_mem_pfn_range_in_zone()) because free pages in the deferred range will be initialized and released as part of the deferred init process. memblock_free_pages() is called by memblock_free_late(), which is used to free reserved ranges after memblock_free_all() has run. All pages in reserved ranges have been initialized at that point, and accordingly, those pages are not touched by the deferred init process. This means that currently, if the pages that memblock_free_late() intends to release are in the deferred range, they will never be released to the buddy allocator. They will forever be reserved. In addition, memblock_free_pages() calls kmsan_memblock_free_pages(), which is also correct for free pages but is not correct for reserved pages. KMSAN metadata for reserved pages is initialized by kmsan_init_shadow(), which runs shortly before memblock_free_all(). For both of these reasons, memblock_free_pages() should only be called for free pages, and memblock_free_late() should call __free_pages_core() directly instead. One case where this issue can occur in the wild is EFI boot on x86_64. The x86 EFI code reserves all EFI boot services memory ranges via memblock_reserve() and frees them later via memblock_free_late() (efi_reserve_boot_services() and efi_free_boot_services(), respectively). If any of those ranges happens to fall within the deferred init range, the pages will not be released and that memory will be unavailable. For example, on an Amazon EC2 t3.micro VM (1 GB) booting via EFI: v6.2-rc2: # grep -E 'Node|spanned|present|managed' /proc/zoneinfo Node 0, zone DMA spanned 4095 present 3999 managed 3840 Node 0, zone DMA32 spanned 246652 present 245868 managed 178867 v6.2-rc2 + patch: # grep -E 'Node|spanned|present|managed' /proc/zoneinfo Node 0, zone DMA spanned 4095 present 3999 managed 3840 Node 0, zone DMA32 spanned 246652 present 245868 managed 222816 # +43,949 pages Fixes: 3a80a7fa7989 ("mm: meminit: initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set") Signed-off-by: Aaron Thompson Link: https://lore.kernel.org/r/01010185892de53e-e379acfb-7044-4b24-b30a-e2657c1ba989-000000@us-west-2.amazonses.com Signed-off-by: Mike Rapoport (IBM) Signed-off-by: Greg Kroah-Hartman --- mm/memblock.c | 8 +++++++- tools/testing/memblock/internal.h | 4 ++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/mm/memblock.c b/mm/memblock.c index 511d4783dcf1..fc3d8fbd2060 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1640,7 +1640,13 @@ void __init memblock_free_late(phys_addr_t base, phys_addr_t size) end = PFN_DOWN(base + size); for (; cursor < end; cursor++) { - memblock_free_pages(pfn_to_page(cursor), cursor, 0); + /* + * Reserved pages are always initialized by the end of + * memblock_free_all() (by memmap_init() and, if deferred + * initialization is enabled, memmap_init_reserved_pages()), so + * these pages can be released directly to the buddy allocator. + */ + __free_pages_core(pfn_to_page(cursor), 0); totalram_pages_inc(); } } diff --git a/tools/testing/memblock/internal.h b/tools/testing/memblock/internal.h index fdb7f5db7308..85973e55489e 100644 --- a/tools/testing/memblock/internal.h +++ b/tools/testing/memblock/internal.h @@ -15,6 +15,10 @@ bool mirrored_kernelcore = false; struct page {}; +void __free_pages_core(struct page *page, unsigned int order) +{ +} + void memblock_free_pages(struct page *page, unsigned long pfn, unsigned int order) { From 61cbf790e7329ed78877560be7136f0b911bba7f Mon Sep 17 00:00:00 2001 From: Yunfei Wang Date: Wed, 11 Jan 2023 14:38:00 +0800 Subject: [PATCH 083/177] iommu/iova: Fix alloc iova overflows issue commit dcdb3ba7e2a8caae7bfefd603bc22fd0ce9a389c upstream. In __alloc_and_insert_iova_range, there is an issue that retry_pfn overflows. The value of iovad->anchor.pfn_hi is ~0UL, then when iovad->cached_node is iovad->anchor, curr_iova->pfn_hi + 1 will overflow. As a result, if the retry logic is executed, low_pfn is updated to 0, and then new_pfn < low_pfn returns false to make the allocation successful. This issue occurs in the following two situations: 1. The first iova size exceeds the domain size. When initializing iova domain, iovad->cached_node is assigned as iovad->anchor. For example, the iova domain size is 10M, start_pfn is 0x1_F000_0000, and the iova size allocated for the first time is 11M. The following is the log information, new->pfn_lo is smaller than iovad->cached_node. Example log as follows: [ 223.798112][T1705487] sh: [name:iova&]__alloc_and_insert_iova_range start_pfn:0x1f0000,retry_pfn:0x0,size:0xb00,limit_pfn:0x1f0a00 [ 223.799590][T1705487] sh: [name:iova&]__alloc_and_insert_iova_range success start_pfn:0x1f0000,new->pfn_lo:0x1efe00,new->pfn_hi:0x1f08ff 2. The node with the largest iova->pfn_lo value in the iova domain is deleted, iovad->cached_node will be updated to iovad->anchor, and then the alloc iova size exceeds the maximum iova size that can be allocated in the domain. After judging that retry_pfn is less than limit_pfn, call retry_pfn+1 to fix the overflow issue. Signed-off-by: jianjiao zeng Signed-off-by: Yunfei Wang Cc: # 5.15.* Fixes: 4e89dce72521 ("iommu/iova: Retry from last rb tree node if iova search fails") Acked-by: Robin Murphy Link: https://lore.kernel.org/r/20230111063801.25107-1-yf.wang@mediatek.com Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/iova.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index a44ad92fc5eb..fe452ce46642 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -197,7 +197,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, curr = __get_cached_rbnode(iovad, limit_pfn); curr_iova = to_iova(curr); - retry_pfn = curr_iova->pfn_hi + 1; + retry_pfn = curr_iova->pfn_hi; retry: do { @@ -211,7 +211,7 @@ retry: if (high_pfn < size || new_pfn < low_pfn) { if (low_pfn == iovad->start_pfn && retry_pfn < limit_pfn) { high_pfn = limit_pfn; - low_pfn = retry_pfn; + low_pfn = retry_pfn + 1; curr = iova_find_limit(iovad, limit_pfn); curr_iova = to_iova(curr); goto retry; From ead3e6c79479890444c777fd329afc125fecde48 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Dec 2022 16:12:51 +0200 Subject: [PATCH 084/177] iommu/arm-smmu-v3: Don't unregister on shutdown commit 32ea2c57dc216b6ad8125fa680d31daa5d421c95 upstream. Similar to SMMUv2, this driver calls iommu_device_unregister() from the shutdown path, which removes the IOMMU groups with no coordination whatsoever with their users - shutdown methods are optional in device drivers. This can lead to NULL pointer dereferences in those drivers' DMA API calls, or worse. Instead of calling the full arm_smmu_device_remove() from arm_smmu_device_shutdown(), let's pick only the relevant function call - arm_smmu_device_disable() - more or less the reverse of arm_smmu_device_reset() - and call just that from the shutdown path. Fixes: 57365a04c921 ("iommu: Move bus setup to IOMMU device registration") Suggested-by: Robin Murphy Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20221215141251.3688780-2-vladimir.oltean@nxp.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 6d5df91c5c46..d4d8bfee9feb 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3854,7 +3854,9 @@ static int arm_smmu_device_remove(struct platform_device *pdev) static void arm_smmu_device_shutdown(struct platform_device *pdev) { - arm_smmu_device_remove(pdev); + struct arm_smmu_device *smmu = platform_get_drvdata(pdev); + + arm_smmu_device_disable(smmu); } static const struct of_device_id arm_smmu_of_match[] = { From abebd865a8964182fc9b170fd40391565126b305 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 19 Dec 2022 19:06:22 +0100 Subject: [PATCH 085/177] iommu/mediatek-v1: Fix an error handling path in mtk_iommu_v1_probe() commit 142e821f68cf5da79ce722cb9c1323afae30e185 upstream. A clk, prepared and enabled in mtk_iommu_v1_hw_init(), is not released in the error handling path of mtk_iommu_v1_probe(). Add the corresponding clk_disable_unprepare(), as already done in the remove function. Fixes: b17336c55d89 ("iommu/mediatek: add support for mtk iommu generation one HW") Signed-off-by: Christophe JAILLET Reviewed-by: Yong Wu Reviewed-by: AngeloGioacchino Del Regno Reviewed-by: Matthias Brugger Link: https://lore.kernel.org/r/593e7b7d97c6e064b29716b091a9d4fd122241fb.1671473163.git.christophe.jaillet@wanadoo.fr Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/mtk_iommu_v1.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 6e0e65831eb7..a978220eb620 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -685,7 +685,7 @@ static int mtk_iommu_v1_probe(struct platform_device *pdev) ret = iommu_device_sysfs_add(&data->iommu, &pdev->dev, NULL, dev_name(&pdev->dev)); if (ret) - return ret; + goto out_clk_unprepare; ret = iommu_device_register(&data->iommu, &mtk_iommu_v1_ops, dev); if (ret) @@ -700,6 +700,8 @@ out_dev_unreg: iommu_device_unregister(&data->iommu); out_sysfs_remove: iommu_device_sysfs_remove(&data->iommu); +out_clk_unprepare: + clk_disable_unprepare(data->bclk); return ret; } From a1b9c7b1978aacf4b2f33e34bde1e2bb80b8497a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 15 Dec 2022 16:12:50 +0200 Subject: [PATCH 086/177] iommu/arm-smmu: Don't unregister on shutdown commit ce31e6ca68bd7639bd3e5ef97be215031842bbab upstream. Michael Walle says he noticed the following stack trace while performing a shutdown with "reboot -f". He suggests he got "lucky" and just hit the correct spot for the reboot while there was a packet transmission in flight. Unable to handle kernel NULL pointer dereference at virtual address 0000000000000098 CPU: 0 PID: 23 Comm: kworker/0:1 Not tainted 6.1.0-rc5-00088-gf3600ff8e322 #1930 Hardware name: Kontron KBox A-230-LS (DT) pc : iommu_get_dma_domain+0x14/0x20 lr : iommu_dma_map_page+0x9c/0x254 Call trace: iommu_get_dma_domain+0x14/0x20 dma_map_page_attrs+0x1ec/0x250 enetc_start_xmit+0x14c/0x10b0 enetc_xmit+0x60/0xdc dev_hard_start_xmit+0xb8/0x210 sch_direct_xmit+0x11c/0x420 __dev_queue_xmit+0x354/0xb20 ip6_finish_output2+0x280/0x5b0 __ip6_finish_output+0x15c/0x270 ip6_output+0x78/0x15c NF_HOOK.constprop.0+0x50/0xd0 mld_sendpack+0x1bc/0x320 mld_ifc_work+0x1d8/0x4dc process_one_work+0x1e8/0x460 worker_thread+0x178/0x534 kthread+0xe0/0xe4 ret_from_fork+0x10/0x20 Code: d503201f f9416800 d503233f d50323bf (f9404c00) ---[ end trace 0000000000000000 ]--- Kernel panic - not syncing: Oops: Fatal exception in interrupt This appears to be reproducible when the board has a fixed IP address, is ping flooded from another host, and "reboot -f" is used. The following is one more manifestation of the issue: $ reboot -f kvm: exiting hardware virtualization cfg80211: failed to load regulatory.db arm-smmu 5000000.iommu: disabling translation sdhci-esdhc 2140000.mmc: Removing from iommu group 11 sdhci-esdhc 2150000.mmc: Removing from iommu group 12 fsl-edma 22c0000.dma-controller: Removing from iommu group 17 dwc3 3100000.usb: Removing from iommu group 9 dwc3 3110000.usb: Removing from iommu group 10 ahci-qoriq 3200000.sata: Removing from iommu group 2 fsl-qdma 8380000.dma-controller: Removing from iommu group 20 platform f080000.display: Removing from iommu group 0 etnaviv-gpu f0c0000.gpu: Removing from iommu group 1 etnaviv etnaviv: Removing from iommu group 1 caam_jr 8010000.jr: Removing from iommu group 13 caam_jr 8020000.jr: Removing from iommu group 14 caam_jr 8030000.jr: Removing from iommu group 15 caam_jr 8040000.jr: Removing from iommu group 16 fsl_enetc 0000:00:00.0: Removing from iommu group 4 arm-smmu 5000000.iommu: Blocked unknown Stream ID 0x429; boot with "arm-smmu.disable_bypass=0" to allow, but this may have security implications arm-smmu 5000000.iommu: GFSR 0x80000002, GFSYNR0 0x00000002, GFSYNR1 0x00000429, GFSYNR2 0x00000000 fsl_enetc 0000:00:00.1: Removing from iommu group 5 arm-smmu 5000000.iommu: Blocked unknown Stream ID 0x429; boot with "arm-smmu.disable_bypass=0" to allow, but this may have security implications arm-smmu 5000000.iommu: GFSR 0x80000002, GFSYNR0 0x00000002, GFSYNR1 0x00000429, GFSYNR2 0x00000000 arm-smmu 5000000.iommu: Blocked unknown Stream ID 0x429; boot with "arm-smmu.disable_bypass=0" to allow, but this may have security implications arm-smmu 5000000.iommu: GFSR 0x80000002, GFSYNR0 0x00000000, GFSYNR1 0x00000429, GFSYNR2 0x00000000 fsl_enetc 0000:00:00.2: Removing from iommu group 6 fsl_enetc_mdio 0000:00:00.3: Removing from iommu group 8 mscc_felix 0000:00:00.5: Removing from iommu group 3 fsl_enetc 0000:00:00.6: Removing from iommu group 7 pcieport 0001:00:00.0: Removing from iommu group 18 arm-smmu 5000000.iommu: Blocked unknown Stream ID 0x429; boot with "arm-smmu.disable_bypass=0" to allow, but this may have security implications arm-smmu 5000000.iommu: GFSR 0x00000002, GFSYNR0 0x00000000, GFSYNR1 0x00000429, GFSYNR2 0x00000000 pcieport 0002:00:00.0: Removing from iommu group 19 Unable to handle kernel NULL pointer dereference at virtual address 00000000000000a8 pc : iommu_get_dma_domain+0x14/0x20 lr : iommu_dma_unmap_page+0x38/0xe0 Call trace: iommu_get_dma_domain+0x14/0x20 dma_unmap_page_attrs+0x38/0x1d0 enetc_unmap_tx_buff.isra.0+0x6c/0x80 enetc_poll+0x170/0x910 __napi_poll+0x40/0x1e0 net_rx_action+0x164/0x37c __do_softirq+0x128/0x368 run_ksoftirqd+0x68/0x90 smpboot_thread_fn+0x14c/0x190 Code: d503201f f9416800 d503233f d50323bf (f9405400) ---[ end trace 0000000000000000 ]--- Kernel panic - not syncing: Oops: Fatal exception in interrupt ---[ end Kernel panic - not syncing: Oops: Fatal exception in interrupt ]--- The problem seems to be that iommu_group_remove_device() is allowed to run with no coordination whatsoever with the shutdown procedure of the enetc PCI device. In fact, it almost seems as if it implies that the pci_driver :: shutdown() method is mandatory if DMA is used with an IOMMU, otherwise this is inevitable. That was never the case; shutdown methods are optional in device drivers. This is the call stack that leads to iommu_group_remove_device() during reboot: kernel_restart -> device_shutdown -> platform_shutdown -> arm_smmu_device_shutdown -> arm_smmu_device_remove -> iommu_device_unregister -> bus_for_each_dev -> remove_iommu_group -> iommu_release_device -> iommu_group_remove_device I don't know much about the arm_smmu driver, but arm_smmu_device_shutdown() invoking arm_smmu_device_remove() looks suspicious, since it causes the IOMMU device to unregister and that's where everything starts to unravel. It forces all other devices which depend on IOMMU groups to also point their ->shutdown() to ->remove(), which will make reboot slower overall. There are 2 moments relevant to this behavior. First was commit b06c076ea962 ("Revert "iommu/arm-smmu: Make arm-smmu explicitly non-modular"") when arm_smmu_device_shutdown() was made to run the exact same thing as arm_smmu_device_remove(). Prior to that, there was no iommu_device_unregister() call in arm_smmu_device_shutdown(). However, that was benign until commit 57365a04c921 ("iommu: Move bus setup to IOMMU device registration"), which made iommu_device_unregister() call remove_iommu_group(). Restore the old shutdown behavior by making remove() call shutdown(), but shutdown() does not call the remove() specific bits. Fixes: 57365a04c921 ("iommu: Move bus setup to IOMMU device registration") Reported-by: Michael Walle Tested-by: Michael Walle # on kontron-sl28 Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20221215141251.3688780-1-vladimir.oltean@nxp.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 30dab1418e3f..b2cf0871a5c0 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -2188,19 +2188,16 @@ static int arm_smmu_device_probe(struct platform_device *pdev) return 0; } -static int arm_smmu_device_remove(struct platform_device *pdev) +static void arm_smmu_device_shutdown(struct platform_device *pdev) { struct arm_smmu_device *smmu = platform_get_drvdata(pdev); if (!smmu) - return -ENODEV; + return; if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS)) dev_notice(&pdev->dev, "disabling translation\n"); - iommu_device_unregister(&smmu->iommu); - iommu_device_sysfs_remove(&smmu->iommu); - arm_smmu_rpm_get(smmu); /* Turn the thing off */ arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD); @@ -2212,12 +2209,21 @@ static int arm_smmu_device_remove(struct platform_device *pdev) clk_bulk_disable(smmu->num_clks, smmu->clks); clk_bulk_unprepare(smmu->num_clks, smmu->clks); - return 0; } -static void arm_smmu_device_shutdown(struct platform_device *pdev) +static int arm_smmu_device_remove(struct platform_device *pdev) { - arm_smmu_device_remove(pdev); + struct arm_smmu_device *smmu = platform_get_drvdata(pdev); + + if (!smmu) + return -ENODEV; + + iommu_device_unregister(&smmu->iommu); + iommu_device_sysfs_remove(&smmu->iommu); + + arm_smmu_device_shutdown(pdev); + + return 0; } static int __maybe_unused arm_smmu_runtime_resume(struct device *dev) From bad1a2194485fa7c8d727c06d26c70676901db8a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 15 Dec 2022 16:51:55 +0000 Subject: [PATCH 087/177] iommu/arm-smmu: Report IOMMU_CAP_CACHE_COHERENCY even betterer commit ac9c5e92dd15b9927e7355ccf79df76a58b44344 upstream. Although it's vanishingly unlikely that anyone would integrate an SMMU within a coherent interconnect without also making the pagetable walk interface coherent, the same effect happens if a coherent SMMU fails to advertise CTTW correctly. This turns out to be the case on some popular NXP SoCs, where VFIO started failing the IOMMU_CAP_CACHE_COHERENCY test, even though IOMMU_CACHE *was* previously achieving the desired effect anyway thanks to the underlying integration. While those SoCs stand to gain some more general benefits from a firmware update to override CTTW correctly in DT/ACPI, it's also easy to work around this in Linux as well, to avoid imposing too much on affected users - since the upstream client devices *are* correctly marked as coherent, we can trivially infer their coherent paths through the SMMU as well. Reported-by: Vladimir Oltean Fixes: df198b37e72c ("iommu/arm-smmu: Report IOMMU_CAP_CACHE_COHERENCY better") Signed-off-by: Robin Murphy Tested-by: Vladimir Oltean Link: https://lore.kernel.org/r/d6dc41952961e5c7b21acac08a8bf1eb0f69e124.1671123115.git.robin.murphy@arm.com Signed-off-by: Will Deacon Signed-off-by: Greg Kroah-Hartman --- drivers/iommu/arm/arm-smmu/arm-smmu.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index b2cf0871a5c0..f38b54a88767 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1319,8 +1319,14 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: - /* Assume that a coherent TCU implies coherent TBUs */ - return cfg->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK; + /* + * It's overwhelmingly the case in practice that when the pagetable + * walk interface is connected to a coherent interconnect, all the + * translation interfaces are too. Furthermore if the device is + * natively coherent, then its translation interface must also be. + */ + return cfg->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK || + device_get_dma_attr(dev) == DEV_DMA_COHERENT; case IOMMU_CAP_NOEXEC: return true; default: From 7b5cc7fd1789ea5dbb942c9f8207b076d365badc Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 30 Dec 2022 23:11:19 -0500 Subject: [PATCH 088/177] sched/core: Fix use-after-free bug in dup_user_cpus_ptr() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 87ca4f9efbd7cc649ff43b87970888f2812945b8 upstream. Since commit 07ec77a1d4e8 ("sched: Allow task CPU affinity to be restricted on asymmetric systems"), the setting and clearing of user_cpus_ptr are done under pi_lock for arm64 architecture. However, dup_user_cpus_ptr() accesses user_cpus_ptr without any lock protection. Since sched_setaffinity() can be invoked from another process, the process being modified may be undergoing fork() at the same time. When racing with the clearing of user_cpus_ptr in __set_cpus_allowed_ptr_locked(), it can lead to user-after-free and possibly double-free in arm64 kernel. Commit 8f9ea86fdf99 ("sched: Always preserve the user requested cpumask") fixes this problem as user_cpus_ptr, once set, will never be cleared in a task's lifetime. However, this bug was re-introduced in commit 851a723e45d1 ("sched: Always clear user_cpus_ptr in do_set_cpus_allowed()") which allows the clearing of user_cpus_ptr in do_set_cpus_allowed(). This time, it will affect all arches. Fix this bug by always clearing the user_cpus_ptr of the newly cloned/forked task before the copying process starts and check the user_cpus_ptr state of the source task under pi_lock. Note to stable, this patch won't be applicable to stable releases. Just copy the new dup_user_cpus_ptr() function over. Fixes: 07ec77a1d4e8 ("sched: Allow task CPU affinity to be restricted on asymmetric systems") Fixes: 851a723e45d1 ("sched: Always clear user_cpus_ptr in do_set_cpus_allowed()") Reported-by: David Wang 王标 Signed-off-by: Waiman Long Signed-off-by: Ingo Molnar Reviewed-by: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221231041120.440785-2-longman@redhat.com Signed-off-by: Greg Kroah-Hartman --- kernel/sched/core.c | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 535af9fbea7b..981e41cc4121 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2587,14 +2587,43 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node) { - if (!src->user_cpus_ptr) + cpumask_t *user_mask; + unsigned long flags; + + /* + * Always clear dst->user_cpus_ptr first as their user_cpus_ptr's + * may differ by now due to racing. + */ + dst->user_cpus_ptr = NULL; + + /* + * This check is racy and losing the race is a valid situation. + * It is not worth the extra overhead of taking the pi_lock on + * every fork/clone. + */ + if (data_race(!src->user_cpus_ptr)) return 0; - dst->user_cpus_ptr = kmalloc_node(cpumask_size(), GFP_KERNEL, node); - if (!dst->user_cpus_ptr) + user_mask = kmalloc_node(cpumask_size(), GFP_KERNEL, node); + if (!user_mask) return -ENOMEM; - cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr); + /* + * Use pi_lock to protect content of user_cpus_ptr + * + * Though unlikely, user_cpus_ptr can be reset to NULL by a concurrent + * do_set_cpus_allowed(). + */ + raw_spin_lock_irqsave(&src->pi_lock, flags); + if (src->user_cpus_ptr) { + swap(dst->user_cpus_ptr, user_mask); + cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr); + } + raw_spin_unlock_irqrestore(&src->pi_lock, flags); + + if (unlikely(user_mask)) + kfree(user_mask); + return 0; } From e88865876d47c790be0d5e23973499d75d034364 Mon Sep 17 00:00:00 2001 From: Gavrilov Ilia Date: Wed, 11 Jan 2023 11:57:39 +0000 Subject: [PATCH 089/177] netfilter: ipset: Fix overflow before widen in the bitmap_ip_create() function. commit 9ea4b476cea1b7d461d16dda25ca3c7e616e2d15 upstream. When first_ip is 0, last_ip is 0xFFFFFFFF, and netmask is 31, the value of an arithmetic expression 2 << (netmask - mask_bits - 1) is subject to overflow due to a failure casting operands to a larger data type before performing the arithmetic. Note that it's harmless since the value will be checked at the next step. Found by InfoTeCS on behalf of Linux Verification Center (linuxtesting.org) with SVACE. Fixes: b9fed748185a ("netfilter: ipset: Check and reject crazy /0 input parameters") Signed-off-by: Ilia.Gavrilov Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/ipset/ip_set_bitmap_ip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index a8ce04a4bb72..e4fa00abde6a 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -308,8 +308,8 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[], return -IPSET_ERR_BITMAP_RANGE; pr_debug("mask_bits %u, netmask %u\n", mask_bits, netmask); - hosts = 2 << (32 - netmask - 1); - elements = 2 << (netmask - mask_bits - 1); + hosts = 2U << (32 - netmask - 1); + elements = 2UL << (netmask - mask_bits - 1); } if (elements > IPSET_BITMAP_MAX_RANGE + 1) return -IPSET_ERR_BITMAP_RANGE_SIZE; From 213b22abdf8f4892e86c07e65d8deb6c3e77c939 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 4 Jan 2023 12:54:42 +0100 Subject: [PATCH 090/177] selftests: netfilter: fix transaction test script timeout handling commit c273289fac370b6488757236cd62cc2cf04830b7 upstream. The kselftest framework uses a default timeout of 45 seconds for all test scripts. Increase the timeout to two minutes for the netfilter tests, this should hopefully be enough, Make sure that, should the script be canceled, the net namespace and the spawned ping instances are removed. Fixes: 25d8bcedbf43 ("selftests: add script to stress-test nft packet path vs. control plane") Reported-by: Mirsad Goran Todorovac Signed-off-by: Florian Westphal Tested-by: Mirsad Goran Todorovac Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- .../selftests/netfilter/nft_trans_stress.sh | 16 +++++++++------- tools/testing/selftests/netfilter/settings | 1 + 2 files changed, 10 insertions(+), 7 deletions(-) create mode 100644 tools/testing/selftests/netfilter/settings diff --git a/tools/testing/selftests/netfilter/nft_trans_stress.sh b/tools/testing/selftests/netfilter/nft_trans_stress.sh index a7f62ad4f661..2ffba45a78bf 100755 --- a/tools/testing/selftests/netfilter/nft_trans_stress.sh +++ b/tools/testing/selftests/netfilter/nft_trans_stress.sh @@ -10,12 +10,20 @@ ksft_skip=4 testns=testns-$(mktemp -u "XXXXXXXX") +tmp="" tables="foo bar baz quux" global_ret=0 eret=0 lret=0 +cleanup() { + ip netns pids "$testns" | xargs kill 2>/dev/null + ip netns del "$testns" + + rm -f "$tmp" +} + check_result() { local r=$1 @@ -43,6 +51,7 @@ if [ $? -ne 0 ];then exit $ksft_skip fi +trap cleanup EXIT tmp=$(mktemp) for table in $tables; do @@ -139,11 +148,4 @@ done check_result $lret "add/delete with nftrace enabled" -pkill -9 ping - -wait - -rm -f "$tmp" -ip netns del "$testns" - exit $global_ret diff --git a/tools/testing/selftests/netfilter/settings b/tools/testing/selftests/netfilter/settings new file mode 100644 index 000000000000..6091b45d226b --- /dev/null +++ b/tools/testing/selftests/netfilter/settings @@ -0,0 +1 @@ +timeout=120 From 424bcb570cb320d1d15238cd4c933522b90f78fa Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Fri, 6 Jan 2023 12:21:57 +0530 Subject: [PATCH 091/177] powerpc/imc-pmu: Fix use of mutex in IRQs disabled section commit 76d588dddc459fefa1da96e0a081a397c5c8e216 upstream. Current imc-pmu code triggers a WARNING with CONFIG_DEBUG_ATOMIC_SLEEP and CONFIG_PROVE_LOCKING enabled, while running a thread_imc event. Command to trigger the warning: # perf stat -e thread_imc/CPM_CS_FROM_L4_MEM_X_DPTEG/ sleep 5 Performance counter stats for 'sleep 5': 0 thread_imc/CPM_CS_FROM_L4_MEM_X_DPTEG/ 5.002117947 seconds time elapsed 0.000131000 seconds user 0.001063000 seconds sys Below is snippet of the warning in dmesg: BUG: sleeping function called from invalid context at kernel/locking/mutex.c:580 in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 2869, name: perf-exec preempt_count: 2, expected: 0 4 locks held by perf-exec/2869: #0: c00000004325c540 (&sig->cred_guard_mutex){+.+.}-{3:3}, at: bprm_execve+0x64/0xa90 #1: c00000004325c5d8 (&sig->exec_update_lock){++++}-{3:3}, at: begin_new_exec+0x460/0xef0 #2: c0000003fa99d4e0 (&cpuctx_lock){-...}-{2:2}, at: perf_event_exec+0x290/0x510 #3: c000000017ab8418 (&ctx->lock){....}-{2:2}, at: perf_event_exec+0x29c/0x510 irq event stamp: 4806 hardirqs last enabled at (4805): [] _raw_spin_unlock_irqrestore+0x94/0xd0 hardirqs last disabled at (4806): [] perf_event_exec+0x394/0x510 softirqs last enabled at (0): [] copy_process+0xc34/0x1ff0 softirqs last disabled at (0): [<0000000000000000>] 0x0 CPU: 36 PID: 2869 Comm: perf-exec Not tainted 6.2.0-rc2-00011-g1247637727f2 #61 Hardware name: 8375-42A POWER9 0x4e1202 opal:v7.0-16-g9b85f7d961 PowerNV Call Trace: dump_stack_lvl+0x98/0xe0 (unreliable) __might_resched+0x2f8/0x310 __mutex_lock+0x6c/0x13f0 thread_imc_event_add+0xf4/0x1b0 event_sched_in+0xe0/0x210 merge_sched_in+0x1f0/0x600 visit_groups_merge.isra.92.constprop.166+0x2bc/0x6c0 ctx_flexible_sched_in+0xcc/0x140 ctx_sched_in+0x20c/0x2a0 ctx_resched+0x104/0x1c0 perf_event_exec+0x340/0x510 begin_new_exec+0x730/0xef0 load_elf_binary+0x3f8/0x1e10 ... do not call blocking ops when !TASK_RUNNING; state=2001 set at [<00000000fd63e7cf>] do_nanosleep+0x60/0x1a0 WARNING: CPU: 36 PID: 2869 at kernel/sched/core.c:9912 __might_sleep+0x9c/0xb0 CPU: 36 PID: 2869 Comm: sleep Tainted: G W 6.2.0-rc2-00011-g1247637727f2 #61 Hardware name: 8375-42A POWER9 0x4e1202 opal:v7.0-16-g9b85f7d961 PowerNV NIP: c000000000194a1c LR: c000000000194a18 CTR: c000000000a78670 REGS: c00000004d2134e0 TRAP: 0700 Tainted: G W (6.2.0-rc2-00011-g1247637727f2) MSR: 9000000000021033 CR: 48002824 XER: 00000000 CFAR: c00000000013fb64 IRQMASK: 1 The above warning triggered because the current imc-pmu code uses mutex lock in interrupt disabled sections. The function mutex_lock() internally calls __might_resched(), which will check if IRQs are disabled and in case IRQs are disabled, it will trigger the warning. Fix the issue by changing the mutex lock to spinlock. Fixes: 8f95faaac56c ("powerpc/powernv: Detect and create IMC device") Reported-by: Michael Petlan Reported-by: Peter Zijlstra Signed-off-by: Kajol Jain [mpe: Fix comments, trim oops in change log, add reported-by tags] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20230106065157.182648-1-kjain@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/include/asm/imc-pmu.h | 2 +- arch/powerpc/perf/imc-pmu.c | 136 ++++++++++++++--------------- 2 files changed, 67 insertions(+), 71 deletions(-) diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h index 4f897993b710..699a88584ae1 100644 --- a/arch/powerpc/include/asm/imc-pmu.h +++ b/arch/powerpc/include/asm/imc-pmu.h @@ -137,7 +137,7 @@ struct imc_pmu { * are inited. */ struct imc_pmu_ref { - struct mutex lock; + spinlock_t lock; unsigned int id; int refc; }; diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index d517aba94d1b..100e97daf76b 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -14,6 +14,7 @@ #include #include #include +#include /* Nest IMC data structures and variables */ @@ -21,7 +22,7 @@ * Used to avoid races in counting the nest-pmu units during hotplug * register and unregister */ -static DEFINE_MUTEX(nest_init_lock); +static DEFINE_SPINLOCK(nest_init_lock); static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc); static struct imc_pmu **per_nest_pmu_arr; static cpumask_t nest_imc_cpumask; @@ -50,7 +51,7 @@ static int trace_imc_mem_size; * core and trace-imc */ static struct imc_pmu_ref imc_global_refc = { - .lock = __MUTEX_INITIALIZER(imc_global_refc.lock), + .lock = __SPIN_LOCK_INITIALIZER(imc_global_refc.lock), .id = 0, .refc = 0, }; @@ -400,7 +401,7 @@ static int ppc_nest_imc_cpu_offline(unsigned int cpu) get_hard_smp_processor_id(cpu)); /* * If this is the last cpu in this chip then, skip the reference - * count mutex lock and make the reference count on this chip zero. + * count lock and make the reference count on this chip zero. */ ref = get_nest_pmu_ref(cpu); if (!ref) @@ -462,15 +463,15 @@ static void nest_imc_counters_release(struct perf_event *event) /* * See if we need to disable the nest PMU. * If no events are currently in use, then we have to take a - * mutex to ensure that we don't race with another task doing + * lock to ensure that we don't race with another task doing * enable or disable the nest counters. */ ref = get_nest_pmu_ref(event->cpu); if (!ref) return; - /* Take the mutex lock for this node and then decrement the reference count */ - mutex_lock(&ref->lock); + /* Take the lock for this node and then decrement the reference count */ + spin_lock(&ref->lock); if (ref->refc == 0) { /* * The scenario where this is true is, when perf session is @@ -482,7 +483,7 @@ static void nest_imc_counters_release(struct perf_event *event) * an OPAL call to disable the engine in that node. * */ - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); return; } ref->refc--; @@ -490,7 +491,7 @@ static void nest_imc_counters_release(struct perf_event *event) rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST, get_hard_smp_processor_id(event->cpu)); if (rc) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("nest-imc: Unable to stop the counters for core %d\n", node_id); return; } @@ -498,7 +499,7 @@ static void nest_imc_counters_release(struct perf_event *event) WARN(1, "nest-imc: Invalid event reference count\n"); ref->refc = 0; } - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); } static int nest_imc_event_init(struct perf_event *event) @@ -557,26 +558,25 @@ static int nest_imc_event_init(struct perf_event *event) /* * Get the imc_pmu_ref struct for this node. - * Take the mutex lock and then increment the count of nest pmu events - * inited. + * Take the lock and then increment the count of nest pmu events inited. */ ref = get_nest_pmu_ref(event->cpu); if (!ref) return -EINVAL; - mutex_lock(&ref->lock); + spin_lock(&ref->lock); if (ref->refc == 0) { rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST, get_hard_smp_processor_id(event->cpu)); if (rc) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("nest-imc: Unable to start the counters for node %d\n", node_id); return rc; } } ++ref->refc; - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); event->destroy = nest_imc_counters_release; return 0; @@ -612,9 +612,8 @@ static int core_imc_mem_init(int cpu, int size) return -ENOMEM; mem_info->vbase = page_address(page); - /* Init the mutex */ core_imc_refc[core_id].id = core_id; - mutex_init(&core_imc_refc[core_id].lock); + spin_lock_init(&core_imc_refc[core_id].lock); rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_CORE, __pa((void *)mem_info->vbase), @@ -703,9 +702,8 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu) perf_pmu_migrate_context(&core_imc_pmu->pmu, cpu, ncpu); } else { /* - * If this is the last cpu in this core then, skip taking refernce - * count mutex lock for this core and directly zero "refc" for - * this core. + * If this is the last cpu in this core then skip taking reference + * count lock for this core and directly zero "refc" for this core. */ opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, get_hard_smp_processor_id(cpu)); @@ -720,11 +718,11 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu) * last cpu in this core and core-imc event running * in this cpu. */ - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); if (imc_global_refc.id == IMC_DOMAIN_CORE) imc_global_refc.refc--; - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); } return 0; } @@ -739,7 +737,7 @@ static int core_imc_pmu_cpumask_init(void) static void reset_global_refc(struct perf_event *event) { - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); imc_global_refc.refc--; /* @@ -751,7 +749,7 @@ static void reset_global_refc(struct perf_event *event) imc_global_refc.refc = 0; imc_global_refc.id = 0; } - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); } static void core_imc_counters_release(struct perf_event *event) @@ -764,17 +762,17 @@ static void core_imc_counters_release(struct perf_event *event) /* * See if we need to disable the IMC PMU. * If no events are currently in use, then we have to take a - * mutex to ensure that we don't race with another task doing + * lock to ensure that we don't race with another task doing * enable or disable the core counters. */ core_id = event->cpu / threads_per_core; - /* Take the mutex lock and decrement the refernce count for this core */ + /* Take the lock and decrement the refernce count for this core */ ref = &core_imc_refc[core_id]; if (!ref) return; - mutex_lock(&ref->lock); + spin_lock(&ref->lock); if (ref->refc == 0) { /* * The scenario where this is true is, when perf session is @@ -786,7 +784,7 @@ static void core_imc_counters_release(struct perf_event *event) * an OPAL call to disable the engine in that core. * */ - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); return; } ref->refc--; @@ -794,7 +792,7 @@ static void core_imc_counters_release(struct perf_event *event) rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, get_hard_smp_processor_id(event->cpu)); if (rc) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("IMC: Unable to stop the counters for core %d\n", core_id); return; } @@ -802,7 +800,7 @@ static void core_imc_counters_release(struct perf_event *event) WARN(1, "core-imc: Invalid event reference count\n"); ref->refc = 0; } - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); reset_global_refc(event); } @@ -840,7 +838,6 @@ static int core_imc_event_init(struct perf_event *event) if ((!pcmi->vbase)) return -ENODEV; - /* Get the core_imc mutex for this core */ ref = &core_imc_refc[core_id]; if (!ref) return -EINVAL; @@ -848,22 +845,22 @@ static int core_imc_event_init(struct perf_event *event) /* * Core pmu units are enabled only when it is used. * See if this is triggered for the first time. - * If yes, take the mutex lock and enable the core counters. + * If yes, take the lock and enable the core counters. * If not, just increment the count in core_imc_refc struct. */ - mutex_lock(&ref->lock); + spin_lock(&ref->lock); if (ref->refc == 0) { rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE, get_hard_smp_processor_id(event->cpu)); if (rc) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("core-imc: Unable to start the counters for core %d\n", core_id); return rc; } } ++ref->refc; - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); /* * Since the system can run either in accumulation or trace-mode @@ -874,7 +871,7 @@ static int core_imc_event_init(struct perf_event *event) * to know whether any other trace/thread imc * events are running. */ - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_CORE) { /* * No other trace/thread imc events are running in @@ -883,10 +880,10 @@ static int core_imc_event_init(struct perf_event *event) imc_global_refc.id = IMC_DOMAIN_CORE; imc_global_refc.refc++; } else { - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); return -EBUSY; } - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK); event->destroy = core_imc_counters_release; @@ -958,10 +955,10 @@ static int ppc_thread_imc_cpu_offline(unsigned int cpu) mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63)))); /* Reduce the refc if thread-imc event running on this cpu */ - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); if (imc_global_refc.id == IMC_DOMAIN_THREAD) imc_global_refc.refc--; - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); return 0; } @@ -1001,7 +998,7 @@ static int thread_imc_event_init(struct perf_event *event) if (!target) return -EINVAL; - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); /* * Check if any other trace/core imc events are running in the * system, if not set the global id to thread-imc. @@ -1010,10 +1007,10 @@ static int thread_imc_event_init(struct perf_event *event) imc_global_refc.id = IMC_DOMAIN_THREAD; imc_global_refc.refc++; } else { - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); return -EBUSY; } - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); event->pmu->task_ctx_nr = perf_sw_context; event->destroy = reset_global_refc; @@ -1135,25 +1132,25 @@ static int thread_imc_event_add(struct perf_event *event, int flags) /* * imc pmus are enabled only when it is used. * See if this is triggered for the first time. - * If yes, take the mutex lock and enable the counters. + * If yes, take the lock and enable the counters. * If not, just increment the count in ref count struct. */ ref = &core_imc_refc[core_id]; if (!ref) return -EINVAL; - mutex_lock(&ref->lock); + spin_lock(&ref->lock); if (ref->refc == 0) { if (opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE, get_hard_smp_processor_id(smp_processor_id()))) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("thread-imc: Unable to start the counter\ for core %d\n", core_id); return -EINVAL; } } ++ref->refc; - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); return 0; } @@ -1170,12 +1167,12 @@ static void thread_imc_event_del(struct perf_event *event, int flags) return; } - mutex_lock(&ref->lock); + spin_lock(&ref->lock); ref->refc--; if (ref->refc == 0) { if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, get_hard_smp_processor_id(smp_processor_id()))) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("thread-imc: Unable to stop the counters\ for core %d\n", core_id); return; @@ -1183,7 +1180,7 @@ static void thread_imc_event_del(struct perf_event *event, int flags) } else if (ref->refc < 0) { ref->refc = 0; } - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); /* Set bit 0 of LDBAR to zero, to stop posting updates to memory */ mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63)))); @@ -1224,9 +1221,8 @@ static int trace_imc_mem_alloc(int cpu_id, int size) } } - /* Init the mutex, if not already */ trace_imc_refc[core_id].id = core_id; - mutex_init(&trace_imc_refc[core_id].lock); + spin_lock_init(&trace_imc_refc[core_id].lock); mtspr(SPRN_LDBAR, 0); return 0; @@ -1246,10 +1242,10 @@ static int ppc_trace_imc_cpu_offline(unsigned int cpu) * Reduce the refc if any trace-imc event running * on this cpu. */ - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); if (imc_global_refc.id == IMC_DOMAIN_TRACE) imc_global_refc.refc--; - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); return 0; } @@ -1371,17 +1367,17 @@ static int trace_imc_event_add(struct perf_event *event, int flags) } mtspr(SPRN_LDBAR, ldbar_value); - mutex_lock(&ref->lock); + spin_lock(&ref->lock); if (ref->refc == 0) { if (opal_imc_counters_start(OPAL_IMC_COUNTERS_TRACE, get_hard_smp_processor_id(smp_processor_id()))) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("trace-imc: Unable to start the counters for core %d\n", core_id); return -EINVAL; } } ++ref->refc; - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); return 0; } @@ -1414,19 +1410,19 @@ static void trace_imc_event_del(struct perf_event *event, int flags) return; } - mutex_lock(&ref->lock); + spin_lock(&ref->lock); ref->refc--; if (ref->refc == 0) { if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_TRACE, get_hard_smp_processor_id(smp_processor_id()))) { - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); pr_err("trace-imc: Unable to stop the counters for core %d\n", core_id); return; } } else if (ref->refc < 0) { ref->refc = 0; } - mutex_unlock(&ref->lock); + spin_unlock(&ref->lock); trace_imc_event_stop(event, flags); } @@ -1448,7 +1444,7 @@ static int trace_imc_event_init(struct perf_event *event) * no other thread is running any core/thread imc * events */ - mutex_lock(&imc_global_refc.lock); + spin_lock(&imc_global_refc.lock); if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_TRACE) { /* * No core/thread imc events are running in the @@ -1457,10 +1453,10 @@ static int trace_imc_event_init(struct perf_event *event) imc_global_refc.id = IMC_DOMAIN_TRACE; imc_global_refc.refc++; } else { - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); return -EBUSY; } - mutex_unlock(&imc_global_refc.lock); + spin_unlock(&imc_global_refc.lock); event->hw.idx = -1; @@ -1533,10 +1529,10 @@ static int init_nest_pmu_ref(void) i = 0; for_each_node(nid) { /* - * Mutex lock to avoid races while tracking the number of + * Take the lock to avoid races while tracking the number of * sessions using the chip's nest pmu units. */ - mutex_init(&nest_imc_refc[i].lock); + spin_lock_init(&nest_imc_refc[i].lock); /* * Loop to init the "id" with the node_id. Variable "i" initialized to @@ -1633,7 +1629,7 @@ static void imc_common_mem_free(struct imc_pmu *pmu_ptr) static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) { if (pmu_ptr->domain == IMC_DOMAIN_NEST) { - mutex_lock(&nest_init_lock); + spin_lock(&nest_init_lock); if (nest_pmus == 1) { cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE); kfree(nest_imc_refc); @@ -1643,7 +1639,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) if (nest_pmus > 0) nest_pmus--; - mutex_unlock(&nest_init_lock); + spin_unlock(&nest_init_lock); } /* Free core_imc memory */ @@ -1800,11 +1796,11 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id * rest. To handle the cpuhotplug callback unregister, we track * the number of nest pmus in "nest_pmus". */ - mutex_lock(&nest_init_lock); + spin_lock(&nest_init_lock); if (nest_pmus == 0) { ret = init_nest_pmu_ref(); if (ret) { - mutex_unlock(&nest_init_lock); + spin_unlock(&nest_init_lock); kfree(per_nest_pmu_arr); per_nest_pmu_arr = NULL; goto err_free_mem; @@ -1812,7 +1808,7 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id /* Register for cpu hotplug notification. */ ret = nest_pmu_cpumask_init(); if (ret) { - mutex_unlock(&nest_init_lock); + spin_unlock(&nest_init_lock); kfree(nest_imc_refc); kfree(per_nest_pmu_arr); per_nest_pmu_arr = NULL; @@ -1820,7 +1816,7 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id } } nest_pmus++; - mutex_unlock(&nest_init_lock); + spin_unlock(&nest_init_lock); break; case IMC_DOMAIN_CORE: ret = core_imc_pmu_cpumask_init(); From b43d52eeca7d86e09ad683b570eb791d31625a44 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 10 Jan 2023 12:15:40 +0100 Subject: [PATCH 092/177] x86/boot: Avoid using Intel mnemonics in AT&T syntax asm commit 7c6dd961d0c8e7e8f9fdc65071fb09ece702e18d upstream. With 'GNU assembler (GNU Binutils for Debian) 2.39.90.20221231' the build now reports: arch/x86/realmode/rm/../../boot/bioscall.S: Assembler messages: arch/x86/realmode/rm/../../boot/bioscall.S:35: Warning: found `movsd'; assuming `movsl' was meant arch/x86/realmode/rm/../../boot/bioscall.S:70: Warning: found `movsd'; assuming `movsl' was meant arch/x86/boot/bioscall.S: Assembler messages: arch/x86/boot/bioscall.S:35: Warning: found `movsd'; assuming `movsl' was meant arch/x86/boot/bioscall.S:70: Warning: found `movsd'; assuming `movsl' was meant Which is due to: PR gas/29525 Note that with the dropped CMPSD and MOVSD Intel Syntax string insn templates taking operands, mixed IsString/non-IsString template groups (with memory operands) cannot occur anymore. With that maybe_adjust_templates() becomes unnecessary (and is hence being removed). More details: https://sourceware.org/bugzilla/show_bug.cgi?id=29525 Borislav Petkov further explains: " the particular problem here is is that the 'd' suffix is "conflicting" in the sense that you can have SSE mnemonics like movsD %xmm... and the same thing also for string ops (which is the case here) so apparently the agreement in binutils land is to use the always accepted suffixes 'l' or 'q' and phase out 'd' slowly... " Fixes: 7a734e7dd93b ("x86, setup: "glove box" BIOS calls -- infrastructure") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/Y71I3Ex2pvIxMpsP@hirez.programming.kicks-ass.net Signed-off-by: Greg Kroah-Hartman --- arch/x86/boot/bioscall.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S index 5521ea12f44e..aa9b96457584 100644 --- a/arch/x86/boot/bioscall.S +++ b/arch/x86/boot/bioscall.S @@ -32,7 +32,7 @@ intcall: movw %dx, %si movw %sp, %di movw $11, %cx - rep; movsd + rep; movsl /* Pop full state from the stack */ popal @@ -67,7 +67,7 @@ intcall: jz 4f movw %sp, %si movw $11, %cx - rep; movsd + rep; movsl 4: addw $44, %sp /* Restore state and return */ From 1dc8c5574cc2491016691b86a70fcfa4d05279ce Mon Sep 17 00:00:00 2001 From: Eliav Farber Date: Thu, 20 Oct 2022 12:44:58 +0000 Subject: [PATCH 093/177] EDAC/device: Fix period calculation in edac_device_reset_delay_period() commit e84077437902ec99eba0a6b516df772653f142c7 upstream. Fix period calculation in case user sets a value of 1000. The input of round_jiffies_relative() should be in jiffies and not in milli-seconds. [ bp: Use the same code pattern as in edac_device_workq_setup() for clarity. ] Fixes: c4cf3b454eca ("EDAC: Rework workqueue handling") Signed-off-by: Eliav Farber Signed-off-by: Borislav Petkov (AMD) Cc: Link: https://lore.kernel.org/r/20221020124458.22153-1-farbere@amazon.com Signed-off-by: Greg Kroah-Hartman --- drivers/edac/edac_device.c | 17 ++++++++--------- drivers/edac/edac_module.h | 2 +- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index 19522c568aa5..878deb4880cd 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -394,17 +394,16 @@ static void edac_device_workq_teardown(struct edac_device_ctl_info *edac_dev) * Then restart the workq on the new delay */ void edac_device_reset_delay_period(struct edac_device_ctl_info *edac_dev, - unsigned long value) + unsigned long msec) { - unsigned long jiffs = msecs_to_jiffies(value); + edac_dev->poll_msec = msec; + edac_dev->delay = msecs_to_jiffies(msec); - if (value == 1000) - jiffs = round_jiffies_relative(value); - - edac_dev->poll_msec = value; - edac_dev->delay = jiffs; - - edac_mod_work(&edac_dev->work, jiffs); + /* See comment in edac_device_workq_setup() above */ + if (edac_dev->poll_msec == 1000) + edac_mod_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay)); + else + edac_mod_work(&edac_dev->work, edac_dev->delay); } int edac_device_alloc_index(void) diff --git a/drivers/edac/edac_module.h b/drivers/edac/edac_module.h index 50ed9f2425bb..4ed24d664d83 100644 --- a/drivers/edac/edac_module.h +++ b/drivers/edac/edac_module.h @@ -52,7 +52,7 @@ bool edac_stop_work(struct delayed_work *work); bool edac_mod_work(struct delayed_work *work, unsigned long delay); extern void edac_device_reset_delay_period(struct edac_device_ctl_info - *edac_dev, unsigned long value); + *edac_dev, unsigned long msec); extern void edac_mc_reset_delay_period(unsigned long value); /* From c1c59538337ab6d45700cb4a1c9725e67f59bc6e Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 10 Jan 2023 07:54:27 +0100 Subject: [PATCH 094/177] x86/pat: Fix pat_x_mtrr_type() for MTRR disabled case commit 90b926e68f500844dff16b5bcea178dc55cf580a upstream. Since 72cbc8f04fe2 ("x86/PAT: Have pat_enabled() properly reflect state when running on Xen") PAT can be enabled without MTRR. This has resulted in problems e.g. for a SEV-SNP guest running under Hyper-V, when trying to establish a new mapping via memremap() with WB caching mode, as pat_x_mtrr_type() will call mtrr_type_lookup(), which in turn is returning MTRR_TYPE_INVALID due to MTRR being disabled in this configuration. The result is a mapping with UC- caching, leading to severe performance degradation. Fix that by handling MTRR_TYPE_INVALID the same way as MTRR_TYPE_WRBACK in pat_x_mtrr_type() because MTRR_TYPE_INVALID means MTRRs are disabled. [ bp: Massage commit message. ] Fixes: 72cbc8f04fe2 ("x86/PAT: Have pat_enabled() properly reflect state when running on Xen") Reported-by: Michael Kelley (LINUX) Signed-off-by: Juergen Gross Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Michael Kelley Tested-by: Michael Kelley Cc: Link: https://lore.kernel.org/r/20230110065427.20767-1-jgross@suse.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/pat/memtype.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 66a209f7eb86..2642bc4c8ec0 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -434,7 +434,8 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, u8 mtrr_type, uniform; mtrr_type = mtrr_type_lookup(start, end, &uniform); - if (mtrr_type != MTRR_TYPE_WRBACK) + if (mtrr_type != MTRR_TYPE_WRBACK && + mtrr_type != MTRR_TYPE_INVALID) return _PAGE_CACHE_MODE_UC_MINUS; return _PAGE_CACHE_MODE_WB; From d01c6557478bfe10eeef280abae700def7de3ffd Mon Sep 17 00:00:00 2001 From: Peter Newman Date: Tue, 20 Dec 2022 17:11:23 +0100 Subject: [PATCH 095/177] x86/resctrl: Fix task CLOSID/RMID update race commit fe1f0714385fbcf76b0cbceb02b7277d842014fc upstream. When the user moves a running task to a new rdtgroup using the task's file interface or by deleting its rdtgroup, the resulting change in CLOSID/RMID must be immediately propagated to the PQR_ASSOC MSR on the task(s) CPUs. x86 allows reordering loads with prior stores, so if the task starts running between a task_curr() check that the CPU hoisted before the stores in the CLOSID/RMID update then it can start running with the old CLOSID/RMID until it is switched again because __rdtgroup_move_task() failed to determine that it needs to be interrupted to obtain the new CLOSID/RMID. Refer to the diagram below: CPU 0 CPU 1 ----- ----- __rdtgroup_move_task(): curr <- t1->cpu->rq->curr __schedule(): rq->curr <- t1 resctrl_sched_in(): t1->{closid,rmid} -> {1,1} t1->{closid,rmid} <- {2,2} if (curr == t1) // false IPI(t1->cpu) A similar race impacts rdt_move_group_tasks(), which updates tasks in a deleted rdtgroup. In both cases, use smp_mb() to order the task_struct::{closid,rmid} stores before the loads in task_curr(). In particular, in the rdt_move_group_tasks() case, simply execute an smp_mb() on every iteration with a matching task. It is possible to use a single smp_mb() in rdt_move_group_tasks(), but this would require two passes and a means of remembering which task_structs were updated in the first loop. However, benchmarking results below showed too little performance impact in the simple approach to justify implementing the two-pass approach. Times below were collected using `perf stat` to measure the time to remove a group containing a 1600-task, parallel workload. CPU: Intel(R) Xeon(R) Platinum P-8136 CPU @ 2.00GHz (112 threads) # mkdir /sys/fs/resctrl/test # echo $$ > /sys/fs/resctrl/test/tasks # perf bench sched messaging -g 40 -l 100000 task-clock time ranges collected using: # perf stat rmdir /sys/fs/resctrl/test Baseline: 1.54 - 1.60 ms smp_mb() every matching task: 1.57 - 1.67 ms [ bp: Massage commit message. ] Fixes: ae28d1aae48a ("x86/resctrl: Use an IPI instead of task_work_add() to update PQR_ASSOC MSR") Fixes: 0efc89be9471 ("x86/intel_rdt: Update task closid immediately on CPU in rmdir and unmount") Signed-off-by: Peter Newman Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Reviewed-by: Babu Moger Cc: Link: https://lore.kernel.org/r/20221220161123.432120-1-peternewman@google.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/resctrl/rdtgroup.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index e5a48f05e787..5993da21d822 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -580,8 +580,10 @@ static int __rdtgroup_move_task(struct task_struct *tsk, /* * Ensure the task's closid and rmid are written before determining if * the task is current that will decide if it will be interrupted. + * This pairs with the full barrier between the rq->curr update and + * resctrl_sched_in() during context switch. */ - barrier(); + smp_mb(); /* * By now, the task's closid and rmid are set. If the task is current @@ -2401,6 +2403,14 @@ static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to, WRITE_ONCE(t->closid, to->closid); WRITE_ONCE(t->rmid, to->mon.rmid); + /* + * Order the closid/rmid stores above before the loads + * in task_curr(). This pairs with the full barrier + * between the rq->curr update and resctrl_sched_in() + * during context switch. + */ + smp_mb(); + /* * If the task is on a CPU, set the CPU in the mask. * The detection is inaccurate as tasks might move or From 07ac5db2d840e47428c871097809dc70e68f66c3 Mon Sep 17 00:00:00 2001 From: Peter Newman Date: Tue, 20 Dec 2022 17:41:31 +0100 Subject: [PATCH 096/177] x86/resctrl: Fix event counts regression in reused RMIDs commit 2a81160d29d65b5876ab3f824fda99ae0219f05e upstream. When creating a new monitoring group, the RMID allocated for it may have been used by a group which was previously removed. In this case, the hardware counters will have non-zero values which should be deducted from what is reported in the new group's counts. resctrl_arch_reset_rmid() initializes the prev_msr value for counters to 0, causing the initial count to be charged to the new group. Resurrect __rmid_read() and use it to initialize prev_msr correctly. Unlike before, __rmid_read() checks for error bits in the MSR read so that callers don't need to. Fixes: 1d81d15db39c ("x86/resctrl: Move mbm_overflow_count() into resctrl_arch_rmid_read()") Signed-off-by: Peter Newman Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Tested-by: Babu Moger Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221220164132.443083-1-peternewman@google.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/resctrl/monitor.c | 49 ++++++++++++++++++--------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index efe0c30d3a12..77538abeb72a 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -146,6 +146,30 @@ static inline struct rmid_entry *__rmid_entry(u32 rmid) return entry; } +static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val) +{ + u64 msr_val; + + /* + * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured + * with a valid event code for supported resource type and the bits + * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID, + * IA32_QM_CTR.data (bits 61:0) reports the monitored data. + * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62) + * are error bits. + */ + wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid); + rdmsrl(MSR_IA32_QM_CTR, msr_val); + + if (msr_val & RMID_VAL_ERROR) + return -EIO; + if (msr_val & RMID_VAL_UNAVAIL) + return -EINVAL; + + *val = msr_val; + return 0; +} + static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom, u32 rmid, enum resctrl_event_id eventid) @@ -172,8 +196,12 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d, struct arch_mbm_state *am; am = get_arch_mbm_state(hw_dom, rmid, eventid); - if (am) + if (am) { memset(am, 0, sizeof(*am)); + + /* Record any initial, non-zero count value. */ + __rmid_read(rmid, eventid, &am->prev_msr); + } } static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) @@ -191,25 +219,14 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d, struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d); struct arch_mbm_state *am; u64 msr_val, chunks; + int ret; if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask)) return -EINVAL; - /* - * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured - * with a valid event code for supported resource type and the bits - * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID, - * IA32_QM_CTR.data (bits 61:0) reports the monitored data. - * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62) - * are error bits. - */ - wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid); - rdmsrl(MSR_IA32_QM_CTR, msr_val); - - if (msr_val & RMID_VAL_ERROR) - return -EIO; - if (msr_val & RMID_VAL_UNAVAIL) - return -EINVAL; + ret = __rmid_read(rmid, eventid, &msr_val); + if (ret) + return ret; am = get_arch_mbm_state(hw_dom, rmid, eventid); if (am) { From ad1336274f733a7cb1f87b5c5908165a2c14df53 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Sun, 27 Nov 2022 22:06:02 +0100 Subject: [PATCH 097/177] regulator: da9211: Use irq handler when ready [ Upstream commit 02228f6aa6a64d588bc31e3267d05ff184d772eb ] If the system does not come from reset (like when it is kexec()), the regulator might have an IRQ waiting for us. If we enable the IRQ handler before its structures are ready, we crash. This patch fixes: [ 1.141839] Unable to handle kernel read from unreadable memory at virtual address 0000000000000078 [ 1.316096] Call trace: [ 1.316101] blocking_notifier_call_chain+0x20/0xa8 [ 1.322757] cpu cpu0: dummy supplies not allowed for exclusive requests [ 1.327823] regulator_notifier_call_chain+0x1c/0x2c [ 1.327825] da9211_irq_handler+0x68/0xf8 [ 1.327829] irq_thread+0x11c/0x234 [ 1.327833] kthread+0x13c/0x154 Signed-off-by: Ricardo Ribalda Reviewed-by: Adam Ward Link: https://lore.kernel.org/r/20221124-da9211-v2-0-1779e3c5d491@chromium.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/regulator/da9211-regulator.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/regulator/da9211-regulator.c b/drivers/regulator/da9211-regulator.c index e01b32d1fa17..00828f5baa97 100644 --- a/drivers/regulator/da9211-regulator.c +++ b/drivers/regulator/da9211-regulator.c @@ -498,6 +498,12 @@ static int da9211_i2c_probe(struct i2c_client *i2c) chip->chip_irq = i2c->irq; + ret = da9211_regulator_init(chip); + if (ret < 0) { + dev_err(chip->dev, "Failed to initialize regulator: %d\n", ret); + return ret; + } + if (chip->chip_irq != 0) { ret = devm_request_threaded_irq(chip->dev, chip->chip_irq, NULL, da9211_irq_handler, @@ -512,11 +518,6 @@ static int da9211_i2c_probe(struct i2c_client *i2c) dev_warn(chip->dev, "No IRQ configured\n"); } - ret = da9211_regulator_init(chip); - - if (ret < 0) - dev_err(chip->dev, "Failed to initialize regulator: %d\n", ret); - return ret; } From 87c71e88f6a6619ffb1ff88f84dff48ef6d57adb Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 4 Dec 2022 11:52:44 -0800 Subject: [PATCH 098/177] scsi: storvsc: Fix swiotlb bounce buffer leak in confidential VM [ Upstream commit 67ff3d0a49f3d445c3922e30a54e03c161da561e ] storvsc_queuecommand() maps the scatter/gather list using scsi_dma_map(), which in a confidential VM allocates swiotlb bounce buffers. If the I/O submission fails in storvsc_do_io(), the I/O is typically retried by higher level code, but the bounce buffer memory is never freed. The mostly like cause of I/O submission failure is a full VMBus channel ring buffer, which is not uncommon under high I/O loads. Eventually enough bounce buffer memory leaks that the confidential VM can't do any I/O. The same problem can arise in a non-confidential VM with kernel boot parameter swiotlb=force. Fix this by doing scsi_dma_unmap() in the case of an I/O submission error, which frees the bounce buffer memory. Fixes: 743b237c3a7b ("scsi: storvsc: Add Isolation VM support for storvsc driver") Signed-off-by: Michael Kelley Link: https://lore.kernel.org/r/1670183564-76254-1-git-send-email-mikelley@microsoft.com Tested-by: Dexuan Cui Reviewed-by: Dexuan Cui Reviewed-by: Tianyu Lan Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/storvsc_drv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 3c5b7e4227b2..55d6fb452680 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1823,6 +1823,9 @@ static int storvsc_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scmnd) ret = storvsc_do_io(dev, cmd_request, get_cpu()); put_cpu(); + if (ret) + scsi_dma_unmap(scmnd); + if (ret == -EAGAIN) { /* no more space */ ret = SCSI_MLQUEUE_DEVICE_BUSY; From 9bfa3a61503743c91081004141953da90d2eaca0 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Wed, 7 Dec 2022 11:36:59 +0900 Subject: [PATCH 099/177] scsi: mpi3mr: Refer CONFIG_SCSI_MPI3MR in Makefile [ Upstream commit f0a43ba6c66cc0688e2748d986a1459fdd3442ef ] When Kconfig item CONFIG_SCSI_MPI3MR was introduced for mpi3mr driver, the Makefile of the driver was not modified to refer the Kconfig item. As a result, mpi3mr.ko is built regardless of the Kconfig item value y or m. Also, if 'make localmodconfig' can not find the Kconfig item in the Makefile, then it does not generate CONFIG_SCSI_MPI3MR=m even when mpi3mr.ko is loaded on the system. Refer to the Kconfig item to avoid the issues. Fixes: c4f7ac64616e ("scsi: mpi3mr: Add mpi30 Rev-R headers and Kconfig") Signed-off-by: Shin'ichiro Kawasaki Link: https://lore.kernel.org/r/20221207023659.2411785-1-shinichiro.kawasaki@wdc.com Reviewed-by: Damien Le Moal Acked-by: Sathya Prakash Veerichetty Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/mpi3mr/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/mpi3mr/Makefile b/drivers/scsi/mpi3mr/Makefile index ef86ca46646b..3bf8cf34e1c3 100644 --- a/drivers/scsi/mpi3mr/Makefile +++ b/drivers/scsi/mpi3mr/Makefile @@ -1,5 +1,5 @@ # mpi3mr makefile -obj-m += mpi3mr.o +obj-$(CONFIG_SCSI_MPI3MR) += mpi3mr.o mpi3mr-y += mpi3mr_os.o \ mpi3mr_fw.o \ mpi3mr_app.o \ From bf5838132d1eeff6bf09d604bedf98778ceac1cf Mon Sep 17 00:00:00 2001 From: Peter Wang Date: Thu, 8 Dec 2022 15:25:20 +0800 Subject: [PATCH 100/177] scsi: ufs: core: WLUN suspend SSU/enter hibern8 fail recovery [ Upstream commit 1a5665fc8d7a000671ebd3fe69c6f9acf1e0dcd9 ] When SSU/enter hibern8 fail in WLUN suspend flow, trigger the error handler and return busy to break the suspend. Otherwise the consumer will get stuck in runtime suspend status. Fixes: b294ff3e3449 ("scsi: ufs: core: Enable power management for wlun") Signed-off-by: Peter Wang Link: https://lore.kernel.org/r/20221208072520.26210-1-peter.wang@mediatek.com Reviewed-by: Stanley Chu Reviewed-by: Bart Van Assche Reviewed-by: Adrian Hunter Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/ufs/core/ufshcd.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index d1db6be80156..b048357d21e3 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -6094,6 +6094,14 @@ void ufshcd_schedule_eh_work(struct ufs_hba *hba) } } +static void ufshcd_force_error_recovery(struct ufs_hba *hba) +{ + spin_lock_irq(hba->host->host_lock); + hba->force_reset = true; + ufshcd_schedule_eh_work(hba); + spin_unlock_irq(hba->host->host_lock); +} + static void ufshcd_clk_scaling_allow(struct ufs_hba *hba, bool allow) { down_write(&hba->clk_scaling_lock); @@ -9066,6 +9074,15 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) if (!hba->dev_info.b_rpm_dev_flush_capable) { ret = ufshcd_set_dev_pwr_mode(hba, req_dev_pwr_mode); + if (ret && pm_op != UFS_SHUTDOWN_PM) { + /* + * If return err in suspend flow, IO will hang. + * Trigger error handler and break suspend for + * error recovery. + */ + ufshcd_force_error_recovery(hba); + ret = -EBUSY; + } if (ret) goto enable_scaling; } @@ -9077,6 +9094,15 @@ static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) */ check_for_bkops = !ufshcd_is_ufs_dev_deepsleep(hba); ret = ufshcd_link_state_transition(hba, req_link_state, check_for_bkops); + if (ret && pm_op != UFS_SHUTDOWN_PM) { + /* + * If return err in suspend flow, IO will hang. + * Trigger error handler and break suspend for + * error recovery. + */ + ufshcd_force_error_recovery(hba); + ret = -EBUSY; + } if (ret) goto set_dev_active; From c8eb7ac95a1eea0d467bcf8f3443be22bee6b9a0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Dec 2022 14:25:48 +0100 Subject: [PATCH 101/177] ASoC: Intel: fix sof-nau8825 link failure [ Upstream commit 63f3d99b7efe4c5404a9388c05780917099cecf4 ] The snd-soc-sof_nau8825.ko module fails to link unless the sof_realtek_common support is also enabled: ERROR: modpost: "sof_rt1015p_codec_conf" [sound/soc/intel/boards/snd-soc-sof_nau8825.ko] undefined! ERROR: modpost: "sof_rt1015p_dai_link" [sound/soc/intel/boards/snd-soc-sof_nau8825.ko] undefined! Fixes: 8d0872f6239f ("ASoC: Intel: add sof-nau8825 machine driver") Signed-off-by: Arnd Bergmann Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20221221132559.2402341-1-arnd@kernel.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/boards/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/intel/boards/Kconfig b/sound/soc/intel/boards/Kconfig index aa12d7e3dd2f..ca49cc49c378 100644 --- a/sound/soc/intel/boards/Kconfig +++ b/sound/soc/intel/boards/Kconfig @@ -558,6 +558,7 @@ config SND_SOC_INTEL_SOF_NAU8825_MACH select SND_SOC_HDAC_HDMI select SND_SOC_INTEL_HDA_DSP_COMMON select SND_SOC_INTEL_SOF_MAXIM_COMMON + select SND_SOC_INTEL_SOF_REALTEK_COMMON help This adds support for ASoC machine driver for SOF platforms with nau8825 codec. From bc9b113d3c88689d67cf5fe47b473f96786d8b47 Mon Sep 17 00:00:00 2001 From: Brent Lu Date: Thu, 17 Nov 2022 17:19:19 -0600 Subject: [PATCH 102/177] ASoC: Intel: sof_nau8825: support rt1015p speaker amplifier [ Upstream commit 13c459fa37c9f26e9bf884a832dd67598b5c4d3e ] Add rt1015p speaker amplifier support with a new board info 'adl_rt1015p_nau8825' which supports NAU8825 on SSP0 and ALC1015Q on SSP1. Reviewed-by: Bard Liao Signed-off-by: Brent Lu Signed-off-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20221117231919.112483-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown Stable-dep-of: 3e78986a840d ("ASoC: Intel: sof-nau8825: fix module alias overflow") Signed-off-by: Sasha Levin --- sound/soc/intel/boards/sof_nau8825.c | 16 ++++++++++++++++ .../soc/intel/common/soc-acpi-intel-adl-match.c | 12 ++++++++++++ 2 files changed, 28 insertions(+) diff --git a/sound/soc/intel/boards/sof_nau8825.c b/sound/soc/intel/boards/sof_nau8825.c index 5585c217f78d..27880224359d 100644 --- a/sound/soc/intel/boards/sof_nau8825.c +++ b/sound/soc/intel/boards/sof_nau8825.c @@ -47,6 +47,7 @@ #define SOF_RT1019P_SPEAKER_AMP_PRESENT BIT(14) #define SOF_MAX98373_SPEAKER_AMP_PRESENT BIT(15) #define SOF_MAX98360A_SPEAKER_AMP_PRESENT BIT(16) +#define SOF_RT1015P_SPEAKER_AMP_PRESENT BIT(17) static unsigned long sof_nau8825_quirk = SOF_NAU8825_SSP_CODEC(0); @@ -483,6 +484,8 @@ static struct snd_soc_dai_link *sof_card_dai_links_create(struct device *dev, } else if (sof_nau8825_quirk & SOF_MAX98360A_SPEAKER_AMP_PRESENT) { max_98360a_dai_link(&links[id]); + } else if (sof_nau8825_quirk & SOF_RT1015P_SPEAKER_AMP_PRESENT) { + sof_rt1015p_dai_link(&links[id]); } else { goto devm_err; } @@ -576,6 +579,8 @@ static int sof_audio_probe(struct platform_device *pdev) if (sof_nau8825_quirk & SOF_MAX98373_SPEAKER_AMP_PRESENT) max_98373_set_codec_conf(&sof_audio_card_nau8825); + else if (sof_nau8825_quirk & SOF_RT1015P_SPEAKER_AMP_PRESENT) + sof_rt1015p_codec_conf(&sof_audio_card_nau8825); if (sof_nau8825_quirk & SOF_SSP_BT_OFFLOAD_PRESENT) sof_audio_card_nau8825.num_links++; @@ -642,6 +647,16 @@ static const struct platform_device_id board_ids[] = { SOF_SSP_BT_OFFLOAD_PRESENT), }, + { + .name = "adl_rt1015p_nau8825", + .driver_data = (kernel_ulong_t)(SOF_NAU8825_SSP_CODEC(0) | + SOF_SPEAKER_AMP_PRESENT | + SOF_RT1015P_SPEAKER_AMP_PRESENT | + SOF_NAU8825_SSP_AMP(1) | + SOF_NAU8825_NUM_HDMIDEV(4) | + SOF_BT_OFFLOAD_SSP(2) | + SOF_SSP_BT_OFFLOAD_PRESENT), + }, { } }; MODULE_DEVICE_TABLE(platform, board_ids); @@ -663,3 +678,4 @@ MODULE_AUTHOR("Mac Chiang "); MODULE_LICENSE("GPL"); MODULE_IMPORT_NS(SND_SOC_INTEL_HDA_DSP_COMMON); MODULE_IMPORT_NS(SND_SOC_INTEL_SOF_MAXIM_COMMON); +MODULE_IMPORT_NS(SND_SOC_INTEL_SOF_REALTEK_COMMON); diff --git a/sound/soc/intel/common/soc-acpi-intel-adl-match.c b/sound/soc/intel/common/soc-acpi-intel-adl-match.c index 9990d5502d26..ce4d8ec86f2c 100644 --- a/sound/soc/intel/common/soc-acpi-intel-adl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-adl-match.c @@ -430,6 +430,11 @@ static const struct snd_soc_acpi_codecs adl_rt5682_rt5682s_hp = { .codecs = {"10EC5682", "RTL5682"}, }; +static const struct snd_soc_acpi_codecs adl_rt1015p_amp = { + .num_codecs = 1, + .codecs = {"RTL1015"} +}; + static const struct snd_soc_acpi_codecs adl_rt1019p_amp = { .num_codecs = 1, .codecs = {"RTL1019"} @@ -495,6 +500,13 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_adl_machines[] = { .quirk_data = &adl_rt1019p_amp, .sof_tplg_filename = "sof-adl-rt1019-rt5682.tplg", }, + { + .id = "10508825", + .drv_name = "adl_rt1015p_nau8825", + .machine_quirk = snd_soc_acpi_codec_list, + .quirk_data = &adl_rt1015p_amp, + .sof_tplg_filename = "sof-adl-rt1015-nau8825.tplg", + }, { .id = "10508825", .drv_name = "sof_nau8825", From fba1b23befd88366fe646787b3797e64d7338fd2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 21 Dec 2022 14:24:56 +0100 Subject: [PATCH 103/177] ASoC: Intel: sof-nau8825: fix module alias overflow [ Upstream commit 3e78986a840d59dd27e636eae3f52dc11125c835 ] The maximum name length for a platform_device_id entry is 20 characters including the trailing NUL byte. The sof_nau8825.c file exceeds that, which causes an obscure error message: sound/soc/intel/boards/snd-soc-sof_nau8825.mod.c:35:45: error: illegal character encoding in string literal [-Werror,-Winvalid-source-encoding] MODULE_ALIAS("platform:adl_max98373_nau8825"); ^~~~ include/linux/module.h:168:49: note: expanded from macro 'MODULE_ALIAS' ^~~~~~ include/linux/module.h:165:56: note: expanded from macro 'MODULE_INFO' ^~~~ include/linux/moduleparam.h:26:47: note: expanded from macro '__MODULE_INFO' = __MODULE_INFO_PREFIX __stringify(tag) "=" info I could not figure out how to make the module handling robust enough to handle this better, but as a quick fix, using slightly shorter names that are still unique avoids the build issue. Fixes: 8d0872f6239f ("ASoC: Intel: add sof-nau8825 machine driver") Signed-off-by: Arnd Bergmann Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20221221132515.2363276-1-arnd@kernel.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/intel/boards/sof_nau8825.c | 8 ++++---- sound/soc/intel/common/soc-acpi-intel-adl-match.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sound/soc/intel/boards/sof_nau8825.c b/sound/soc/intel/boards/sof_nau8825.c index 27880224359d..009a41fbefa1 100644 --- a/sound/soc/intel/boards/sof_nau8825.c +++ b/sound/soc/intel/boards/sof_nau8825.c @@ -618,7 +618,7 @@ static const struct platform_device_id board_ids[] = { }, { - .name = "adl_rt1019p_nau8825", + .name = "adl_rt1019p_8825", .driver_data = (kernel_ulong_t)(SOF_NAU8825_SSP_CODEC(0) | SOF_SPEAKER_AMP_PRESENT | SOF_RT1019P_SPEAKER_AMP_PRESENT | @@ -626,7 +626,7 @@ static const struct platform_device_id board_ids[] = { SOF_NAU8825_NUM_HDMIDEV(4)), }, { - .name = "adl_max98373_nau8825", + .name = "adl_max98373_8825", .driver_data = (kernel_ulong_t)(SOF_NAU8825_SSP_CODEC(0) | SOF_SPEAKER_AMP_PRESENT | SOF_MAX98373_SPEAKER_AMP_PRESENT | @@ -637,7 +637,7 @@ static const struct platform_device_id board_ids[] = { }, { /* The limitation of length of char array, shorten the name */ - .name = "adl_mx98360a_nau8825", + .name = "adl_mx98360a_8825", .driver_data = (kernel_ulong_t)(SOF_NAU8825_SSP_CODEC(0) | SOF_SPEAKER_AMP_PRESENT | SOF_MAX98360A_SPEAKER_AMP_PRESENT | @@ -648,7 +648,7 @@ static const struct platform_device_id board_ids[] = { }, { - .name = "adl_rt1015p_nau8825", + .name = "adl_rt1015p_8825", .driver_data = (kernel_ulong_t)(SOF_NAU8825_SSP_CODEC(0) | SOF_SPEAKER_AMP_PRESENT | SOF_RT1015P_SPEAKER_AMP_PRESENT | diff --git a/sound/soc/intel/common/soc-acpi-intel-adl-match.c b/sound/soc/intel/common/soc-acpi-intel-adl-match.c index ce4d8ec86f2c..68b4fa352354 100644 --- a/sound/soc/intel/common/soc-acpi-intel-adl-match.c +++ b/sound/soc/intel/common/soc-acpi-intel-adl-match.c @@ -474,21 +474,21 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_adl_machines[] = { }, { .id = "10508825", - .drv_name = "adl_rt1019p_nau8825", + .drv_name = "adl_rt1019p_8825", .machine_quirk = snd_soc_acpi_codec_list, .quirk_data = &adl_rt1019p_amp, .sof_tplg_filename = "sof-adl-rt1019-nau8825.tplg", }, { .id = "10508825", - .drv_name = "adl_max98373_nau8825", + .drv_name = "adl_max98373_8825", .machine_quirk = snd_soc_acpi_codec_list, .quirk_data = &adl_max98373_amp, .sof_tplg_filename = "sof-adl-max98373-nau8825.tplg", }, { .id = "10508825", - .drv_name = "adl_mx98360a_nau8825", + .drv_name = "adl_mx98360a_8825", .machine_quirk = snd_soc_acpi_codec_list, .quirk_data = &adl_max98360a_amp, .sof_tplg_filename = "sof-adl-max98360a-nau8825.tplg", @@ -502,7 +502,7 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_adl_machines[] = { }, { .id = "10508825", - .drv_name = "adl_rt1015p_nau8825", + .drv_name = "adl_rt1015p_8825", .machine_quirk = snd_soc_acpi_codec_list, .quirk_data = &adl_rt1015p_amp, .sof_tplg_filename = "sof-adl-rt1015-nau8825.tplg", From 7928737dd1498bc365068d766ccf4eaeaff8ddf4 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Tue, 15 Nov 2022 09:49:02 +0800 Subject: [PATCH 104/177] drm/msm/dpu: Fix some kernel-doc comments [ Upstream commit 1bdeb321d1f856346fe0078af09c9e7ffbd2ca7a ] Make the description of @init to @p in dpu_encoder_phys_wb_init() and remove @wb_roi in dpu_encoder_phys_wb_setup_fb() to clear the below warnings: drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c:139: warning: Excess function parameter 'wb_roi' description in 'dpu_encoder_phys_wb_setup_fb' drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c:699: warning: Function parameter or member 'p' not described in 'dpu_encoder_phys_wb_init' drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c:699: warning: Excess function parameter 'init' description in 'dpu_encoder_phys_wb_init' Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=3067 Reported-by: Abaci Robot Signed-off-by: Yang Li Fixes: d7d0e73f7de3 ("drm/msm/dpu: introduce the dpu_encoder_phys_* for writeback") Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/511605/ Link: https://lore.kernel.org/r/20221115014902.45240-1-yang.lee@linux.alibaba.com Signed-off-by: Abhinav Kumar Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c index 7cbcef6efe17..62f6ff6abf41 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c @@ -132,7 +132,6 @@ static void dpu_encoder_phys_wb_set_qos(struct dpu_encoder_phys *phys_enc) * dpu_encoder_phys_wb_setup_fb - setup output framebuffer * @phys_enc: Pointer to physical encoder * @fb: Pointer to output framebuffer - * @wb_roi: Pointer to output region of interest */ static void dpu_encoder_phys_wb_setup_fb(struct dpu_encoder_phys *phys_enc, struct drm_framebuffer *fb) @@ -692,7 +691,7 @@ static void dpu_encoder_phys_wb_init_ops(struct dpu_encoder_phys_ops *ops) /** * dpu_encoder_phys_wb_init - initialize writeback encoder - * @init: Pointer to init info structure with initialization params + * @p: Pointer to init info structure with initialization params */ struct dpu_encoder_phys *dpu_encoder_phys_wb_init( struct dpu_enc_phys_init_params *p) From c6fa1de83fd87267ab24359e6fa52f98f5cee3f9 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Wed, 7 Dec 2022 10:59:22 +0400 Subject: [PATCH 105/177] drm/msm/dpu: Fix memory leak in msm_mdss_parse_data_bus_icc_path [ Upstream commit 45dac1352b55b1d8cb17f218936b2bc2bc1fb4ee ] of_icc_get() alloc resources for path1, we should release it when not need anymore. Early return when IS_ERR_OR_NULL(path0) may leak path1. Defer getting path1 to fix this. Fixes: b9364eed9232 ("drm/msm/dpu: Move min BW request and full BW disable back to mdss") Signed-off-by: Miaoqian Lin Reviewed-by: Douglas Anderson Reviewed-by: Abhinav Kumar Patchwork: https://patchwork.freedesktop.org/patch/514264/ Link: https://lore.kernel.org/r/20221207065922.2086368-1-linmq006@gmail.com Signed-off-by: Abhinav Kumar Signed-off-by: Sasha Levin --- drivers/gpu/drm/msm/msm_mdss.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index e13c5c12b775..3b8d6991b04e 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -46,15 +46,17 @@ struct msm_mdss { static int msm_mdss_parse_data_bus_icc_path(struct device *dev, struct msm_mdss *msm_mdss) { - struct icc_path *path0 = of_icc_get(dev, "mdp0-mem"); - struct icc_path *path1 = of_icc_get(dev, "mdp1-mem"); + struct icc_path *path0; + struct icc_path *path1; + path0 = of_icc_get(dev, "mdp0-mem"); if (IS_ERR_OR_NULL(path0)) return PTR_ERR_OR_ZERO(path0); msm_mdss->path[0] = path0; msm_mdss->num_paths = 1; + path1 = of_icc_get(dev, "mdp1-mem"); if (!IS_ERR_OR_NULL(path1)) { msm_mdss->path[1] = path1; msm_mdss->num_paths++; From c5d032482c23cf7aaec1490a77f7d85dbea1f3aa Mon Sep 17 00:00:00 2001 From: Emanuele Ghidoli Date: Fri, 23 Dec 2022 09:02:47 +0100 Subject: [PATCH 106/177] ASoC: wm8904: fix wrong outputs volume after power reactivation [ Upstream commit 472a6309c6467af89dbf660a8310369cc9cb041f ] Restore volume after charge pump and PGA activation to ensure that volume settings are correctly applied when re-enabling codec from SND_SOC_BIAS_OFF state. CLASS_W, CHARGE_PUMP and POWER_MANAGEMENT_2 register configuration affect how the volume register are applied and must be configured first. Fixes: a91eb199e4dc ("ASoC: Initial WM8904 CODEC driver") Link: https://lore.kernel.org/all/c7864c35-738c-a867-a6a6-ddf9f98df7e7@gmail.com/ Signed-off-by: Emanuele Ghidoli Signed-off-by: Francesco Dolcini Acked-by: Charles Keepax Link: https://lore.kernel.org/r/20221223080247.7258-1-francesco@dolcini.it Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/wm8904.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/codecs/wm8904.c b/sound/soc/codecs/wm8904.c index ca6a01a230af..791d8738d1c0 100644 --- a/sound/soc/codecs/wm8904.c +++ b/sound/soc/codecs/wm8904.c @@ -697,6 +697,7 @@ static int out_pga_event(struct snd_soc_dapm_widget *w, int dcs_mask; int dcs_l, dcs_r; int dcs_l_reg, dcs_r_reg; + int an_out_reg; int timeout; int pwr_reg; @@ -712,6 +713,7 @@ static int out_pga_event(struct snd_soc_dapm_widget *w, dcs_mask = WM8904_DCS_ENA_CHAN_0 | WM8904_DCS_ENA_CHAN_1; dcs_r_reg = WM8904_DC_SERVO_8; dcs_l_reg = WM8904_DC_SERVO_9; + an_out_reg = WM8904_ANALOGUE_OUT1_LEFT; dcs_l = 0; dcs_r = 1; break; @@ -720,6 +722,7 @@ static int out_pga_event(struct snd_soc_dapm_widget *w, dcs_mask = WM8904_DCS_ENA_CHAN_2 | WM8904_DCS_ENA_CHAN_3; dcs_r_reg = WM8904_DC_SERVO_6; dcs_l_reg = WM8904_DC_SERVO_7; + an_out_reg = WM8904_ANALOGUE_OUT2_LEFT; dcs_l = 2; dcs_r = 3; break; @@ -792,6 +795,10 @@ static int out_pga_event(struct snd_soc_dapm_widget *w, snd_soc_component_update_bits(component, reg, WM8904_HPL_ENA_OUTP | WM8904_HPR_ENA_OUTP, WM8904_HPL_ENA_OUTP | WM8904_HPR_ENA_OUTP); + + /* Update volume, requires PGA to be powered */ + val = snd_soc_component_read(component, an_out_reg); + snd_soc_component_write(component, an_out_reg, val); break; case SND_SOC_DAPM_POST_PMU: From 6e4131245240910ba6810927dc555df947017d8a Mon Sep 17 00:00:00 2001 From: Mikhail Zhilkin Date: Thu, 8 Dec 2022 23:28:29 +0300 Subject: [PATCH 107/177] mtd: parsers: scpart: fix __udivdi3 undefined on mips [ Upstream commit 105c14b84d93168431abba5d55e6c26fa4b65abb ] This fixes the following compile error on mips architecture with clang version 16.0.0 reported by the 0-DAY CI Kernel Test Service: ld.lld: error: undefined symbol: __udivdi3 referenced by scpart.c mtd/parsers/scpart.o:(scpart_parse) in archive drivers/built-in.a As a workaround this makes 'offs' a 32-bit type. This is enough, because the mtd containing partition table practically does not exceed 1 MB. We can revert this when the [Link] has been resolved. Link: https://github.com/ClangBuiltLinux/linux/issues/1635 Fixes: 9b78ef0c7997 ("mtd: parsers: add support for Sercomm partitions") Reported-by: kernel test robot Suggested-by: Arnd Bergmann Signed-off-by: Mikhail Zhilkin Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/805fe58e-690f-6a3f-5ebf-2f6f6e6e4599@gmail.com Signed-off-by: Sasha Levin --- drivers/mtd/parsers/scpart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/parsers/scpart.c b/drivers/mtd/parsers/scpart.c index 02601bb33de4..6e5e11c37078 100644 --- a/drivers/mtd/parsers/scpart.c +++ b/drivers/mtd/parsers/scpart.c @@ -50,7 +50,7 @@ static int scpart_scan_partmap(struct mtd_info *master, loff_t partmap_offs, int cnt = 0; int res = 0; int res2; - loff_t offs; + uint32_t offs; size_t retlen; struct sc_part_desc *pdesc = NULL; struct sc_part_desc *tmpdesc; From fc78cb85abd0c5dbe5c057057d5dca94a1dfa808 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 20 Dec 2022 15:13:34 +0100 Subject: [PATCH 108/177] mtd: cfi: allow building spi-intel standalone [ Upstream commit d19ab1f785d0b6b9f709799f0938658903821ba1 ] When MTD or MTD_CFI_GEOMETRY is disabled, the spi-intel driver fails to build, as it includes the shared CFI header: include/linux/mtd/cfi.h:62:2: error: #warning No CONFIG_MTD_CFI_Ix selected. No NOR chip support can work. [-Werror=cpp] 62 | #warning No CONFIG_MTD_CFI_Ix selected. No NOR chip support can work. linux/mtd/spi-nor.h does not actually need to include cfi.h, so remove the inclusion here to fix the warning. This uncovers a missing #include in spi-nor/core.c so add that there to prevent a different build issue. Fixes: e23e5a05d1fd ("mtd: spi-nor: intel-spi: Convert to SPI MEM") Signed-off-by: Arnd Bergmann Reviewed-by: Mika Westerberg Reviewed-by: Tokunori Ikegami Acked-by: Pratyush Yadav Reviewed-by: Tudor Ambarus Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20221220141352.1486360-1-arnd@kernel.org Signed-off-by: Sasha Levin --- drivers/mtd/spi-nor/core.c | 1 + include/linux/mtd/spi-nor.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c index 2e0655c0b606..5dbf52aa0355 100644 --- a/drivers/mtd/spi-nor/core.c +++ b/drivers/mtd/spi-nor/core.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 42218a1164f6..f92bf7f7a754 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -7,7 +7,6 @@ #define __LINUX_MTD_SPI_NOR_H #include -#include #include #include From 254328ba4cd3da2b4fb0afdc96e5033315e64441 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 2 Jan 2023 18:07:57 +0100 Subject: [PATCH 109/177] ALSA: usb-audio: Make sure to stop endpoints before closing EPs [ Upstream commit 0599313e26666e79f6e7fe1450588431b8cb25d5 ] At the PCM hw params, we may re-configure the endpoints and it's done by a temporary EP close followed by re-open. A potential problem there is that the EP might be already running internally at the PCM prepare stage; it's seen typically in the playback stream with the implicit feedback sync. As this stream start isn't tracked by the core PCM layer, we'd need to stop it explicitly, and that's the missing piece. This patch adds the stop_endpoints() call at snd_usb_hw_params() to assure the stream stop before closing the EPs. Fixes: bf6313a0ff76 ("ALSA: usb-audio: Refactor endpoint management") Link: https://lore.kernel.org/r/4e509aea-e563-e592-e652-ba44af6733fe@veniogames.com Link: https://lore.kernel.org/r/20230102170759.29610-2-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/pcm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index d2c652aa1385..535eb95bc9ee 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -527,6 +527,8 @@ static int snd_usb_hw_params(struct snd_pcm_substream *substream, if (snd_usb_endpoint_compatible(chip, subs->data_endpoint, fmt, hw_params)) goto unlock; + if (stop_endpoints(subs, false)) + sync_pending_stops(subs); close_endpoints(chip, subs); } From 0d81b8e86e7562da9961537519920b4974de513c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 2 Jan 2023 18:07:58 +0100 Subject: [PATCH 110/177] ALSA: usb-audio: Relax hw constraints for implicit fb sync [ Upstream commit d463ac1acb454fafed58f695cb3067fbf489f3a0 ] The fix commit the commit e4ea77f8e53f ("ALSA: usb-audio: Always apply the hw constraints for implicit fb sync") tried to address the bug where an incorrect PCM parameter is chosen when two (implicit fb) streams are set up at the same time. This change had, however, some side effect: once when the sync endpoint is chosen and set up, this restriction is applied at the next hw params unless it's freed via hw free explicitly. This patch is a workaround for the problem by relaxing the hw constraints a bit for the implicit fb sync. We still keep applying the hw constraints for implicit fb sync, but only when the matching sync EP is being used by other streams. Fixes: e4ea77f8e53f ("ALSA: usb-audio: Always apply the hw constraints for implicit fb sync") Reported-by: Ruud van Asseldonk Link: https://lore.kernel.org/r/4e509aea-e563-e592-e652-ba44af6733fe@veniogames.com Link: https://lore.kernel.org/r/20230102170759.29610-3-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/pcm.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 535eb95bc9ee..29838000eee0 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -939,8 +939,13 @@ get_sync_ep_from_substream(struct snd_usb_substream *subs) continue; /* for the implicit fb, check the sync ep as well */ ep = snd_usb_get_endpoint(chip, fp->sync_ep); - if (ep && ep->cur_audiofmt) - return ep; + if (ep && ep->cur_audiofmt) { + /* ditto, if the sync (data) ep is used by others, + * this stream is restricted by the sync ep + */ + if (ep != subs->sync_endpoint || ep->opened > 1) + return ep; + } } return NULL; } From 34aed3646e82082ce1685e1630911d9559d464f2 Mon Sep 17 00:00:00 2001 From: Biao Huang Date: Thu, 5 Jan 2023 09:07:11 +0800 Subject: [PATCH 111/177] stmmac: dwmac-mediatek: remove the dwmac_fix_mac_speed [ Upstream commit c26de7507d1f5ffa5daf6a4980ef7896889691a9 ] In current driver, MAC will always enable 2ns delay in RGMII mode, but that's not the correct usage. Remove the dwmac_fix_mac_speed() in driver, and recommend "rgmii-id" for phy-mode in device tree. Fixes: f2d356a6ab71 ("stmmac: dwmac-mediatek: add support for mt8195") Reviewed-by: Andrew Lunn Signed-off-by: Biao Huang Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../ethernet/stmicro/stmmac/dwmac-mediatek.c | 26 ------------------- 1 file changed, 26 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index d42e1afb6521..2f7d8e4561d9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -90,7 +90,6 @@ struct mediatek_dwmac_plat_data { struct mediatek_dwmac_variant { int (*dwmac_set_phy_interface)(struct mediatek_dwmac_plat_data *plat); int (*dwmac_set_delay)(struct mediatek_dwmac_plat_data *plat); - void (*dwmac_fix_mac_speed)(void *priv, unsigned int speed); /* clock ids to be requested */ const char * const *clk_list; @@ -443,32 +442,9 @@ static int mt8195_set_delay(struct mediatek_dwmac_plat_data *plat) return 0; } -static void mt8195_fix_mac_speed(void *priv, unsigned int speed) -{ - struct mediatek_dwmac_plat_data *priv_plat = priv; - - if ((phy_interface_mode_is_rgmii(priv_plat->phy_mode))) { - /* prefer 2ns fixed delay which is controlled by TXC_PHASE_CTRL, - * when link speed is 1Gbps with RGMII interface, - * Fall back to delay macro circuit for 10/100Mbps link speed. - */ - if (speed == SPEED_1000) - regmap_update_bits(priv_plat->peri_regmap, - MT8195_PERI_ETH_CTRL0, - MT8195_RGMII_TXC_PHASE_CTRL | - MT8195_DLY_GTXC_ENABLE | - MT8195_DLY_GTXC_INV | - MT8195_DLY_GTXC_STAGES, - MT8195_RGMII_TXC_PHASE_CTRL); - else - mt8195_set_delay(priv_plat); - } -} - static const struct mediatek_dwmac_variant mt8195_gmac_variant = { .dwmac_set_phy_interface = mt8195_set_interface, .dwmac_set_delay = mt8195_set_delay, - .dwmac_fix_mac_speed = mt8195_fix_mac_speed, .clk_list = mt8195_dwmac_clk_l, .num_clks = ARRAY_SIZE(mt8195_dwmac_clk_l), .dma_bit_mask = 35, @@ -619,8 +595,6 @@ static int mediatek_dwmac_common_data(struct platform_device *pdev, plat->bsp_priv = priv_plat; plat->init = mediatek_dwmac_init; plat->clks_config = mediatek_dwmac_clks_config; - if (priv_plat->variant->dwmac_fix_mac_speed) - plat->fix_mac_speed = priv_plat->variant->dwmac_fix_mac_speed; plat->safety_feat_cfg = devm_kzalloc(&pdev->dev, sizeof(*plat->safety_feat_cfg), From 5ac01865a2b99d6da24e66d12cfea2f7c185c610 Mon Sep 17 00:00:00 2001 From: Tung Nguyen Date: Thu, 5 Jan 2023 06:02:51 +0000 Subject: [PATCH 112/177] tipc: fix unexpected link reset due to discovery messages [ Upstream commit c244c092f1ed2acfb5af3d3da81e22367d3dd733 ] This unexpected behavior is observed: node 1 | node 2 ------ | ------ link is established | link is established reboot | link is reset up | send discovery message receive discovery message | link is established | link is established send discovery message | | receive discovery message | link is reset (unexpected) | send reset message link is reset | It is due to delayed re-discovery as described in function tipc_node_check_dest(): "this link endpoint has already reset and re-established contact with the peer, before receiving a discovery message from that node." However, commit 598411d70f85 has changed the condition for calling tipc_node_link_down() which was the acceptance of new media address. This commit fixes this by restoring the old and correct behavior. Fixes: 598411d70f85 ("tipc: make resetting of links non-atomic") Acked-by: Jon Maloy Signed-off-by: Tung Nguyen Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/tipc/node.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 49ddc484c4fe..5e000fde8067 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1179,8 +1179,9 @@ void tipc_node_check_dest(struct net *net, u32 addr, bool addr_match = false; bool sign_match = false; bool link_up = false; + bool link_is_reset = false; bool accept_addr = false; - bool reset = true; + bool reset = false; char *if_name; unsigned long intv; u16 session; @@ -1200,14 +1201,14 @@ void tipc_node_check_dest(struct net *net, u32 addr, /* Prepare to validate requesting node's signature and media address */ l = le->link; link_up = l && tipc_link_is_up(l); + link_is_reset = l && tipc_link_is_reset(l); addr_match = l && !memcmp(&le->maddr, maddr, sizeof(*maddr)); sign_match = (signature == n->signature); /* These three flags give us eight permutations: */ if (sign_match && addr_match && link_up) { - /* All is fine. Do nothing. */ - reset = false; + /* All is fine. Ignore requests. */ /* Peer node is not a container/local namespace */ if (!n->peer_hash_mix) n->peer_hash_mix = hash_mixes; @@ -1232,6 +1233,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, */ accept_addr = true; *respond = true; + reset = true; } else if (!sign_match && addr_match && link_up) { /* Peer node rebooted. Two possibilities: * - Delayed re-discovery; this link endpoint has already @@ -1263,6 +1265,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, n->signature = signature; accept_addr = true; *respond = true; + reset = true; } if (!accept_addr) @@ -1291,6 +1294,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, tipc_link_fsm_evt(l, LINK_RESET_EVT); if (n->state == NODE_FAILINGOVER) tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT); + link_is_reset = tipc_link_is_reset(l); le->link = l; n->link_cnt++; tipc_node_calculate_timer(n, l); @@ -1303,7 +1307,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, memcpy(&le->maddr, maddr, sizeof(*maddr)); exit: tipc_node_write_unlock(n); - if (reset && l && !tipc_link_is_reset(l)) + if (reset && !link_is_reset) tipc_node_link_down(n, b->identity, false); tipc_node_put(n); } From 90d5095b76f4ee50cf25cb80cda2def4a403c951 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2022 10:46:38 -0400 Subject: [PATCH 113/177] NFSD: Pass the target nfsd_file to nfsd_commit() [ Upstream commit c252849082ff525af18b4f253b3c9ece94e951ed ] In a moment I'm going to introduce separate nfsd_file types, one of which is garbage-collected; the other, not. The garbage-collected variety is to be used by NFSv2 and v3, and the non-garbage-collected variety is to be used by NFSv4. nfsd_commit() is invoked by both NFSv3 and NFSv4 consumers. We want nfsd_commit() to find and use the correct variety of cached nfsd_file object for the NFS version that is in use. Signed-off-by: Chuck Lever Tested-by: Jeff Layton Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Stable-dep-of: 0b3a551fa58b ("nfsd: fix handling of cached open files in nfsd4_open codepath") Signed-off-by: Sasha Levin --- fs/nfsd/nfs3proc.c | 10 +++++++++- fs/nfsd/nfs4proc.c | 11 ++++++++++- fs/nfsd/vfs.c | 15 ++++----------- fs/nfsd/vfs.h | 3 ++- 4 files changed, 25 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 923d9a80df92..ff2920546333 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -13,6 +13,7 @@ #include "cache.h" #include "xdr3.h" #include "vfs.h" +#include "filecache.h" #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -763,6 +764,7 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) { struct nfsd3_commitargs *argp = rqstp->rq_argp; struct nfsd3_commitres *resp = rqstp->rq_resp; + struct nfsd_file *nf; dprintk("nfsd: COMMIT(3) %s %u@%Lu\n", SVCFH_fmt(&argp->fh), @@ -770,8 +772,14 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) (unsigned long long) argp->offset); fh_copy(&resp->fh, &argp->fh); - resp->status = nfsd_commit(rqstp, &resp->fh, argp->offset, + resp->status = nfsd_file_acquire(rqstp, &resp->fh, NFSD_MAY_WRITE | + NFSD_MAY_NOT_BREAK_LEASE, &nf); + if (resp->status) + goto out; + resp->status = nfsd_commit(rqstp, &resp->fh, nf, argp->offset, argp->count, resp->verf); + nfsd_file_put(nf); +out: return rpc_success; } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index c7329523a10f..30a08ec31a70 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -731,10 +731,19 @@ nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_commit *commit = &u->commit; + struct nfsd_file *nf; + __be32 status; - return nfsd_commit(rqstp, &cstate->current_fh, commit->co_offset, + status = nfsd_file_acquire(rqstp, &cstate->current_fh, NFSD_MAY_WRITE | + NFSD_MAY_NOT_BREAK_LEASE, &nf); + if (status != nfs_ok) + return status; + + status = nfsd_commit(rqstp, &cstate->current_fh, nf, commit->co_offset, commit->co_count, (__be32 *)commit->co_verf.data); + nfsd_file_put(nf); + return status; } static __be32 diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 849a720ab43f..f1919834a99d 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1133,6 +1133,7 @@ out: * nfsd_commit - Commit pending writes to stable storage * @rqstp: RPC request being processed * @fhp: NFS filehandle + * @nf: target file * @offset: raw offset from beginning of file * @count: raw count of bytes to sync * @verf: filled in with the server's current write verifier @@ -1149,19 +1150,13 @@ out: * An nfsstat value in network byte order. */ __be32 -nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset, - u32 count, __be32 *verf) +nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, + u64 offset, u32 count, __be32 *verf) { + __be32 err = nfs_ok; u64 maxbytes; loff_t start, end; struct nfsd_net *nn; - struct nfsd_file *nf; - __be32 err; - - err = nfsd_file_acquire(rqstp, fhp, - NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf); - if (err) - goto out; /* * Convert the client-provided (offset, count) range to a @@ -1202,8 +1197,6 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset, } else nfsd_copy_write_verifier(verf, nn); - nfsd_file_put(nf); -out: return err; } diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 120521bc7b24..9744b041105b 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -88,7 +88,8 @@ __be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *); __be32 nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct svc_fh *resfhp, struct nfsd_attrs *iap); __be32 nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp, - u64 offset, u32 count, __be32 *verf); + struct nfsd_file *nf, u64 offset, u32 count, + __be32 *verf); #ifdef CONFIG_NFSD_V4 __be32 nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name, void **bufp, int *lenp); From 7e4d3d500458aec3bb58c9429999dd040077326b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2022 10:46:44 -0400 Subject: [PATCH 114/177] NFSD: Revert "NFSD: NFSv4 CLOSE should release an nfsd_file immediately" [ Upstream commit dcf3f80965ca787c70def402cdf1553c93c75529 ] This reverts commit 5e138c4a750dc140d881dab4a8804b094bbc08d2. That commit attempted to make files available to other users as soon as all NFSv4 clients were done with them, rather than waiting until the filecache LRU had garbage collected them. It gets the reference counting wrong, for one thing. But it also misses that DELEGRETURN should release a file in the same fashion. In fact, any nfsd_file_put() on an file held open by an NFSv4 client needs potentially to release the file immediately... Clear the way for implementing that idea. Signed-off-by: Chuck Lever Reviewed-by: Jeff Layton Reviewed-by: NeilBrown Stable-dep-of: 0b3a551fa58b ("nfsd: fix handling of cached open files in nfsd4_open codepath") Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 18 ------------------ fs/nfsd/filecache.h | 1 - fs/nfsd/nfs4state.c | 4 ++-- 3 files changed, 2 insertions(+), 21 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index ec3fceb92236..babea79d3f6f 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -444,24 +444,6 @@ nfsd_file_put(struct nfsd_file *nf) nfsd_file_put_noref(nf); } -/** - * nfsd_file_close - Close an nfsd_file - * @nf: nfsd_file to close - * - * If this is the final reference for @nf, free it immediately. - * This reflects an on-the-wire CLOSE or DELEGRETURN into the - * VFS and exported filesystem. - */ -void nfsd_file_close(struct nfsd_file *nf) -{ - nfsd_file_put(nf); - if (refcount_dec_if_one(&nf->nf_ref)) { - nfsd_file_unhash(nf); - nfsd_file_lru_remove(nf); - nfsd_file_free(nf); - } -} - struct nfsd_file * nfsd_file_get(struct nfsd_file *nf) { diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 357832bac736..6b012ea4bd9d 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -52,7 +52,6 @@ void nfsd_file_cache_shutdown(void); int nfsd_file_cache_start_net(struct net *net); void nfsd_file_cache_shutdown_net(struct net *net); void nfsd_file_put(struct nfsd_file *nf); -void nfsd_file_close(struct nfsd_file *nf); struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); void nfsd_file_close_inode_sync(struct inode *inode); bool nfsd_file_is_cached(struct inode *inode); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 52b5552d0d70..16c3e991ddcc 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -842,9 +842,9 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag) swap(f2, fp->fi_fds[O_RDWR]); spin_unlock(&fp->fi_lock); if (f1) - nfsd_file_close(f1); + nfsd_file_put(f1); if (f2) - nfsd_file_close(f2); + nfsd_file_put(f2); } } From 1c36dc563e1c9649b040c2af00355b21d905a958 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2022 10:46:51 -0400 Subject: [PATCH 115/177] NFSD: Add an NFSD_FILE_GC flag to enable nfsd_file garbage collection [ Upstream commit 4d1ea8455716ca070e3cd85767e6f6a562a58b1b ] NFSv4 operations manage the lifetime of nfsd_file items they use by means of NFSv4 OPEN and CLOSE. Hence there's no need for them to be garbage collected. Introduce a mechanism to enable garbage collection for nfsd_file items used only by NFSv2/3 callers. Note that the change in nfsd_file_put() ensures that both CLOSE and DELEGRETURN will actually close out and free an nfsd_file on last reference of a non-garbage-collected file. Link: https://bugzilla.linux-nfs.org/show_bug.cgi?id=394 Suggested-by: Trond Myklebust Signed-off-by: Chuck Lever Tested-by: Jeff Layton Reviewed-by: NeilBrown Reviewed-by: Jeff Layton Stable-dep-of: 0b3a551fa58b ("nfsd: fix handling of cached open files in nfsd4_open codepath") Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 63 +++++++++++++++++++++++++++++++++++++++------ fs/nfsd/filecache.h | 3 +++ fs/nfsd/nfs3proc.c | 4 +-- fs/nfsd/trace.h | 3 ++- fs/nfsd/vfs.c | 4 +-- 5 files changed, 64 insertions(+), 13 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index babea79d3f6f..cee44405cf7d 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -63,6 +63,7 @@ struct nfsd_file_lookup_key { struct net *net; const struct cred *cred; unsigned char need; + bool gc; enum nfsd_file_lookup_type type; }; @@ -162,6 +163,8 @@ static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg, return 1; if (!nfsd_match_cred(nf->nf_cred, key->cred)) return 1; + if (!!test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc) + return 1; if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) return 1; break; @@ -297,6 +300,8 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) nf->nf_flags = 0; __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); + if (key->gc) + __set_bit(NFSD_FILE_GC, &nf->nf_flags); nf->nf_inode = key->inode; /* nf_ref is pre-incremented for hash table */ refcount_set(&nf->nf_ref, 2); @@ -428,16 +433,27 @@ nfsd_file_put_noref(struct nfsd_file *nf) } } +static void +nfsd_file_unhash_and_put(struct nfsd_file *nf) +{ + if (nfsd_file_unhash(nf)) + nfsd_file_put_noref(nf); +} + void nfsd_file_put(struct nfsd_file *nf) { might_sleep(); - nfsd_file_lru_add(nf); - if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) { + if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) + nfsd_file_lru_add(nf); + else if (refcount_read(&nf->nf_ref) == 2) + nfsd_file_unhash_and_put(nf); + + if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { nfsd_file_flush(nf); nfsd_file_put_noref(nf); - } else if (nf->nf_file) { + } else if (nf->nf_file && test_bit(NFSD_FILE_GC, &nf->nf_flags)) { nfsd_file_put_noref(nf); nfsd_file_schedule_laundrette(); } else @@ -1016,12 +1032,14 @@ nfsd_file_is_cached(struct inode *inode) static __be32 nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **pnf, bool open) + unsigned int may_flags, struct nfsd_file **pnf, + bool open, bool want_gc) { struct nfsd_file_lookup_key key = { .type = NFSD_FILE_KEY_FULL, .need = may_flags & NFSD_FILE_MAY_MASK, .net = SVC_NET(rqstp), + .gc = want_gc, }; bool open_retry = true; struct nfsd_file *nf; @@ -1117,14 +1135,35 @@ open_file: * then unhash. */ if (status != nfs_ok || key.inode->i_nlink == 0) - if (nfsd_file_unhash(nf)) - nfsd_file_put_noref(nf); + nfsd_file_unhash_and_put(nf); clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); smp_mb__after_atomic(); wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); goto out; } +/** + * nfsd_file_acquire_gc - Get a struct nfsd_file with an open file + * @rqstp: the RPC transaction being executed + * @fhp: the NFS filehandle of the file to be opened + * @may_flags: NFSD_MAY_ settings for the file + * @pnf: OUT: new or found "struct nfsd_file" object + * + * The nfsd_file object returned by this API is reference-counted + * and garbage-collected. The object is retained for a few + * seconds after the final nfsd_file_put() in case the caller + * wants to re-use it. + * + * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in + * network byte order is returned. + */ +__be32 +nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct nfsd_file **pnf) +{ + return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, true); +} + /** * nfsd_file_acquire - Get a struct nfsd_file with an open file * @rqstp: the RPC transaction being executed @@ -1132,6 +1171,10 @@ open_file: * @may_flags: NFSD_MAY_ settings for the file * @pnf: OUT: new or found "struct nfsd_file" object * + * The nfsd_file_object returned by this API is reference-counted + * but not garbage-collected. The object is unhashed after the + * final nfsd_file_put(). + * * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in * network byte order is returned. */ @@ -1139,7 +1182,7 @@ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, false); } /** @@ -1149,6 +1192,10 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, * @may_flags: NFSD_MAY_ settings for the file * @pnf: OUT: new or found "struct nfsd_file" object * + * The nfsd_file_object returned by this API is reference-counted + * but not garbage-collected. The object is released immediately + * one RCU grace period after the final nfsd_file_put(). + * * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in * network byte order is returned. */ @@ -1156,7 +1203,7 @@ __be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false, false); } /* diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 6b012ea4bd9d..b7efb2c3ddb1 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -38,6 +38,7 @@ struct nfsd_file { #define NFSD_FILE_HASHED (0) #define NFSD_FILE_PENDING (1) #define NFSD_FILE_REFERENCED (2) +#define NFSD_FILE_GC (3) unsigned long nf_flags; struct inode *nf_inode; /* don't deref */ refcount_t nf_ref; @@ -55,6 +56,8 @@ void nfsd_file_put(struct nfsd_file *nf); struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); void nfsd_file_close_inode_sync(struct inode *inode); bool nfsd_file_is_cached(struct inode *inode); +__be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct nfsd_file **nfp); __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **nfp); __be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index ff2920546333..d01b29aba662 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -772,8 +772,8 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) (unsigned long long) argp->offset); fh_copy(&resp->fh, &argp->fh); - resp->status = nfsd_file_acquire(rqstp, &resp->fh, NFSD_MAY_WRITE | - NFSD_MAY_NOT_BREAK_LEASE, &nf); + resp->status = nfsd_file_acquire_gc(rqstp, &resp->fh, NFSD_MAY_WRITE | + NFSD_MAY_NOT_BREAK_LEASE, &nf); if (resp->status) goto out; resp->status = nfsd_commit(rqstp, &resp->fh, nf, argp->offset, diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index d4b6839bb459..3fcfeb7b560f 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -817,7 +817,8 @@ DEFINE_CLID_EVENT(confirmed_r); __print_flags(val, "|", \ { 1 << NFSD_FILE_HASHED, "HASHED" }, \ { 1 << NFSD_FILE_PENDING, "PENDING" }, \ - { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}) + { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}, \ + { 1 << NFSD_FILE_GC, "GC"}) DECLARE_EVENT_CLASS(nfsd_file_class, TP_PROTO(struct nfsd_file *nf), diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index f1919834a99d..2934ab1d9862 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1085,7 +1085,7 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, __be32 err; trace_nfsd_read_start(rqstp, fhp, offset, *count); - err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf); + err = nfsd_file_acquire_gc(rqstp, fhp, NFSD_MAY_READ, &nf); if (err) return err; @@ -1117,7 +1117,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, trace_nfsd_write_start(rqstp, fhp, offset, *cnt); - err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_WRITE, &nf); + err = nfsd_file_acquire_gc(rqstp, fhp, NFSD_MAY_WRITE, &nf); if (err) goto out; From d7a6c190012af6d5821356343b4e0eae9c164000 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 2 Nov 2022 14:44:47 -0400 Subject: [PATCH 116/177] nfsd: remove the pages_flushed statistic from filecache [ Upstream commit 1f696e230ea5198e393368b319eb55651828d687 ] We're counting mapping->nrpages, but not all of those are necessarily dirty. We don't really have a simple way to count just the dirty pages, so just remove this stat since it's not accurate. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Stable-dep-of: 0b3a551fa58b ("nfsd: fix handling of cached open files in nfsd4_open codepath") Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index cee44405cf7d..28fff3672df9 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -33,7 +33,6 @@ static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits); static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions); static DEFINE_PER_CPU(unsigned long, nfsd_file_releases); static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age); -static DEFINE_PER_CPU(unsigned long, nfsd_file_pages_flushed); static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions); struct nfsd_fcache_disposal { @@ -371,7 +370,6 @@ nfsd_file_flush(struct nfsd_file *nf) if (!file || !(file->f_mode & FMODE_WRITE)) return; - this_cpu_add(nfsd_file_pages_flushed, file->f_mapping->nrpages); if (vfs_fsync(file, 1) != 0) nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); } @@ -998,7 +996,6 @@ nfsd_file_cache_shutdown(void) per_cpu(nfsd_file_acquisitions, i) = 0; per_cpu(nfsd_file_releases, i) = 0; per_cpu(nfsd_file_total_age, i) = 0; - per_cpu(nfsd_file_pages_flushed, i) = 0; per_cpu(nfsd_file_evictions, i) = 0; } } @@ -1213,7 +1210,7 @@ nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, */ int nfsd_file_cache_stats_show(struct seq_file *m, void *v) { - unsigned long releases = 0, pages_flushed = 0, evictions = 0; + unsigned long releases = 0, evictions = 0; unsigned long hits = 0, acquisitions = 0; unsigned int i, count = 0, buckets = 0; unsigned long lru = 0, total_age = 0; @@ -1241,7 +1238,6 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v) releases += per_cpu(nfsd_file_releases, i); total_age += per_cpu(nfsd_file_total_age, i); evictions += per_cpu(nfsd_file_evictions, i); - pages_flushed += per_cpu(nfsd_file_pages_flushed, i); } seq_printf(m, "total entries: %u\n", count); @@ -1255,6 +1251,5 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v) seq_printf(m, "mean age (ms): %ld\n", total_age / releases); else seq_printf(m, "mean age (ms): -\n"); - seq_printf(m, "pages flushed: %lu\n", pages_flushed); return 0; } From 4ec7a0f277d07dceabb591e927a5d8279d980da2 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 2 Nov 2022 14:44:48 -0400 Subject: [PATCH 117/177] nfsd: reorganize filecache.c [ Upstream commit 8214118589881b2d390284410c5ff275e7a5e03c ] In a coming patch, we're going to rework how the filecache refcounting works. Move some code around in the function to reduce the churn in the later patches, and rename some of the functions with (hopefully) clearer names: nfsd_file_flush becomes nfsd_file_fsync, and nfsd_file_unhash_and_dispose is renamed to nfsd_file_unhash_and_queue. Also, the nfsd_file_put_final tracepoint is renamed to nfsd_file_free, to better match the name of the function from which it's called. Signed-off-by: Jeff Layton Reviewed-by: NeilBrown Signed-off-by: Chuck Lever Stable-dep-of: 0b3a551fa58b ("nfsd: fix handling of cached open files in nfsd4_open codepath") Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 111 ++++++++++++++++++++++---------------------- fs/nfsd/trace.h | 4 +- 2 files changed, 58 insertions(+), 57 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 28fff3672df9..f54dd6695741 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -310,16 +310,59 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) return nf; } +static void +nfsd_file_fsync(struct nfsd_file *nf) +{ + struct file *file = nf->nf_file; + + if (!file || !(file->f_mode & FMODE_WRITE)) + return; + if (vfs_fsync(file, 1) != 0) + nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); +} + +static int +nfsd_file_check_write_error(struct nfsd_file *nf) +{ + struct file *file = nf->nf_file; + + if (!file || !(file->f_mode & FMODE_WRITE)) + return 0; + return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)); +} + +static void +nfsd_file_hash_remove(struct nfsd_file *nf) +{ + trace_nfsd_file_unhash(nf); + + if (nfsd_file_check_write_error(nf)) + nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); + rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash, + nfsd_file_rhash_params); +} + +static bool +nfsd_file_unhash(struct nfsd_file *nf) +{ + if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + nfsd_file_hash_remove(nf); + return true; + } + return false; +} + static bool nfsd_file_free(struct nfsd_file *nf) { s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime)); bool flush = false; + trace_nfsd_file_free(nf); + this_cpu_inc(nfsd_file_releases); this_cpu_add(nfsd_file_total_age, age); - trace_nfsd_file_put_final(nf); if (nf->nf_mark) nfsd_file_mark_put(nf->nf_mark); if (nf->nf_file) { @@ -353,27 +396,6 @@ nfsd_file_check_writeback(struct nfsd_file *nf) mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); } -static int -nfsd_file_check_write_error(struct nfsd_file *nf) -{ - struct file *file = nf->nf_file; - - if (!file || !(file->f_mode & FMODE_WRITE)) - return 0; - return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err)); -} - -static void -nfsd_file_flush(struct nfsd_file *nf) -{ - struct file *file = nf->nf_file; - - if (!file || !(file->f_mode & FMODE_WRITE)) - return; - if (vfs_fsync(file, 1) != 0) - nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); -} - static void nfsd_file_lru_add(struct nfsd_file *nf) { set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); @@ -387,31 +409,18 @@ static void nfsd_file_lru_remove(struct nfsd_file *nf) trace_nfsd_file_lru_del(nf); } -static void -nfsd_file_hash_remove(struct nfsd_file *nf) +struct nfsd_file * +nfsd_file_get(struct nfsd_file *nf) { - trace_nfsd_file_unhash(nf); - - if (nfsd_file_check_write_error(nf)) - nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); - rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash, - nfsd_file_rhash_params); -} - -static bool -nfsd_file_unhash(struct nfsd_file *nf) -{ - if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - nfsd_file_hash_remove(nf); - return true; - } - return false; + if (likely(refcount_inc_not_zero(&nf->nf_ref))) + return nf; + return NULL; } static void -nfsd_file_unhash_and_dispose(struct nfsd_file *nf, struct list_head *dispose) +nfsd_file_unhash_and_queue(struct nfsd_file *nf, struct list_head *dispose) { - trace_nfsd_file_unhash_and_dispose(nf); + trace_nfsd_file_unhash_and_queue(nf); if (nfsd_file_unhash(nf)) { /* caller must call nfsd_file_dispose_list() later */ nfsd_file_lru_remove(nf); @@ -449,7 +458,7 @@ nfsd_file_put(struct nfsd_file *nf) nfsd_file_unhash_and_put(nf); if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - nfsd_file_flush(nf); + nfsd_file_fsync(nf); nfsd_file_put_noref(nf); } else if (nf->nf_file && test_bit(NFSD_FILE_GC, &nf->nf_flags)) { nfsd_file_put_noref(nf); @@ -458,14 +467,6 @@ nfsd_file_put(struct nfsd_file *nf) nfsd_file_put_noref(nf); } -struct nfsd_file * -nfsd_file_get(struct nfsd_file *nf) -{ - if (likely(refcount_inc_not_zero(&nf->nf_ref))) - return nf; - return NULL; -} - static void nfsd_file_dispose_list(struct list_head *dispose) { @@ -474,7 +475,7 @@ nfsd_file_dispose_list(struct list_head *dispose) while(!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); list_del_init(&nf->nf_lru); - nfsd_file_flush(nf); + nfsd_file_fsync(nf); nfsd_file_put_noref(nf); } } @@ -488,7 +489,7 @@ nfsd_file_dispose_list_sync(struct list_head *dispose) while(!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); list_del_init(&nf->nf_lru); - nfsd_file_flush(nf); + nfsd_file_fsync(nf); if (!refcount_dec_and_test(&nf->nf_ref)) continue; if (nfsd_file_free(nf)) @@ -688,7 +689,7 @@ __nfsd_file_close_inode(struct inode *inode, struct list_head *dispose) nfsd_file_rhash_params); if (!nf) break; - nfsd_file_unhash_and_dispose(nf, dispose); + nfsd_file_unhash_and_queue(nf, dispose); count++; } while (1); rcu_read_unlock(); @@ -890,7 +891,7 @@ __nfsd_file_cache_purge(struct net *net) nf = rhashtable_walk_next(&iter); while (!IS_ERR_OR_NULL(nf)) { if (!net || nf->nf_net == net) - nfsd_file_unhash_and_dispose(nf, &dispose); + nfsd_file_unhash_and_queue(nf, &dispose); nf = rhashtable_walk_next(&iter); } diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 3fcfeb7b560f..55e9e19cb1ec 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -850,10 +850,10 @@ DEFINE_EVENT(nfsd_file_class, name, \ TP_PROTO(struct nfsd_file *nf), \ TP_ARGS(nf)) -DEFINE_NFSD_FILE_EVENT(nfsd_file_put_final); +DEFINE_NFSD_FILE_EVENT(nfsd_file_free); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash); DEFINE_NFSD_FILE_EVENT(nfsd_file_put); -DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_dispose); +DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_queue); TRACE_EVENT(nfsd_file_alloc, TP_PROTO( From 65469b10522fcd9ab30543c84a66d927df8c6828 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Nov 2022 16:22:48 -0400 Subject: [PATCH 118/177] NFSD: Add an nfsd_file_fsync tracepoint [ Upstream commit d7064eaf688cfe454c50db9f59298463d80d403c ] Add a tracepoint to capture the number of filecache-triggered fsync calls and which files needed it. Also, record when an fsync triggers a write verifier reset. Examples: <...>-97 [007] 262.505611: nfsd_file_free: inode=0xffff888171e08140 ref=0 flags=GC may=WRITE nf_file=0xffff8881373d2400 <...>-97 [007] 262.505612: nfsd_file_fsync: inode=0xffff888171e08140 ref=0 flags=GC may=WRITE nf_file=0xffff8881373d2400 ret=0 <...>-97 [007] 262.505623: nfsd_file_free: inode=0xffff888171e08dc0 ref=0 flags=GC may=WRITE nf_file=0xffff8881373d1e00 <...>-97 [007] 262.505624: nfsd_file_fsync: inode=0xffff888171e08dc0 ref=0 flags=GC may=WRITE nf_file=0xffff8881373d1e00 ret=0 Signed-off-by: Chuck Lever Reviewed-by: Jeff Layton Stable-dep-of: 0b3a551fa58b ("nfsd: fix handling of cached open files in nfsd4_open codepath") Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 5 ++++- fs/nfsd/trace.h | 31 +++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index f54dd6695741..7c673f98f95c 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -314,10 +314,13 @@ static void nfsd_file_fsync(struct nfsd_file *nf) { struct file *file = nf->nf_file; + int ret; if (!file || !(file->f_mode & FMODE_WRITE)) return; - if (vfs_fsync(file, 1) != 0) + ret = vfs_fsync(file, 1); + trace_nfsd_file_fsync(nf, ret); + if (ret) nfsd_reset_write_verifier(net_generic(nf->nf_net, nfsd_net_id)); } diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 55e9e19cb1ec..08e2738adf8f 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -1182,6 +1182,37 @@ DEFINE_EVENT(nfsd_file_lruwalk_class, name, \ DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed); DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed); +TRACE_EVENT(nfsd_file_fsync, + TP_PROTO( + const struct nfsd_file *nf, + int ret + ), + TP_ARGS(nf, ret), + TP_STRUCT__entry( + __field(void *, nf_inode) + __field(int, nf_ref) + __field(int, ret) + __field(unsigned long, nf_flags) + __field(unsigned char, nf_may) + __field(struct file *, nf_file) + ), + TP_fast_assign( + __entry->nf_inode = nf->nf_inode; + __entry->nf_ref = refcount_read(&nf->nf_ref); + __entry->ret = ret; + __entry->nf_flags = nf->nf_flags; + __entry->nf_may = nf->nf_may; + __entry->nf_file = nf->nf_file; + ), + TP_printk("inode=%p ref=%d flags=%s may=%s nf_file=%p ret=%d", + __entry->nf_inode, + __entry->nf_ref, + show_nf_flags(__entry->nf_flags), + show_nfsd_may_flags(__entry->nf_may), + __entry->nf_file, __entry->ret + ) +); + #include "cache.h" TRACE_DEFINE_ENUM(RC_DROPIT); From 0aba4dd7f269bf50407d8d1d776148e8246b2dcb Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sun, 11 Dec 2022 06:19:33 -0500 Subject: [PATCH 119/177] nfsd: rework refcounting in filecache [ Upstream commit ac3a2585f018f10039b4a856dcb122da88c1c1c9 ] The filecache refcounting is a bit non-standard for something searchable by RCU, in that we maintain a sentinel reference while it's hashed. This in turn requires that we have to do things differently in the "put" depending on whether its hashed, which we believe to have led to races. There are other problems in here too. nfsd_file_close_inode_sync can end up freeing an nfsd_file while there are still outstanding references to it, and there are a number of subtle ToC/ToU races. Rework the code so that the refcount is what drives the lifecycle. When the refcount goes to zero, then unhash and rcu free the object. A task searching for a nfsd_file is allowed to bump its refcount, but only if it's not already 0. Ensure that we don't make any other changes to it until a reference is held. With this change, the LRU carries a reference. Take special care to deal with it when removing an entry from the list, and ensure that we only repurpose the nf_lru list_head when the refcount is 0 to ensure exclusive access to it. Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Stable-dep-of: 0b3a551fa58b ("nfsd: fix handling of cached open files in nfsd4_open codepath") Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 328 +++++++++++++++++++++++--------------------- fs/nfsd/trace.h | 51 +++---- 2 files changed, 194 insertions(+), 185 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 7c673f98f95c..9bf78506d071 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -302,8 +302,7 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may) if (key->gc) __set_bit(NFSD_FILE_GC, &nf->nf_flags); nf->nf_inode = key->inode; - /* nf_ref is pre-incremented for hash table */ - refcount_set(&nf->nf_ref, 2); + refcount_set(&nf->nf_ref, 1); nf->nf_may = key->need; nf->nf_mark = NULL; } @@ -355,24 +354,35 @@ nfsd_file_unhash(struct nfsd_file *nf) return false; } -static bool +static void nfsd_file_free(struct nfsd_file *nf) { s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime)); - bool flush = false; trace_nfsd_file_free(nf); this_cpu_inc(nfsd_file_releases); this_cpu_add(nfsd_file_total_age, age); + nfsd_file_unhash(nf); + + /* + * We call fsync here in order to catch writeback errors. It's not + * strictly required by the protocol, but an nfsd_file could get + * evicted from the cache before a COMMIT comes in. If another + * task were to open that file in the interim and scrape the error, + * then the client may never see it. By calling fsync here, we ensure + * that writeback happens before the entry is freed, and that any + * errors reported result in the write verifier changing. + */ + nfsd_file_fsync(nf); + if (nf->nf_mark) nfsd_file_mark_put(nf->nf_mark); if (nf->nf_file) { get_file(nf->nf_file); filp_close(nf->nf_file, NULL); fput(nf->nf_file); - flush = true; } /* @@ -380,10 +390,9 @@ nfsd_file_free(struct nfsd_file *nf) * WARN and leak it to preserve system stability. */ if (WARN_ON_ONCE(!list_empty(&nf->nf_lru))) - return flush; + return; call_rcu(&nf->nf_rcu, nfsd_file_slab_free); - return flush; } static bool @@ -399,17 +408,23 @@ nfsd_file_check_writeback(struct nfsd_file *nf) mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK); } -static void nfsd_file_lru_add(struct nfsd_file *nf) +static bool nfsd_file_lru_add(struct nfsd_file *nf) { set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags); - if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) + if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) { trace_nfsd_file_lru_add(nf); + return true; + } + return false; } -static void nfsd_file_lru_remove(struct nfsd_file *nf) +static bool nfsd_file_lru_remove(struct nfsd_file *nf) { - if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) + if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) { trace_nfsd_file_lru_del(nf); + return true; + } + return false; } struct nfsd_file * @@ -420,54 +435,48 @@ nfsd_file_get(struct nfsd_file *nf) return NULL; } -static void -nfsd_file_unhash_and_queue(struct nfsd_file *nf, struct list_head *dispose) -{ - trace_nfsd_file_unhash_and_queue(nf); - if (nfsd_file_unhash(nf)) { - /* caller must call nfsd_file_dispose_list() later */ - nfsd_file_lru_remove(nf); - list_add(&nf->nf_lru, dispose); - } -} - -static void -nfsd_file_put_noref(struct nfsd_file *nf) -{ - trace_nfsd_file_put(nf); - - if (refcount_dec_and_test(&nf->nf_ref)) { - WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags)); - nfsd_file_lru_remove(nf); - nfsd_file_free(nf); - } -} - -static void -nfsd_file_unhash_and_put(struct nfsd_file *nf) -{ - if (nfsd_file_unhash(nf)) - nfsd_file_put_noref(nf); -} - +/** + * nfsd_file_put - put the reference to a nfsd_file + * @nf: nfsd_file of which to put the reference + * + * Put a reference to a nfsd_file. In the non-GC case, we just put the + * reference immediately. In the GC case, if the reference would be + * the last one, the put it on the LRU instead to be cleaned up later. + */ void nfsd_file_put(struct nfsd_file *nf) { might_sleep(); + trace_nfsd_file_put(nf); - if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) - nfsd_file_lru_add(nf); - else if (refcount_read(&nf->nf_ref) == 2) - nfsd_file_unhash_and_put(nf); + if (test_bit(NFSD_FILE_GC, &nf->nf_flags) && + test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + /* + * If this is the last reference (nf_ref == 1), then try to + * transfer it to the LRU. + */ + if (refcount_dec_not_one(&nf->nf_ref)) + return; - if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - nfsd_file_fsync(nf); - nfsd_file_put_noref(nf); - } else if (nf->nf_file && test_bit(NFSD_FILE_GC, &nf->nf_flags)) { - nfsd_file_put_noref(nf); - nfsd_file_schedule_laundrette(); - } else - nfsd_file_put_noref(nf); + /* Try to add it to the LRU. If that fails, decrement. */ + if (nfsd_file_lru_add(nf)) { + /* If it's still hashed, we're done */ + if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { + nfsd_file_schedule_laundrette(); + return; + } + + /* + * We're racing with unhashing, so try to remove it from + * the LRU. If removal fails, then someone else already + * has our reference. + */ + if (!nfsd_file_lru_remove(nf)) + return; + } + } + if (refcount_dec_and_test(&nf->nf_ref)) + nfsd_file_free(nf); } static void @@ -475,33 +484,13 @@ nfsd_file_dispose_list(struct list_head *dispose) { struct nfsd_file *nf; - while(!list_empty(dispose)) { + while (!list_empty(dispose)) { nf = list_first_entry(dispose, struct nfsd_file, nf_lru); list_del_init(&nf->nf_lru); - nfsd_file_fsync(nf); - nfsd_file_put_noref(nf); + nfsd_file_free(nf); } } -static void -nfsd_file_dispose_list_sync(struct list_head *dispose) -{ - bool flush = false; - struct nfsd_file *nf; - - while(!list_empty(dispose)) { - nf = list_first_entry(dispose, struct nfsd_file, nf_lru); - list_del_init(&nf->nf_lru); - nfsd_file_fsync(nf); - if (!refcount_dec_and_test(&nf->nf_ref)) - continue; - if (nfsd_file_free(nf)) - flush = true; - } - if (flush) - flush_delayed_fput(); -} - static void nfsd_file_list_remove_disposal(struct list_head *dst, struct nfsd_fcache_disposal *l) @@ -569,21 +558,8 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, struct list_head *head = arg; struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru); - /* - * Do a lockless refcount check. The hashtable holds one reference, so - * we look to see if anything else has a reference, or if any have - * been put since the shrinker last ran. Those don't get unhashed and - * released. - * - * Note that in the put path, we set the flag and then decrement the - * counter. Here we check the counter and then test and clear the flag. - * That order is deliberate to ensure that we can do this locklessly. - */ - if (refcount_read(&nf->nf_ref) > 1) { - list_lru_isolate(lru, &nf->nf_lru); - trace_nfsd_file_gc_in_use(nf); - return LRU_REMOVED; - } + /* We should only be dealing with GC entries here */ + WARN_ON_ONCE(!test_bit(NFSD_FILE_GC, &nf->nf_flags)); /* * Don't throw out files that are still undergoing I/O or @@ -594,40 +570,30 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru, return LRU_SKIP; } + /* If it was recently added to the list, skip it */ if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) { trace_nfsd_file_gc_referenced(nf); return LRU_ROTATE; } - if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { - trace_nfsd_file_gc_hashed(nf); - return LRU_SKIP; + /* + * Put the reference held on behalf of the LRU. If it wasn't the last + * one, then just remove it from the LRU and ignore it. + */ + if (!refcount_dec_and_test(&nf->nf_ref)) { + trace_nfsd_file_gc_in_use(nf); + list_lru_isolate(lru, &nf->nf_lru); + return LRU_REMOVED; } + /* Refcount went to zero. Unhash it and queue it to the dispose list */ + nfsd_file_unhash(nf); list_lru_isolate_move(lru, &nf->nf_lru, head); this_cpu_inc(nfsd_file_evictions); trace_nfsd_file_gc_disposed(nf); return LRU_REMOVED; } -/* - * Unhash items on @dispose immediately, then queue them on the - * disposal workqueue to finish releasing them in the background. - * - * cel: Note that between the time list_lru_shrink_walk runs and - * now, these items are in the hash table but marked unhashed. - * Why release these outside of lru_cb ? There's no lock ordering - * problem since lru_cb currently takes no lock. - */ -static void nfsd_file_gc_dispose_list(struct list_head *dispose) -{ - struct nfsd_file *nf; - - list_for_each_entry(nf, dispose, nf_lru) - nfsd_file_hash_remove(nf); - nfsd_file_dispose_list_delayed(dispose); -} - static void nfsd_file_gc(void) { @@ -637,7 +603,7 @@ nfsd_file_gc(void) ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb, &dispose, list_lru_count(&nfsd_file_lru)); trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru)); - nfsd_file_gc_dispose_list(&dispose); + nfsd_file_dispose_list_delayed(&dispose); } static void @@ -662,7 +628,7 @@ nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc) ret = list_lru_shrink_walk(&nfsd_file_lru, sc, nfsd_file_lru_cb, &dispose); trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru)); - nfsd_file_gc_dispose_list(&dispose); + nfsd_file_dispose_list_delayed(&dispose); return ret; } @@ -672,72 +638,111 @@ static struct shrinker nfsd_file_shrinker = { .seeks = 1, }; -/* - * Find all cache items across all net namespaces that match @inode and - * move them to @dispose. The lookup is atomic wrt nfsd_file_acquire(). +/** + * nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode + * @inode: inode on which to close out nfsd_files + * @dispose: list on which to gather nfsd_files to close out + * + * An nfsd_file represents a struct file being held open on behalf of nfsd. An + * open file however can block other activity (such as leases), or cause + * undesirable behavior (e.g. spurious silly-renames when reexporting NFS). + * + * This function is intended to find open nfsd_files when this sort of + * conflicting access occurs and then attempt to close those files out. + * + * Populates the dispose list with entries that have already had their + * refcounts go to zero. The actual free of an nfsd_file can be expensive, + * so we leave it up to the caller whether it wants to wait or not. */ -static unsigned int -__nfsd_file_close_inode(struct inode *inode, struct list_head *dispose) +static void +nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose) { struct nfsd_file_lookup_key key = { .type = NFSD_FILE_KEY_INODE, .inode = inode, }; - unsigned int count = 0; struct nfsd_file *nf; rcu_read_lock(); do { + int decrement = 1; + nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key, nfsd_file_rhash_params); if (!nf) break; - nfsd_file_unhash_and_queue(nf, dispose); - count++; + + /* If we raced with someone else unhashing, ignore it */ + if (!nfsd_file_unhash(nf)) + continue; + + /* If we can't get a reference, ignore it */ + if (!nfsd_file_get(nf)) + continue; + + /* Extra decrement if we remove from the LRU */ + if (nfsd_file_lru_remove(nf)) + ++decrement; + + /* If refcount goes to 0, then put on the dispose list */ + if (refcount_sub_and_test(decrement, &nf->nf_ref)) { + list_add(&nf->nf_lru, dispose); + trace_nfsd_file_closing(nf); + } } while (1); rcu_read_unlock(); - return count; -} - -/** - * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file - * @inode: inode of the file to attempt to remove - * - * Unhash and put, then flush and fput all cache items associated with @inode. - */ -void -nfsd_file_close_inode_sync(struct inode *inode) -{ - LIST_HEAD(dispose); - unsigned int count; - - count = __nfsd_file_close_inode(inode, &dispose); - trace_nfsd_file_close_inode_sync(inode, count); - nfsd_file_dispose_list_sync(&dispose); } /** * nfsd_file_close_inode - attempt a delayed close of a nfsd_file * @inode: inode of the file to attempt to remove * - * Unhash and put all cache item associated with @inode. + * Close out any open nfsd_files that can be reaped for @inode. The + * actual freeing is deferred to the dispose_list_delayed infrastructure. + * + * This is used by the fsnotify callbacks and setlease notifier. */ static void nfsd_file_close_inode(struct inode *inode) { LIST_HEAD(dispose); - unsigned int count; - count = __nfsd_file_close_inode(inode, &dispose); - trace_nfsd_file_close_inode(inode, count); + nfsd_file_queue_for_close(inode, &dispose); nfsd_file_dispose_list_delayed(&dispose); } +/** + * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file + * @inode: inode of the file to attempt to remove + * + * Close out any open nfsd_files that can be reaped for @inode. The + * nfsd_files are closed out synchronously. + * + * This is called from nfsd_rename and nfsd_unlink to avoid silly-renames + * when reexporting NFS. + */ +void +nfsd_file_close_inode_sync(struct inode *inode) +{ + struct nfsd_file *nf; + LIST_HEAD(dispose); + + trace_nfsd_file_close(inode); + + nfsd_file_queue_for_close(inode, &dispose); + while (!list_empty(&dispose)) { + nf = list_first_entry(&dispose, struct nfsd_file, nf_lru); + list_del_init(&nf->nf_lru); + nfsd_file_free(nf); + } + flush_delayed_fput(); +} + /** * nfsd_file_delayed_close - close unused nfsd_files * @work: dummy * - * Walk the LRU list and close any entries that have not been used since + * Walk the LRU list and destroy any entries that have not been used since * the last scan. */ static void @@ -759,7 +764,7 @@ nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg, /* Only close files for F_SETLEASE leases */ if (fl->fl_flags & FL_LEASE) - nfsd_file_close_inode_sync(file_inode(fl->fl_file)); + nfsd_file_close_inode(file_inode(fl->fl_file)); return 0; } @@ -880,6 +885,13 @@ out_err: goto out; } +/** + * __nfsd_file_cache_purge: clean out the cache for shutdown + * @net: net-namespace to shut down the cache (may be NULL) + * + * Walk the nfsd_file cache and close out any that match @net. If @net is NULL, + * then close out everything. Called when an nfsd instance is being shut down. + */ static void __nfsd_file_cache_purge(struct net *net) { @@ -893,8 +905,11 @@ __nfsd_file_cache_purge(struct net *net) nf = rhashtable_walk_next(&iter); while (!IS_ERR_OR_NULL(nf)) { - if (!net || nf->nf_net == net) - nfsd_file_unhash_and_queue(nf, &dispose); + if (!net || nf->nf_net == net) { + nfsd_file_unhash(nf); + nfsd_file_lru_remove(nf); + list_add(&nf->nf_lru, &dispose); + } nf = rhashtable_walk_next(&iter); } @@ -1061,8 +1076,12 @@ retry: if (nf) nf = nfsd_file_get(nf); rcu_read_unlock(); - if (nf) + + if (nf) { + if (nfsd_file_lru_remove(nf)) + WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref)); goto wait_for_construction; + } nf = nfsd_file_alloc(&key, may_flags); if (!nf) { @@ -1095,11 +1114,11 @@ wait_for_construction: goto out; } open_retry = false; - nfsd_file_put_noref(nf); + if (refcount_dec_and_test(&nf->nf_ref)) + nfsd_file_free(nf); goto retry; } - nfsd_file_lru_remove(nf); this_cpu_inc(nfsd_file_cache_hits); status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); @@ -1109,7 +1128,8 @@ out: this_cpu_inc(nfsd_file_acquisitions); *pnf = nf; } else { - nfsd_file_put(nf); + if (refcount_dec_and_test(&nf->nf_ref)) + nfsd_file_free(nf); nf = NULL; } @@ -1135,8 +1155,10 @@ open_file: * If construction failed, or we raced with a call to unlink() * then unhash. */ - if (status != nfs_ok || key.inode->i_nlink == 0) - nfsd_file_unhash_and_put(nf); + if (status == nfs_ok && key.inode->i_nlink == 0) + status = nfserr_jukebox; + if (status != nfs_ok) + nfsd_file_unhash(nf); clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags); smp_mb__after_atomic(); wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING); diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 08e2738adf8f..4feeaed32541 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -817,8 +817,8 @@ DEFINE_CLID_EVENT(confirmed_r); __print_flags(val, "|", \ { 1 << NFSD_FILE_HASHED, "HASHED" }, \ { 1 << NFSD_FILE_PENDING, "PENDING" }, \ - { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}, \ - { 1 << NFSD_FILE_GC, "GC"}) + { 1 << NFSD_FILE_REFERENCED, "REFERENCED" }, \ + { 1 << NFSD_FILE_GC, "GC" }) DECLARE_EVENT_CLASS(nfsd_file_class, TP_PROTO(struct nfsd_file *nf), @@ -853,6 +853,7 @@ DEFINE_EVENT(nfsd_file_class, name, \ DEFINE_NFSD_FILE_EVENT(nfsd_file_free); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash); DEFINE_NFSD_FILE_EVENT(nfsd_file_put); +DEFINE_NFSD_FILE_EVENT(nfsd_file_closing); DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_queue); TRACE_EVENT(nfsd_file_alloc, @@ -1044,35 +1045,6 @@ TRACE_EVENT(nfsd_file_open, __entry->nf_file) ) -DECLARE_EVENT_CLASS(nfsd_file_search_class, - TP_PROTO( - const struct inode *inode, - unsigned int count - ), - TP_ARGS(inode, count), - TP_STRUCT__entry( - __field(const struct inode *, inode) - __field(unsigned int, count) - ), - TP_fast_assign( - __entry->inode = inode; - __entry->count = count; - ), - TP_printk("inode=%p count=%u", - __entry->inode, __entry->count) -); - -#define DEFINE_NFSD_FILE_SEARCH_EVENT(name) \ -DEFINE_EVENT(nfsd_file_search_class, name, \ - TP_PROTO( \ - const struct inode *inode, \ - unsigned int count \ - ), \ - TP_ARGS(inode, count)) - -DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync); -DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode); - TRACE_EVENT(nfsd_file_is_cached, TP_PROTO( const struct inode *inode, @@ -1150,7 +1122,6 @@ DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del_disposed); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced); -DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_hashed); DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_disposed); DECLARE_EVENT_CLASS(nfsd_file_lruwalk_class, @@ -1182,6 +1153,22 @@ DEFINE_EVENT(nfsd_file_lruwalk_class, name, \ DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed); DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed); +TRACE_EVENT(nfsd_file_close, + TP_PROTO( + const struct inode *inode + ), + TP_ARGS(inode), + TP_STRUCT__entry( + __field(const void *, inode) + ), + TP_fast_assign( + __entry->inode = inode; + ), + TP_printk("inode=%p", + __entry->inode + ) +); + TRACE_EVENT(nfsd_file_fsync, TP_PROTO( const struct nfsd_file *nf, From 973acfdfe90c8a4e58ade97ff0653a498531ff2e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 5 Jan 2023 14:55:56 -0500 Subject: [PATCH 120/177] nfsd: fix handling of cached open files in nfsd4_open codepath [ Upstream commit 0b3a551fa58b4da941efeb209b3770868e2eddd7 ] Commit fb70bf124b05 ("NFSD: Instantiate a struct file when creating a regular NFSv4 file") added the ability to cache an open fd over a compound. There are a couple of problems with the way this currently works: It's racy, as a newly-created nfsd_file can end up with its PENDING bit cleared while the nf is hashed, and the nf_file pointer is still zeroed out. Other tasks can find it in this state and they expect to see a valid nf_file, and can oops if nf_file is NULL. Also, there is no guarantee that we'll end up creating a new nfsd_file if one is already in the hash. If an extant entry is in the hash with a valid nf_file, nfs4_get_vfs_file will clobber its nf_file pointer with the value of op_file and the old nf_file will leak. Fix both issues by making a new nfsd_file_acquirei_opened variant that takes an optional file pointer. If one is present when this is called, we'll take a new reference to it instead of trying to open the file. If the nfsd_file already has a valid nf_file, we'll just ignore the optional file and pass the nfsd_file back as-is. Also rework the tracepoints a bit to allow for an "opened" variant and don't try to avoid counting acquisitions in the case where we already have a cached open file. Fixes: fb70bf124b05 ("NFSD: Instantiate a struct file when creating a regular NFSv4 file") Cc: Trond Myklebust Reported-by: Stanislav Saner Reported-and-Tested-by: Ruben Vestergaard Reported-and-Tested-by: Torkil Svensgaard Signed-off-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Sasha Levin --- fs/nfsd/filecache.c | 40 ++++++++++++++++++---------------- fs/nfsd/filecache.h | 5 +++-- fs/nfsd/nfs4state.c | 16 ++++---------- fs/nfsd/trace.h | 52 ++++++++++++--------------------------------- 4 files changed, 42 insertions(+), 71 deletions(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 9bf78506d071..ea6fb0e6b165 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1048,8 +1048,8 @@ nfsd_file_is_cached(struct inode *inode) static __be32 nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **pnf, - bool open, bool want_gc) + unsigned int may_flags, struct file *file, + struct nfsd_file **pnf, bool want_gc) { struct nfsd_file_lookup_key key = { .type = NFSD_FILE_KEY_FULL, @@ -1124,8 +1124,7 @@ wait_for_construction: status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); out: if (status == nfs_ok) { - if (open) - this_cpu_inc(nfsd_file_acquisitions); + this_cpu_inc(nfsd_file_acquisitions); *pnf = nf; } else { if (refcount_dec_and_test(&nf->nf_ref)) @@ -1135,20 +1134,23 @@ out: out_status: put_cred(key.cred); - if (open) - trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); + trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status); return status; open_file: trace_nfsd_file_alloc(nf); nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode); if (nf->nf_mark) { - if (open) { + if (file) { + get_file(file); + nf->nf_file = file; + status = nfs_ok; + trace_nfsd_file_opened(nf, status); + } else { status = nfsd_open_verified(rqstp, fhp, may_flags, &nf->nf_file); trace_nfsd_file_open(nf, status); - } else - status = nfs_ok; + } } else status = nfserr_jukebox; /* @@ -1184,7 +1186,7 @@ __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, true); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, true); } /** @@ -1205,28 +1207,30 @@ __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, true, false); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, NULL, pnf, false); } /** - * nfsd_file_create - Get a struct nfsd_file, do not open + * nfsd_file_acquire_opened - Get a struct nfsd_file using existing open file * @rqstp: the RPC transaction being executed * @fhp: the NFS filehandle of the file just created * @may_flags: NFSD_MAY_ settings for the file + * @file: cached, already-open file (may be NULL) * @pnf: OUT: new or found "struct nfsd_file" object * - * The nfsd_file_object returned by this API is reference-counted - * but not garbage-collected. The object is released immediately - * one RCU grace period after the final nfsd_file_put(). + * Acquire a nfsd_file object that is not GC'ed. If one doesn't already exist, + * and @file is non-NULL, use it to instantiate a new nfsd_file instead of + * opening a new one. * * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in * network byte order is returned. */ __be32 -nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **pnf) +nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct file *file, + struct nfsd_file **pnf) { - return nfsd_file_do_acquire(rqstp, fhp, may_flags, pnf, false, false); + return nfsd_file_do_acquire(rqstp, fhp, may_flags, file, pnf, false); } /* diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index b7efb2c3ddb1..41516a4263ea 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -60,7 +60,8 @@ __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **nfp); __be32 nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned int may_flags, struct nfsd_file **nfp); -__be32 nfsd_file_create(struct svc_rqst *rqstp, struct svc_fh *fhp, - unsigned int may_flags, struct nfsd_file **nfp); +__be32 nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp, + unsigned int may_flags, struct file *file, + struct nfsd_file **nfp); int nfsd_file_cache_stats_show(struct seq_file *m, void *v); #endif /* _FS_NFSD_FILECACHE_H */ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 16c3e991ddcc..2247d107da90 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -5211,18 +5211,10 @@ static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, if (!fp->fi_fds[oflag]) { spin_unlock(&fp->fi_lock); - if (!open->op_filp) { - status = nfsd_file_acquire(rqstp, cur_fh, access, &nf); - if (status != nfs_ok) - goto out_put_access; - } else { - status = nfsd_file_create(rqstp, cur_fh, access, &nf); - if (status != nfs_ok) - goto out_put_access; - nf->nf_file = open->op_filp; - open->op_filp = NULL; - trace_nfsd_file_create(rqstp, access, nf); - } + status = nfsd_file_acquire_opened(rqstp, cur_fh, access, + open->op_filp, &nf); + if (status != nfs_ok) + goto out_put_access; spin_lock(&fp->fi_lock); if (!fp->fi_fds[oflag]) { diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index 4feeaed32541..4eb4e1039c7f 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -922,43 +922,6 @@ TRACE_EVENT(nfsd_file_acquire, ) ); -TRACE_EVENT(nfsd_file_create, - TP_PROTO( - const struct svc_rqst *rqstp, - unsigned int may_flags, - const struct nfsd_file *nf - ), - - TP_ARGS(rqstp, may_flags, nf), - - TP_STRUCT__entry( - __field(const void *, nf_inode) - __field(const void *, nf_file) - __field(unsigned long, may_flags) - __field(unsigned long, nf_flags) - __field(unsigned long, nf_may) - __field(unsigned int, nf_ref) - __field(u32, xid) - ), - - TP_fast_assign( - __entry->nf_inode = nf->nf_inode; - __entry->nf_file = nf->nf_file; - __entry->may_flags = may_flags; - __entry->nf_flags = nf->nf_flags; - __entry->nf_may = nf->nf_may; - __entry->nf_ref = refcount_read(&nf->nf_ref); - __entry->xid = be32_to_cpu(rqstp->rq_xid); - ), - - TP_printk("xid=0x%x inode=%p may_flags=%s ref=%u nf_flags=%s nf_may=%s nf_file=%p", - __entry->xid, __entry->nf_inode, - show_nfsd_may_flags(__entry->may_flags), - __entry->nf_ref, show_nf_flags(__entry->nf_flags), - show_nfsd_may_flags(__entry->nf_may), __entry->nf_file - ) -); - TRACE_EVENT(nfsd_file_insert_err, TP_PROTO( const struct svc_rqst *rqstp, @@ -1020,8 +983,8 @@ TRACE_EVENT(nfsd_file_cons_err, ) ); -TRACE_EVENT(nfsd_file_open, - TP_PROTO(struct nfsd_file *nf, __be32 status), +DECLARE_EVENT_CLASS(nfsd_file_open_class, + TP_PROTO(const struct nfsd_file *nf, __be32 status), TP_ARGS(nf, status), TP_STRUCT__entry( __field(void *, nf_inode) /* cannot be dereferenced */ @@ -1045,6 +1008,17 @@ TRACE_EVENT(nfsd_file_open, __entry->nf_file) ) +#define DEFINE_NFSD_FILE_OPEN_EVENT(name) \ +DEFINE_EVENT(nfsd_file_open_class, name, \ + TP_PROTO( \ + const struct nfsd_file *nf, \ + __be32 status \ + ), \ + TP_ARGS(nf, status)) + +DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_open); +DEFINE_NFSD_FILE_OPEN_EVENT(nfsd_file_opened); + TRACE_EVENT(nfsd_file_is_cached, TP_PROTO( const struct inode *inode, From d995fadd7f09c40babda10329d90e30bec5f2356 Mon Sep 17 00:00:00 2001 From: Angela Czubak Date: Thu, 5 Jan 2023 21:31:07 +0530 Subject: [PATCH 121/177] octeontx2-af: Fix LMAC config in cgx_lmac_rx_tx_enable [ Upstream commit b4e9b8763e417db31c7088103cc557d55cb7a8f5 ] PF netdev can request AF to enable or disable reception and transmission on assigned CGX::LMAC. The current code instead of disabling or enabling 'reception and transmission' also disables/enable the LMAC. This patch fixes this issue. Fixes: 1435f66a28b4 ("octeontx2-af: CGX Rx/Tx enable/disable mbox handlers") Signed-off-by: Angela Czubak Signed-off-by: Hariprasad Kelam Reviewed-by: Leon Romanovsky Link: https://lore.kernel.org/r/20230105160107.17638-1-hkelam@marvell.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/af/cgx.c | 4 ++-- drivers/net/ethernet/marvell/octeontx2/af/cgx.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index c8724bfa86b0..8fdd3afe5998 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -768,9 +768,9 @@ int cgx_lmac_rx_tx_enable(void *cgxd, int lmac_id, bool enable) cfg = cgx_read(cgx, lmac_id, CGXX_CMRX_CFG); if (enable) - cfg |= CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN; + cfg |= DATA_PKT_RX_EN | DATA_PKT_TX_EN; else - cfg &= ~(CMR_EN | DATA_PKT_RX_EN | DATA_PKT_TX_EN); + cfg &= ~(DATA_PKT_RX_EN | DATA_PKT_TX_EN); cgx_write(cgx, lmac_id, CGXX_CMRX_CFG, cfg); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index 0b06788b8d80..04338db38671 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -30,7 +30,6 @@ #define CMR_P2X_SEL_SHIFT 59ULL #define CMR_P2X_SEL_NIX0 1ULL #define CMR_P2X_SEL_NIX1 2ULL -#define CMR_EN BIT_ULL(55) #define DATA_PKT_TX_EN BIT_ULL(53) #define DATA_PKT_RX_EN BIT_ULL(54) #define CGX_LMAC_TYPE_SHIFT 40 From 92b0051217f23cba7541d6c4ecbca026477fb642 Mon Sep 17 00:00:00 2001 From: Yair Podemsky Date: Wed, 30 Nov 2022 14:51:21 +0200 Subject: [PATCH 122/177] sched/core: Fix arch_scale_freq_tick() on tickless systems [ Upstream commit 7fb3ff22ad8772bbf0e3ce1ef3eb7b09f431807f ] In order for the scheduler to be frequency invariant we measure the ratio between the maximum CPU frequency and the actual CPU frequency. During long tickless periods of time the calculations that keep track of that might overflow, in the function scale_freq_tick(): if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt)) goto error; eventually forcing the kernel to disable the feature for all CPUs, and show the warning message: "Scheduler frequency invariance went wobbly, disabling!". Let's avoid that by limiting the frequency invariant calculations to CPUs with regular tick. Fixes: e2b0d619b400 ("x86, sched: check for counters overflow in frequency invariant accounting") Suggested-by: "Peter Zijlstra (Intel)" Signed-off-by: Yair Podemsky Signed-off-by: Ingo Molnar Reviewed-by: Valentin Schneider Acked-by: Giovanni Gherdovich Link: https://lore.kernel.org/r/20221130125121.34407-1-ypodemsk@redhat.com Signed-off-by: Sasha Levin --- kernel/sched/core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 981e41cc4121..172ec79b66f6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5498,7 +5498,9 @@ void scheduler_tick(void) unsigned long thermal_pressure; u64 resched_latency; - arch_scale_freq_tick(); + if (housekeeping_cpu(cpu, HK_TYPE_TICK)) + arch_scale_freq_tick(); + sched_clock_tick(); rq_lock(rq, &rf); From 7bed0d49ca35e281db93aaa5f631591cb91dd665 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Wed, 30 Nov 2022 17:36:02 +0100 Subject: [PATCH 123/177] hvc/xen: lock console list traversal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit c0dccad87cf68fc6012aec7567e354353097ec1a ] The currently lockless access to the xen console list in vtermno_to_xencons() is incorrect, as additions and removals from the list can happen anytime, and as such the traversal of the list to get the private console data for a given termno needs to happen with the lock held. Note users that modify the list already do so with the lock taken. Adjust current lock takers to use the _irq{save,restore} helpers, since the context in which vtermno_to_xencons() is called can have interrupts disabled. Use the _irq{save,restore} set of helpers to switch the current callers to disable interrupts in the locked region. I haven't checked if existing users could instead use the _irq variant, as I think it's safer to use _irq{save,restore} upfront. While there switch from using list_for_each_entry_safe to list_for_each_entry: the current entry cursor won't be removed as part of the code in the loop body, so using the _safe variant is pointless. Fixes: 02e19f9c7cac ('hvc_xen: implement multiconsole support') Signed-off-by: Roger Pau Monné Reviewed-by: Stefano Stabellini Link: https://lore.kernel.org/r/20221130163611.14686-1-roger.pau@citrix.com Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/tty/hvc/hvc_xen.c | 46 ++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 7c23112dc923..37809c6c027f 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -52,17 +52,22 @@ static DEFINE_SPINLOCK(xencons_lock); static struct xencons_info *vtermno_to_xencons(int vtermno) { - struct xencons_info *entry, *n, *ret = NULL; + struct xencons_info *entry, *ret = NULL; + unsigned long flags; - if (list_empty(&xenconsoles)) - return NULL; + spin_lock_irqsave(&xencons_lock, flags); + if (list_empty(&xenconsoles)) { + spin_unlock_irqrestore(&xencons_lock, flags); + return NULL; + } - list_for_each_entry_safe(entry, n, &xenconsoles, list) { + list_for_each_entry(entry, &xenconsoles, list) { if (entry->vtermno == vtermno) { ret = entry; break; } } + spin_unlock_irqrestore(&xencons_lock, flags); return ret; } @@ -223,7 +228,7 @@ static int xen_hvm_console_init(void) { int r; uint64_t v = 0; - unsigned long gfn; + unsigned long gfn, flags; struct xencons_info *info; if (!xen_hvm_domain()) @@ -258,9 +263,9 @@ static int xen_hvm_console_init(void) goto err; info->vtermno = HVC_COOKIE; - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); list_add_tail(&info->list, &xenconsoles); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); return 0; err: @@ -283,6 +288,7 @@ static int xencons_info_pv_init(struct xencons_info *info, int vtermno) static int xen_pv_console_init(void) { struct xencons_info *info; + unsigned long flags; if (!xen_pv_domain()) return -ENODEV; @@ -299,9 +305,9 @@ static int xen_pv_console_init(void) /* already configured */ return 0; } - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); xencons_info_pv_init(info, HVC_COOKIE); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); return 0; } @@ -309,6 +315,7 @@ static int xen_pv_console_init(void) static int xen_initial_domain_console_init(void) { struct xencons_info *info; + unsigned long flags; if (!xen_initial_domain()) return -ENODEV; @@ -323,9 +330,9 @@ static int xen_initial_domain_console_init(void) info->irq = bind_virq_to_irq(VIRQ_CONSOLE, 0, false); info->vtermno = HVC_COOKIE; - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); list_add_tail(&info->list, &xenconsoles); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); return 0; } @@ -380,10 +387,12 @@ static void xencons_free(struct xencons_info *info) static int xen_console_remove(struct xencons_info *info) { + unsigned long flags; + xencons_disconnect_backend(info); - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); list_del(&info->list); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); if (info->xbdev != NULL) xencons_free(info); else { @@ -464,6 +473,7 @@ static int xencons_probe(struct xenbus_device *dev, { int ret, devid; struct xencons_info *info; + unsigned long flags; devid = dev->nodename[strlen(dev->nodename) - 1] - '0'; if (devid == 0) @@ -482,9 +492,9 @@ static int xencons_probe(struct xenbus_device *dev, ret = xencons_connect_backend(dev, info); if (ret < 0) goto error; - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); list_add_tail(&info->list, &xenconsoles); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); return 0; @@ -584,10 +594,12 @@ static int __init xen_hvc_init(void) info->hvc = hvc_alloc(HVC_COOKIE, info->irq, ops, 256); if (IS_ERR(info->hvc)) { + unsigned long flags; + r = PTR_ERR(info->hvc); - spin_lock(&xencons_lock); + spin_lock_irqsave(&xencons_lock, flags); list_del(&info->list); - spin_unlock(&xencons_lock); + spin_unlock_irqrestore(&xencons_lock, flags); if (info->irq) unbind_from_irqhandler(info->irq, NULL); kfree(info); From 8998db5021a28ad67aa8d627bdb4226e4046ccc4 Mon Sep 17 00:00:00 2001 From: Minsuk Kang Date: Fri, 6 Jan 2023 17:23:44 +0900 Subject: [PATCH 124/177] nfc: pn533: Wait for out_urb's completion in pn533_usb_send_frame() [ Upstream commit 9dab880d675b9d0dd56c6428e4e8352a3339371d ] Fix a use-after-free that occurs in hcd when in_urb sent from pn533_usb_send_frame() is completed earlier than out_urb. Its callback frees the skb data in pn533_send_async_complete() that is used as a transfer buffer of out_urb. Wait before sending in_urb until the callback of out_urb is called. To modify the callback of out_urb alone, separate the complete function of out_urb and ack_urb. Found by a modified version of syzkaller. BUG: KASAN: use-after-free in dummy_timer Call Trace: memcpy (mm/kasan/shadow.c:65) dummy_perform_transfer (drivers/usb/gadget/udc/dummy_hcd.c:1352) transfer (drivers/usb/gadget/udc/dummy_hcd.c:1453) dummy_timer (drivers/usb/gadget/udc/dummy_hcd.c:1972) arch_static_branch (arch/x86/include/asm/jump_label.h:27) static_key_false (include/linux/jump_label.h:207) timer_expire_exit (include/trace/events/timer.h:127) call_timer_fn (kernel/time/timer.c:1475) expire_timers (kernel/time/timer.c:1519) __run_timers (kernel/time/timer.c:1790) run_timer_softirq (kernel/time/timer.c:1803) Fixes: c46ee38620a2 ("NFC: pn533: add NXP pn533 nfc device driver") Signed-off-by: Minsuk Kang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/nfc/pn533/usb.c | 44 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c index 6f71ac72012e..ed9c5e2cf3ad 100644 --- a/drivers/nfc/pn533/usb.c +++ b/drivers/nfc/pn533/usb.c @@ -153,10 +153,17 @@ static int pn533_usb_send_ack(struct pn533 *dev, gfp_t flags) return usb_submit_urb(phy->ack_urb, flags); } +struct pn533_out_arg { + struct pn533_usb_phy *phy; + struct completion done; +}; + static int pn533_usb_send_frame(struct pn533 *dev, struct sk_buff *out) { struct pn533_usb_phy *phy = dev->phy; + struct pn533_out_arg arg; + void *cntx; int rc; if (phy->priv == NULL) @@ -168,10 +175,17 @@ static int pn533_usb_send_frame(struct pn533 *dev, print_hex_dump_debug("PN533 TX: ", DUMP_PREFIX_NONE, 16, 1, out->data, out->len, false); + init_completion(&arg.done); + cntx = phy->out_urb->context; + phy->out_urb->context = &arg; + rc = usb_submit_urb(phy->out_urb, GFP_KERNEL); if (rc) return rc; + wait_for_completion(&arg.done); + phy->out_urb->context = cntx; + if (dev->protocol_type == PN533_PROTO_REQ_RESP) { /* request for response for sent packet directly */ rc = pn533_submit_urb_for_response(phy, GFP_KERNEL); @@ -408,7 +422,31 @@ static int pn533_acr122_poweron_rdr(struct pn533_usb_phy *phy) return arg.rc; } -static void pn533_send_complete(struct urb *urb) +static void pn533_out_complete(struct urb *urb) +{ + struct pn533_out_arg *arg = urb->context; + struct pn533_usb_phy *phy = arg->phy; + + switch (urb->status) { + case 0: + break; /* success */ + case -ECONNRESET: + case -ENOENT: + dev_dbg(&phy->udev->dev, + "The urb has been stopped (status %d)\n", + urb->status); + break; + case -ESHUTDOWN: + default: + nfc_err(&phy->udev->dev, + "Urb failure (status %d)\n", + urb->status); + } + + complete(&arg->done); +} + +static void pn533_ack_complete(struct urb *urb) { struct pn533_usb_phy *phy = urb->context; @@ -496,10 +534,10 @@ static int pn533_usb_probe(struct usb_interface *interface, usb_fill_bulk_urb(phy->out_urb, phy->udev, usb_sndbulkpipe(phy->udev, out_endpoint), - NULL, 0, pn533_send_complete, phy); + NULL, 0, pn533_out_complete, phy); usb_fill_bulk_urb(phy->ack_urb, phy->udev, usb_sndbulkpipe(phy->udev, out_endpoint), - NULL, 0, pn533_send_complete, phy); + NULL, 0, pn533_ack_complete, phy); switch (id->driver_info) { case PN533_DEVICE_STD: From df9cddce8f99a47af8a8cf035ef367e9aa256fa5 Mon Sep 17 00:00:00 2001 From: Richard Gobert Date: Tue, 8 Nov 2022 13:33:28 +0100 Subject: [PATCH 125/177] gro: avoid checking for a failed search [ Upstream commit e081ecf084d31809242fb0b9f35484d5fb3a161a ] After searching for a protocol handler in dev_gro_receive, checking for failure is redundant. Skip the failure code after finding the corresponding handler. Suggested-by: Eric Dumazet Signed-off-by: Richard Gobert Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20221108123320.GA59373@debian Signed-off-by: Paolo Abeni Stable-dep-of: 7871f54e3dee ("gro: take care of DODGY packets") Signed-off-by: Sasha Levin --- net/core/gro.c | 72 +++++++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/net/core/gro.c b/net/core/gro.c index bc9451743307..8e0fe85a647d 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -489,45 +489,45 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { - if (ptype->type != type || !ptype->callbacks.gro_receive) - continue; - - skb_set_network_header(skb, skb_gro_offset(skb)); - skb_reset_mac_len(skb); - BUILD_BUG_ON(sizeof_field(struct napi_gro_cb, zeroed) != sizeof(u32)); - BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct napi_gro_cb, zeroed), - sizeof(u32))); /* Avoid slow unaligned acc */ - *(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0; - NAPI_GRO_CB(skb)->flush = skb_has_frag_list(skb); - NAPI_GRO_CB(skb)->is_atomic = 1; - NAPI_GRO_CB(skb)->count = 1; - if (unlikely(skb_is_gso(skb))) { - NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs; - /* Only support TCP at the moment. */ - if (!skb_is_gso_tcp(skb)) - NAPI_GRO_CB(skb)->flush = 1; - } - - /* Setup for GRO checksum validation */ - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - NAPI_GRO_CB(skb)->csum = skb->csum; - NAPI_GRO_CB(skb)->csum_valid = 1; - break; - case CHECKSUM_UNNECESSARY: - NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1; - break; - } - - pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive, - ipv6_gro_receive, inet_gro_receive, - &gro_list->list, skb); - break; + if (ptype->type == type && ptype->callbacks.gro_receive) + goto found_ptype; } rcu_read_unlock(); + goto normal; - if (&ptype->list == head) - goto normal; +found_ptype: + skb_set_network_header(skb, skb_gro_offset(skb)); + skb_reset_mac_len(skb); + BUILD_BUG_ON(sizeof_field(struct napi_gro_cb, zeroed) != sizeof(u32)); + BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct napi_gro_cb, zeroed), + sizeof(u32))); /* Avoid slow unaligned acc */ + *(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0; + NAPI_GRO_CB(skb)->flush = skb_has_frag_list(skb); + NAPI_GRO_CB(skb)->is_atomic = 1; + NAPI_GRO_CB(skb)->count = 1; + if (unlikely(skb_is_gso(skb))) { + NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs; + /* Only support TCP at the moment. */ + if (!skb_is_gso_tcp(skb)) + NAPI_GRO_CB(skb)->flush = 1; + } + + /* Setup for GRO checksum validation */ + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + NAPI_GRO_CB(skb)->csum = skb->csum; + NAPI_GRO_CB(skb)->csum_valid = 1; + break; + case CHECKSUM_UNNECESSARY: + NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1; + break; + } + + pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive, + ipv6_gro_receive, inet_gro_receive, + &gro_list->list, skb); + + rcu_read_unlock(); if (PTR_ERR(pp) == -EINPROGRESS) { ret = GRO_CONSUMED; From 0b1605e45c36968a508703e08bf0947a68732841 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 6 Jan 2023 14:25:23 +0000 Subject: [PATCH 126/177] gro: take care of DODGY packets [ Upstream commit 7871f54e3deed68a27111dda162c4fe9b9c65f8f ] Jaroslav reported a recent throughput regression with virtio_net caused by blamed commit. It is unclear if DODGY GSO packets coming from user space can be accepted by GRO engine in the future with minimal changes, and if there is any expected gain from it. In the meantime, make sure to detect and flush DODGY packets. Fixes: 5eddb24901ee ("gro: add support of (hw)gro packets to gro stack") Signed-off-by: Eric Dumazet Reported-and-bisected-by: Jaroslav Pulchart Cc: Coco Li Cc: Paolo Abeni Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/gro.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/core/gro.c b/net/core/gro.c index 8e0fe85a647d..1b4abfb9a7a1 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -507,8 +507,9 @@ found_ptype: NAPI_GRO_CB(skb)->count = 1; if (unlikely(skb_is_gso(skb))) { NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs; - /* Only support TCP at the moment. */ - if (!skb_is_gso_tcp(skb)) + /* Only support TCP and non DODGY users. */ + if (!skb_is_gso_tcp(skb) || + (skb_shinfo(skb)->gso_type & SKB_GSO_DODGY)) NAPI_GRO_CB(skb)->flush = 1; } From 31a997c40a8aeb079607e6a0164ecbfedc62ea36 Mon Sep 17 00:00:00 2001 From: Mirsad Goran Todorovac Date: Sat, 7 Jan 2023 04:40:20 +0100 Subject: [PATCH 127/177] af_unix: selftest: Fix the size of the parameter to connect() [ Upstream commit 7d6ceeb1875cc08dc3d1e558e191434d94840cd5 ] Adjust size parameter in connect() to match the type of the parameter, to fix "No such file or directory" error in selftests/net/af_unix/ test_oob_unix.c:127. The existing code happens to work provided that the autogenerated pathname is shorter than sizeof (struct sockaddr), which is why it hasn't been noticed earlier. Visible from the trace excerpt: bind(3, {sa_family=AF_UNIX, sun_path="unix_oob_453059"}, 110) = 0 clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7fa6a6577a10) = 453060 [pid ] connect(6, {sa_family=AF_UNIX, sun_path="unix_oob_45305"}, 16) = -1 ENOENT (No such file or directory) BUG: The filename is trimmed to sizeof (struct sockaddr). Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Shuah Khan Cc: Kuniyuki Iwashima Cc: Florian Westphal Reviewed-by: Florian Westphal Fixes: 314001f0bf92 ("af_unix: Add OOB support") Signed-off-by: Mirsad Goran Todorovac Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- tools/testing/selftests/net/af_unix/test_unix_oob.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c index b57e91e1c3f2..532459a15067 100644 --- a/tools/testing/selftests/net/af_unix/test_unix_oob.c +++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c @@ -124,7 +124,7 @@ void producer(struct sockaddr_un *consumer_addr) wait_for_signal(pipefd[0]); if (connect(cfd, (struct sockaddr *)consumer_addr, - sizeof(struct sockaddr)) != 0) { + sizeof(*consumer_addr)) != 0) { perror("Connect failed"); kill(0, SIGTERM); exit(1); From c878ac66db429530a54518305e302f0675e44a98 Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Sat, 31 Dec 2022 12:55:06 +0100 Subject: [PATCH 128/177] ASoC: qcom: Fix building APQ8016 machine driver without SOUNDWIRE [ Upstream commit 0cbf1ecd8c4801ec7566231491f7ad9cec31098b ] Older Qualcomm platforms like APQ8016 do not have hardware support for SoundWire, so kernel configurations made specifically for those platforms will usually not have CONFIG_SOUNDWIRE enabled. Unfortunately commit 8d89cf6ff229 ("ASoC: qcom: cleanup and fix dependency of QCOM_COMMON") breaks those kernel configurations, because SOUNDWIRE is now a required dependency for SND_SOC_QCOM_COMMON (and in turn also SND_SOC_APQ8016_SBC). Trying to migrate such a kernel config silently disables SND_SOC_APQ8016_SBC and breaks audio functionality. The soundwire helpers in common.c are only used by two of the Qualcomm audio machine drivers, so building and requiring CONFIG_SOUNDWIRE for all platforms is unnecessary. There is no need to stuff all common code into a single module. Fix the issue by moving the soundwire helpers to a separate SND_SOC_QCOM_SDW module/option that is selected only by the machine drivers that make use of them. This also allows reverting the imply/depends changes from the previous fix because both SM8250 and SC8280XP already depend on SOUNDWIRE, so the soundwire helpers will be only built if SOUNDWIRE is really enabled. Cc: Srinivas Kandagatla Fixes: 8d89cf6ff229 ("ASoC: qcom: cleanup and fix dependency of QCOM_COMMON") Signed-off-by: Stephan Gerhold Link: https://lore.kernel.org/r/20221231115506.82991-1-stephan@gerhold.net Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/qcom/Kconfig | 21 ++++--- sound/soc/qcom/Makefile | 2 + sound/soc/qcom/common.c | 114 ----------------------------------- sound/soc/qcom/common.h | 10 ---- sound/soc/qcom/sc8280xp.c | 1 + sound/soc/qcom/sdw.c | 123 ++++++++++++++++++++++++++++++++++++++ sound/soc/qcom/sdw.h | 18 ++++++ sound/soc/qcom/sm8250.c | 1 + 8 files changed, 157 insertions(+), 133 deletions(-) create mode 100644 sound/soc/qcom/sdw.c create mode 100644 sound/soc/qcom/sdw.h diff --git a/sound/soc/qcom/Kconfig b/sound/soc/qcom/Kconfig index 96a6d4731e6f..e7b00d1d9e99 100644 --- a/sound/soc/qcom/Kconfig +++ b/sound/soc/qcom/Kconfig @@ -2,7 +2,6 @@ menuconfig SND_SOC_QCOM tristate "ASoC support for QCOM platforms" depends on ARCH_QCOM || COMPILE_TEST - imply SND_SOC_QCOM_COMMON help Say Y or M if you want to add support to use audio devices in Qualcomm Technologies SOC-based platforms. @@ -60,14 +59,16 @@ config SND_SOC_STORM config SND_SOC_APQ8016_SBC tristate "SoC Audio support for APQ8016 SBC platforms" select SND_SOC_LPASS_APQ8016 - depends on SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_COMMON help Support for Qualcomm Technologies LPASS audio block in APQ8016 SOC-based systems. Say Y if you want to use audio devices on MI2S. config SND_SOC_QCOM_COMMON - depends on SOUNDWIRE + tristate + +config SND_SOC_QCOM_SDW tristate config SND_SOC_QDSP6_COMMON @@ -144,7 +145,7 @@ config SND_SOC_MSM8996 depends on QCOM_APR depends on COMMON_CLK select SND_SOC_QDSP6 - depends on SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_COMMON help Support for Qualcomm Technologies LPASS audio block in APQ8096 SoC-based systems. @@ -155,7 +156,7 @@ config SND_SOC_SDM845 depends on QCOM_APR && I2C && SOUNDWIRE depends on COMMON_CLK select SND_SOC_QDSP6 - depends on SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_COMMON select SND_SOC_RT5663 select SND_SOC_MAX98927 imply SND_SOC_CROS_EC_CODEC @@ -169,7 +170,8 @@ config SND_SOC_SM8250 depends on QCOM_APR && SOUNDWIRE depends on COMMON_CLK select SND_SOC_QDSP6 - depends on SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_SDW help To add support for audio on Qualcomm Technologies Inc. SM8250 SoC-based systems. @@ -180,7 +182,8 @@ config SND_SOC_SC8280XP depends on QCOM_APR && SOUNDWIRE depends on COMMON_CLK select SND_SOC_QDSP6 - depends on SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_SDW help To add support for audio on Qualcomm Technologies Inc. SC8280XP SoC-based systems. @@ -190,7 +193,7 @@ config SND_SOC_SC7180 tristate "SoC Machine driver for SC7180 boards" depends on I2C && GPIOLIB depends on SOUNDWIRE || SOUNDWIRE=n - depends on SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_COMMON select SND_SOC_LPASS_SC7180 select SND_SOC_MAX98357A select SND_SOC_RT5682_I2C @@ -204,7 +207,7 @@ config SND_SOC_SC7180 config SND_SOC_SC7280 tristate "SoC Machine driver for SC7280 boards" depends on I2C && SOUNDWIRE - depends on SND_SOC_QCOM_COMMON + select SND_SOC_QCOM_COMMON select SND_SOC_LPASS_SC7280 select SND_SOC_MAX98357A select SND_SOC_WCD938X_SDW diff --git a/sound/soc/qcom/Makefile b/sound/soc/qcom/Makefile index 8b97172cf990..254350d9dc06 100644 --- a/sound/soc/qcom/Makefile +++ b/sound/soc/qcom/Makefile @@ -28,6 +28,7 @@ snd-soc-sdm845-objs := sdm845.o snd-soc-sm8250-objs := sm8250.o snd-soc-sc8280xp-objs := sc8280xp.o snd-soc-qcom-common-objs := common.o +snd-soc-qcom-sdw-objs := sdw.o obj-$(CONFIG_SND_SOC_STORM) += snd-soc-storm.o obj-$(CONFIG_SND_SOC_APQ8016_SBC) += snd-soc-apq8016-sbc.o @@ -38,6 +39,7 @@ obj-$(CONFIG_SND_SOC_SC8280XP) += snd-soc-sc8280xp.o obj-$(CONFIG_SND_SOC_SDM845) += snd-soc-sdm845.o obj-$(CONFIG_SND_SOC_SM8250) += snd-soc-sm8250.o obj-$(CONFIG_SND_SOC_QCOM_COMMON) += snd-soc-qcom-common.o +obj-$(CONFIG_SND_SOC_QCOM_SDW) += snd-soc-qcom-sdw.o #DSP lib obj-$(CONFIG_SND_SOC_QDSP6) += qdsp6/ diff --git a/sound/soc/qcom/common.c b/sound/soc/qcom/common.c index 49c74c1662a3..96fe80241fb4 100644 --- a/sound/soc/qcom/common.c +++ b/sound/soc/qcom/common.c @@ -180,120 +180,6 @@ err_put_np: } EXPORT_SYMBOL_GPL(qcom_snd_parse_of); -int qcom_snd_sdw_prepare(struct snd_pcm_substream *substream, - struct sdw_stream_runtime *sruntime, - bool *stream_prepared) -{ - struct snd_soc_pcm_runtime *rtd = substream->private_data; - struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); - int ret; - - if (!sruntime) - return 0; - - switch (cpu_dai->id) { - case WSA_CODEC_DMA_RX_0: - case WSA_CODEC_DMA_RX_1: - case RX_CODEC_DMA_RX_0: - case RX_CODEC_DMA_RX_1: - case TX_CODEC_DMA_TX_0: - case TX_CODEC_DMA_TX_1: - case TX_CODEC_DMA_TX_2: - case TX_CODEC_DMA_TX_3: - break; - default: - return 0; - } - - if (*stream_prepared) { - sdw_disable_stream(sruntime); - sdw_deprepare_stream(sruntime); - *stream_prepared = false; - } - - ret = sdw_prepare_stream(sruntime); - if (ret) - return ret; - - /** - * NOTE: there is a strict hw requirement about the ordering of port - * enables and actual WSA881x PA enable. PA enable should only happen - * after soundwire ports are enabled if not DC on the line is - * accumulated resulting in Click/Pop Noise - * PA enable/mute are handled as part of codec DAPM and digital mute. - */ - - ret = sdw_enable_stream(sruntime); - if (ret) { - sdw_deprepare_stream(sruntime); - return ret; - } - *stream_prepared = true; - - return ret; -} -EXPORT_SYMBOL_GPL(qcom_snd_sdw_prepare); - -int qcom_snd_sdw_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params, - struct sdw_stream_runtime **psruntime) -{ - struct snd_soc_pcm_runtime *rtd = substream->private_data; - struct snd_soc_dai *codec_dai; - struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); - struct sdw_stream_runtime *sruntime; - int i; - - switch (cpu_dai->id) { - case WSA_CODEC_DMA_RX_0: - case RX_CODEC_DMA_RX_0: - case RX_CODEC_DMA_RX_1: - case TX_CODEC_DMA_TX_0: - case TX_CODEC_DMA_TX_1: - case TX_CODEC_DMA_TX_2: - case TX_CODEC_DMA_TX_3: - for_each_rtd_codec_dais(rtd, i, codec_dai) { - sruntime = snd_soc_dai_get_stream(codec_dai, substream->stream); - if (sruntime != ERR_PTR(-ENOTSUPP)) - *psruntime = sruntime; - } - break; - } - - return 0; - -} -EXPORT_SYMBOL_GPL(qcom_snd_sdw_hw_params); - -int qcom_snd_sdw_hw_free(struct snd_pcm_substream *substream, - struct sdw_stream_runtime *sruntime, bool *stream_prepared) -{ - struct snd_soc_pcm_runtime *rtd = substream->private_data; - struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); - - switch (cpu_dai->id) { - case WSA_CODEC_DMA_RX_0: - case WSA_CODEC_DMA_RX_1: - case RX_CODEC_DMA_RX_0: - case RX_CODEC_DMA_RX_1: - case TX_CODEC_DMA_TX_0: - case TX_CODEC_DMA_TX_1: - case TX_CODEC_DMA_TX_2: - case TX_CODEC_DMA_TX_3: - if (sruntime && *stream_prepared) { - sdw_disable_stream(sruntime); - sdw_deprepare_stream(sruntime); - *stream_prepared = false; - } - break; - default: - break; - } - - return 0; -} -EXPORT_SYMBOL_GPL(qcom_snd_sdw_hw_free); - int qcom_snd_wcd_jack_setup(struct snd_soc_pcm_runtime *rtd, struct snd_soc_jack *jack, bool *jack_setup) { diff --git a/sound/soc/qcom/common.h b/sound/soc/qcom/common.h index 3ef5bb6d12df..d7f80ee5ae26 100644 --- a/sound/soc/qcom/common.h +++ b/sound/soc/qcom/common.h @@ -5,19 +5,9 @@ #define __QCOM_SND_COMMON_H__ #include -#include int qcom_snd_parse_of(struct snd_soc_card *card); int qcom_snd_wcd_jack_setup(struct snd_soc_pcm_runtime *rtd, struct snd_soc_jack *jack, bool *jack_setup); -int qcom_snd_sdw_prepare(struct snd_pcm_substream *substream, - struct sdw_stream_runtime *runtime, - bool *stream_prepared); -int qcom_snd_sdw_hw_params(struct snd_pcm_substream *substream, - struct snd_pcm_hw_params *params, - struct sdw_stream_runtime **psruntime); -int qcom_snd_sdw_hw_free(struct snd_pcm_substream *substream, - struct sdw_stream_runtime *sruntime, - bool *stream_prepared); #endif diff --git a/sound/soc/qcom/sc8280xp.c b/sound/soc/qcom/sc8280xp.c index ade44ad7c585..14d9fea33d16 100644 --- a/sound/soc/qcom/sc8280xp.c +++ b/sound/soc/qcom/sc8280xp.c @@ -12,6 +12,7 @@ #include #include "qdsp6/q6afe.h" #include "common.h" +#include "sdw.h" #define DRIVER_NAME "sc8280xp" diff --git a/sound/soc/qcom/sdw.c b/sound/soc/qcom/sdw.c new file mode 100644 index 000000000000..10249519a39e --- /dev/null +++ b/sound/soc/qcom/sdw.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018, Linaro Limited. +// Copyright (c) 2018, The Linux Foundation. All rights reserved. + +#include +#include +#include "qdsp6/q6afe.h" +#include "sdw.h" + +int qcom_snd_sdw_prepare(struct snd_pcm_substream *substream, + struct sdw_stream_runtime *sruntime, + bool *stream_prepared) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); + int ret; + + if (!sruntime) + return 0; + + switch (cpu_dai->id) { + case WSA_CODEC_DMA_RX_0: + case WSA_CODEC_DMA_RX_1: + case RX_CODEC_DMA_RX_0: + case RX_CODEC_DMA_RX_1: + case TX_CODEC_DMA_TX_0: + case TX_CODEC_DMA_TX_1: + case TX_CODEC_DMA_TX_2: + case TX_CODEC_DMA_TX_3: + break; + default: + return 0; + } + + if (*stream_prepared) { + sdw_disable_stream(sruntime); + sdw_deprepare_stream(sruntime); + *stream_prepared = false; + } + + ret = sdw_prepare_stream(sruntime); + if (ret) + return ret; + + /** + * NOTE: there is a strict hw requirement about the ordering of port + * enables and actual WSA881x PA enable. PA enable should only happen + * after soundwire ports are enabled if not DC on the line is + * accumulated resulting in Click/Pop Noise + * PA enable/mute are handled as part of codec DAPM and digital mute. + */ + + ret = sdw_enable_stream(sruntime); + if (ret) { + sdw_deprepare_stream(sruntime); + return ret; + } + *stream_prepared = true; + + return ret; +} +EXPORT_SYMBOL_GPL(qcom_snd_sdw_prepare); + +int qcom_snd_sdw_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct sdw_stream_runtime **psruntime) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_dai *codec_dai; + struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); + struct sdw_stream_runtime *sruntime; + int i; + + switch (cpu_dai->id) { + case WSA_CODEC_DMA_RX_0: + case RX_CODEC_DMA_RX_0: + case RX_CODEC_DMA_RX_1: + case TX_CODEC_DMA_TX_0: + case TX_CODEC_DMA_TX_1: + case TX_CODEC_DMA_TX_2: + case TX_CODEC_DMA_TX_3: + for_each_rtd_codec_dais(rtd, i, codec_dai) { + sruntime = snd_soc_dai_get_stream(codec_dai, substream->stream); + if (sruntime != ERR_PTR(-ENOTSUPP)) + *psruntime = sruntime; + } + break; + } + + return 0; + +} +EXPORT_SYMBOL_GPL(qcom_snd_sdw_hw_params); + +int qcom_snd_sdw_hw_free(struct snd_pcm_substream *substream, + struct sdw_stream_runtime *sruntime, bool *stream_prepared) +{ + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_dai *cpu_dai = asoc_rtd_to_cpu(rtd, 0); + + switch (cpu_dai->id) { + case WSA_CODEC_DMA_RX_0: + case WSA_CODEC_DMA_RX_1: + case RX_CODEC_DMA_RX_0: + case RX_CODEC_DMA_RX_1: + case TX_CODEC_DMA_TX_0: + case TX_CODEC_DMA_TX_1: + case TX_CODEC_DMA_TX_2: + case TX_CODEC_DMA_TX_3: + if (sruntime && *stream_prepared) { + sdw_disable_stream(sruntime); + sdw_deprepare_stream(sruntime); + *stream_prepared = false; + } + break; + default: + break; + } + + return 0; +} +EXPORT_SYMBOL_GPL(qcom_snd_sdw_hw_free); +MODULE_LICENSE("GPL v2"); diff --git a/sound/soc/qcom/sdw.h b/sound/soc/qcom/sdw.h new file mode 100644 index 000000000000..d74cbb84da13 --- /dev/null +++ b/sound/soc/qcom/sdw.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (c) 2018, The Linux Foundation. All rights reserved. + +#ifndef __QCOM_SND_SDW_H__ +#define __QCOM_SND_SDW_H__ + +#include + +int qcom_snd_sdw_prepare(struct snd_pcm_substream *substream, + struct sdw_stream_runtime *runtime, + bool *stream_prepared); +int qcom_snd_sdw_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct sdw_stream_runtime **psruntime); +int qcom_snd_sdw_hw_free(struct snd_pcm_substream *substream, + struct sdw_stream_runtime *sruntime, + bool *stream_prepared); +#endif diff --git a/sound/soc/qcom/sm8250.c b/sound/soc/qcom/sm8250.c index 8dbe9ef41b1c..9626a9ef78c2 100644 --- a/sound/soc/qcom/sm8250.c +++ b/sound/soc/qcom/sm8250.c @@ -12,6 +12,7 @@ #include #include "qdsp6/q6afe.h" #include "common.h" +#include "sdw.h" #define DRIVER_NAME "sm8250" #define MI2S_BCLK_RATE 1536000 From 91fa2bd352807bc0be4ae707321c7e7e01c7f040 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 9 Jan 2023 08:54:39 +0100 Subject: [PATCH 129/177] tools/nolibc: restore mips branch ordering in the _start block [ Upstream commit 184177c3d6e023da934761e198c281344d7dd65b ] Depending on the compiler used and the optimization options, the sbrk() test was crashing, both on real hardware (mips-24kc) and in qemu. One such example is kernel.org toolchain in version 11.3 optimizing at -Os. Inspecting the sys_brk() call shows the following code: 0040047c : 40047c: 24020fcd li v0,4045 400480: 27bdffe0 addiu sp,sp,-32 400484: 0000000c syscall 400488: 27bd0020 addiu sp,sp,32 40048c: 10e00001 beqz a3,400494 400490: 00021023 negu v0,v0 400494: 03e00008 jr ra It is obviously wrong, the "negu" instruction is placed in beqz's delayed slot, and worse, there's no nop nor instruction after the return, so the next function's first instruction (addiu sip,sip,-32) will also be executed as part of the delayed slot that follows the return. This is caused by the ".set noreorder" directive in the _start block, that applies to the whole program. The compiler emits code without the delayed slots and relies on the compiler to swap instructions when this option is not set. Removing the option would require to change the startup code in a way that wouldn't make it look like the resulting code, which would not be easy to debug. Instead let's just save the default ordering before changing it, and restore it at the end of the _start block. Now the code is correct: 0040047c : 40047c: 24020fcd li v0,4045 400480: 27bdffe0 addiu sp,sp,-32 400484: 0000000c syscall 400488: 10e00002 beqz a3,400494 40048c: 27bd0020 addiu sp,sp,32 400490: 00021023 negu v0,v0 400494: 03e00008 jr ra 400498: 00000000 nop Fixes: 66b6f755ad45 ("rcutorture: Import a copy of nolibc") #5.0 Signed-off-by: Willy Tarreau Signed-off-by: Paul E. McKenney Signed-off-by: Sasha Levin --- tools/include/nolibc/arch-mips.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h index 5fc5b8029bff..7380093ba9e7 100644 --- a/tools/include/nolibc/arch-mips.h +++ b/tools/include/nolibc/arch-mips.h @@ -192,6 +192,7 @@ struct sys_stat_struct { __asm__ (".section .text\n" ".weak __start\n" ".set nomips16\n" + ".set push\n" ".set noreorder\n" ".option pic0\n" ".ent __start\n" @@ -210,6 +211,7 @@ __asm__ (".section .text\n" "li $v0, 4001\n" // NR_exit == 4001 "syscall\n" ".end __start\n" + ".set pop\n" ""); #endif // _NOLIBC_ARCH_MIPS_H From cd57d5e5e28e8b51fea428ebc7780aecb46abd02 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 9 Jan 2023 08:54:42 +0100 Subject: [PATCH 130/177] tools/nolibc: fix the O_* fcntl/open macro definitions for riscv [ Upstream commit 00b18da4089330196906b9fe075c581c17eb726c ] When RISCV port was imported in 5.2, the O_* macros were taken with their octal value and written as-is in hex, resulting in the getdents64() to fail in nolibc-test. Fixes: 582e84f7b779 ("tool headers nolibc: add RISCV support") #5.2 Signed-off-by: Willy Tarreau Signed-off-by: Paul E. McKenney Signed-off-by: Sasha Levin --- tools/include/nolibc/arch-riscv.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h index ba04771cb3a3..a3bdd9803f8c 100644 --- a/tools/include/nolibc/arch-riscv.h +++ b/tools/include/nolibc/arch-riscv.h @@ -11,13 +11,13 @@ #define O_RDONLY 0 #define O_WRONLY 1 #define O_RDWR 2 -#define O_CREAT 0x100 -#define O_EXCL 0x200 -#define O_NOCTTY 0x400 -#define O_TRUNC 0x1000 -#define O_APPEND 0x2000 -#define O_NONBLOCK 0x4000 -#define O_DIRECTORY 0x200000 +#define O_CREAT 0x40 +#define O_EXCL 0x80 +#define O_NOCTTY 0x100 +#define O_TRUNC 0x200 +#define O_APPEND 0x400 +#define O_NONBLOCK 0x800 +#define O_DIRECTORY 0x10000 struct sys_stat_struct { unsigned long st_dev; /* Device. */ From f2faf0699af78968a27ca154bf76e94247f8c471 Mon Sep 17 00:00:00 2001 From: Luben Tuikov Date: Wed, 4 Jan 2023 17:09:02 -0500 Subject: [PATCH 131/177] drm/amdgpu: Fix potential NULL dereference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 0be7ed8e7eb15282b5d0f6fdfea884db594ea9bf ] Fix potential NULL dereference, in the case when "man", the resource manager might be NULL, when/if we print debug information. Cc: Alex Deucher Cc: Christian König Cc: AMD Graphics Cc: Dan Carpenter Cc: kernel test robot Fixes: 7554886daa31ea ("drm/amdgpu: Fix size validation for non-exclusive domains (v4)") Signed-off-by: Luben Tuikov Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 3be3cba3a16d..cfd78c4a45ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -468,8 +468,9 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, return true; fail: - DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, - man->size); + if (man) + DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, + man->size); return false; } From 500ca1da9d0876244eb4d1b0ece6fa0e9968d45d Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Wed, 7 Dec 2022 08:55:02 +0000 Subject: [PATCH 132/177] ice: Fix potential memory leak in ice_gnss_tty_write() [ Upstream commit f58985620f55580a07d40062c4115d8c9cf6ae27 ] The ice_gnss_tty_write() return directly if the write_buf alloc failed, leaking the cmd_buf. Fix by free cmd_buf if write_buf alloc failed. Fixes: d6b98c8d242a ("ice: add write functionality for GNSS TTY") Signed-off-by: Yuan Can Reviewed-by: Leon Romanovsky Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_gnss.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c index b5a7f246d230..a1915551c69a 100644 --- a/drivers/net/ethernet/intel/ice/ice_gnss.c +++ b/drivers/net/ethernet/intel/ice/ice_gnss.c @@ -363,6 +363,7 @@ ice_gnss_tty_write(struct tty_struct *tty, const unsigned char *buf, int count) /* Send the data out to a hardware port */ write_buf = kzalloc(sizeof(*write_buf), GFP_KERNEL); if (!write_buf) { + kfree(cmd_buf); err = -ENOMEM; goto exit; } From 96a9873188552ebb2afe76033d7329a5ecabef6e Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Thu, 8 Dec 2022 21:35:52 +0800 Subject: [PATCH 133/177] ice: Add check for kzalloc [ Upstream commit 40543b3d9d2c13227ecd3aa90a713c201d1d7f09 ] Add the check for the return value of kzalloc in order to avoid NULL pointer dereference. Moreover, use the goto-label to share the clean code. Fixes: d6b98c8d242a ("ice: add write functionality for GNSS TTY") Signed-off-by: Jiasheng Jiang Reviewed-by: Jiri Pirko Tested-by: Gurucharan G (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_gnss.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_gnss.c b/drivers/net/ethernet/intel/ice/ice_gnss.c index a1915551c69a..43e199b5b513 100644 --- a/drivers/net/ethernet/intel/ice/ice_gnss.c +++ b/drivers/net/ethernet/intel/ice/ice_gnss.c @@ -461,6 +461,9 @@ static struct tty_driver *ice_gnss_create_tty_driver(struct ice_pf *pf) for (i = 0; i < ICE_GNSS_TTY_MINOR_DEVICES; i++) { pf->gnss_tty_port[i] = kzalloc(sizeof(*pf->gnss_tty_port[i]), GFP_KERNEL); + if (!pf->gnss_tty_port[i]) + goto err_out; + pf->gnss_serial[i] = NULL; tty_port_init(pf->gnss_tty_port[i]); @@ -470,21 +473,23 @@ static struct tty_driver *ice_gnss_create_tty_driver(struct ice_pf *pf) err = tty_register_driver(tty_driver); if (err) { dev_err(dev, "Failed to register TTY driver err=%d\n", err); - - for (i = 0; i < ICE_GNSS_TTY_MINOR_DEVICES; i++) { - tty_port_destroy(pf->gnss_tty_port[i]); - kfree(pf->gnss_tty_port[i]); - } - kfree(ttydrv_name); - tty_driver_kref_put(pf->ice_gnss_tty_driver); - - return NULL; + goto err_out; } for (i = 0; i < ICE_GNSS_TTY_MINOR_DEVICES; i++) dev_info(dev, "%s%d registered\n", ttydrv_name, i); return tty_driver; + +err_out: + while (i--) { + tty_port_destroy(pf->gnss_tty_port[i]); + kfree(pf->gnss_tty_port[i]); + } + kfree(ttydrv_name); + tty_driver_kref_put(pf->ice_gnss_tty_driver); + + return NULL; } /** From 2f3313c5554469374c2aee9904e803703c3d5fe5 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Sat, 22 Oct 2022 00:02:20 -0400 Subject: [PATCH 134/177] drm/vmwgfx: Write the driver id registers [ Upstream commit 7f4c33778686cc2d34cb4ef65b4265eea874c159 ] Driver id registers are a new mechanism in the svga device to hint to the device which driver is running. This should not change device behavior in any way, but might be convenient to work-around specific bugs in guest drivers. Signed-off-by: Zack Rusin Reviewed-by: Martin Krastev Reviewed-by: Maaz Mombasawala Link: https://patchwork.freedesktop.org/patch/msgid/20221022040236.616490-2-zack@kde.org Stable-dep-of: a309c7194e8a ("drm/vmwgfx: Remove rcu locks from user resources") Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 43 +++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index d7bd5eb1d3ac..45028e25d490 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -25,10 +25,13 @@ * **************************************************************************/ -#include -#include -#include -#include + +#include "vmwgfx_drv.h" + +#include "vmwgfx_devcaps.h" +#include "vmwgfx_mksstat.h" +#include "vmwgfx_binding.h" +#include "ttm_object.h" #include #include @@ -41,11 +44,11 @@ #include #include -#include "ttm_object.h" -#include "vmwgfx_binding.h" -#include "vmwgfx_devcaps.h" -#include "vmwgfx_drv.h" -#include "vmwgfx_mksstat.h" +#include +#include +#include +#include +#include #define VMWGFX_DRIVER_DESC "Linux drm driver for VMware graphics devices" @@ -806,6 +809,27 @@ static int vmw_detect_version(struct vmw_private *dev) return 0; } +static void vmw_write_driver_id(struct vmw_private *dev) +{ + if ((dev->capabilities2 & SVGA_CAP2_DX2) != 0) { + vmw_write(dev, SVGA_REG_GUEST_DRIVER_ID, + SVGA_REG_GUEST_DRIVER_ID_LINUX); + + vmw_write(dev, SVGA_REG_GUEST_DRIVER_VERSION1, + LINUX_VERSION_MAJOR << 24 | + LINUX_VERSION_PATCHLEVEL << 16 | + LINUX_VERSION_SUBLEVEL); + vmw_write(dev, SVGA_REG_GUEST_DRIVER_VERSION2, + VMWGFX_DRIVER_MAJOR << 24 | + VMWGFX_DRIVER_MINOR << 16 | + VMWGFX_DRIVER_PATCHLEVEL); + vmw_write(dev, SVGA_REG_GUEST_DRIVER_VERSION3, 0); + + vmw_write(dev, SVGA_REG_GUEST_DRIVER_ID, + SVGA_REG_GUEST_DRIVER_ID_SUBMIT); + } +} + static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) { int ret; @@ -1091,6 +1115,7 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) vmw_host_printf("vmwgfx: Module Version: %d.%d.%d (kernel: %s)", VMWGFX_DRIVER_MAJOR, VMWGFX_DRIVER_MINOR, VMWGFX_DRIVER_PATCHLEVEL, UTS_RELEASE); + vmw_write_driver_id(dev_priv); if (dev_priv->enable_fb) { vmw_fifo_resource_inc(dev_priv); From 8557e0e42e02c2066588e626957345530923cedf Mon Sep 17 00:00:00 2001 From: Maaz Mombasawala Date: Sat, 22 Oct 2022 00:02:22 -0400 Subject: [PATCH 135/177] drm/vmwgfx: Refactor resource manager's hashtable to use linux/hashtable implementation. [ Upstream commit 43531dc661b7fb6be249c023bf25847b38215545 ] Vmwgfx's hashtab implementation needs to be replaced with linux/hashtable to reduce maintenance burden. Refactor cmdbuf resource manager to use linux/hashtable.h implementation as part of this effort. Signed-off-by: Maaz Mombasawala Reviewed-by: Zack Rusin Reviewed-by: Martin Krastev Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20221022040236.616490-4-zack@kde.org Stable-dep-of: a309c7194e8a ("drm/vmwgfx: Remove rcu locks from user resources") Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c | 62 +++++++++------------- 1 file changed, 26 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c index 82ef58ccdd42..142aef686fcd 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2014-2015 VMware, Inc., Palo Alto, CA., USA + * Copyright 2014-2022 VMware, Inc., Palo Alto, CA., USA * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -28,6 +28,8 @@ #include "vmwgfx_drv.h" #include "vmwgfx_resource_priv.h" +#include + #define VMW_CMDBUF_RES_MAN_HT_ORDER 12 /** @@ -59,7 +61,7 @@ struct vmw_cmdbuf_res { * @resources and @list are protected by the cmdbuf mutex for now. */ struct vmw_cmdbuf_res_manager { - struct vmwgfx_open_hash resources; + DECLARE_HASHTABLE(resources, VMW_CMDBUF_RES_MAN_HT_ORDER); struct list_head list; struct vmw_private *dev_priv; }; @@ -82,14 +84,13 @@ vmw_cmdbuf_res_lookup(struct vmw_cmdbuf_res_manager *man, u32 user_key) { struct vmwgfx_hash_item *hash; - int ret; unsigned long key = user_key | (res_type << 24); - ret = vmwgfx_ht_find_item(&man->resources, key, &hash); - if (unlikely(ret != 0)) - return ERR_PTR(ret); - - return drm_hash_entry(hash, struct vmw_cmdbuf_res, hash)->res; + hash_for_each_possible_rcu(man->resources, hash, head, key) { + if (hash->key == key) + return drm_hash_entry(hash, struct vmw_cmdbuf_res, hash)->res; + } + return ERR_PTR(-EINVAL); } /** @@ -105,7 +106,7 @@ static void vmw_cmdbuf_res_free(struct vmw_cmdbuf_res_manager *man, struct vmw_cmdbuf_res *entry) { list_del(&entry->head); - WARN_ON(vmwgfx_ht_remove_item(&man->resources, &entry->hash)); + hash_del_rcu(&entry->hash.head); vmw_resource_unreference(&entry->res); kfree(entry); } @@ -159,7 +160,6 @@ void vmw_cmdbuf_res_commit(struct list_head *list) void vmw_cmdbuf_res_revert(struct list_head *list) { struct vmw_cmdbuf_res *entry, *next; - int ret; list_for_each_entry_safe(entry, next, list, head) { switch (entry->state) { @@ -167,8 +167,8 @@ void vmw_cmdbuf_res_revert(struct list_head *list) vmw_cmdbuf_res_free(entry->man, entry); break; case VMW_CMDBUF_RES_DEL: - ret = vmwgfx_ht_insert_item(&entry->man->resources, &entry->hash); - BUG_ON(ret); + hash_add_rcu(entry->man->resources, &entry->hash.head, + entry->hash.key); list_move_tail(&entry->head, &entry->man->list); entry->state = VMW_CMDBUF_RES_COMMITTED; break; @@ -199,26 +199,20 @@ int vmw_cmdbuf_res_add(struct vmw_cmdbuf_res_manager *man, struct list_head *list) { struct vmw_cmdbuf_res *cres; - int ret; cres = kzalloc(sizeof(*cres), GFP_KERNEL); if (unlikely(!cres)) return -ENOMEM; cres->hash.key = user_key | (res_type << 24); - ret = vmwgfx_ht_insert_item(&man->resources, &cres->hash); - if (unlikely(ret != 0)) { - kfree(cres); - goto out_invalid_key; - } + hash_add_rcu(man->resources, &cres->hash.head, cres->hash.key); cres->state = VMW_CMDBUF_RES_ADD; cres->res = vmw_resource_reference(res); cres->man = man; list_add_tail(&cres->head, list); -out_invalid_key: - return ret; + return 0; } /** @@ -243,24 +237,26 @@ int vmw_cmdbuf_res_remove(struct vmw_cmdbuf_res_manager *man, struct list_head *list, struct vmw_resource **res_p) { - struct vmw_cmdbuf_res *entry; + struct vmw_cmdbuf_res *entry = NULL; struct vmwgfx_hash_item *hash; - int ret; + unsigned long key = user_key | (res_type << 24); - ret = vmwgfx_ht_find_item(&man->resources, user_key | (res_type << 24), - &hash); - if (likely(ret != 0)) + hash_for_each_possible_rcu(man->resources, hash, head, key) { + if (hash->key == key) { + entry = drm_hash_entry(hash, struct vmw_cmdbuf_res, hash); + break; + } + } + if (unlikely(!entry)) return -EINVAL; - entry = drm_hash_entry(hash, struct vmw_cmdbuf_res, hash); - switch (entry->state) { case VMW_CMDBUF_RES_ADD: vmw_cmdbuf_res_free(man, entry); *res_p = NULL; break; case VMW_CMDBUF_RES_COMMITTED: - (void) vmwgfx_ht_remove_item(&man->resources, &entry->hash); + hash_del_rcu(&entry->hash.head); list_del(&entry->head); entry->state = VMW_CMDBUF_RES_DEL; list_add_tail(&entry->head, list); @@ -287,7 +283,6 @@ struct vmw_cmdbuf_res_manager * vmw_cmdbuf_res_man_create(struct vmw_private *dev_priv) { struct vmw_cmdbuf_res_manager *man; - int ret; man = kzalloc(sizeof(*man), GFP_KERNEL); if (!man) @@ -295,12 +290,8 @@ vmw_cmdbuf_res_man_create(struct vmw_private *dev_priv) man->dev_priv = dev_priv; INIT_LIST_HEAD(&man->list); - ret = vmwgfx_ht_create(&man->resources, VMW_CMDBUF_RES_MAN_HT_ORDER); - if (ret == 0) - return man; - - kfree(man); - return ERR_PTR(ret); + hash_init(man->resources); + return man; } /** @@ -320,7 +311,6 @@ void vmw_cmdbuf_res_man_destroy(struct vmw_cmdbuf_res_manager *man) list_for_each_entry_safe(entry, next, &man->list, head) vmw_cmdbuf_res_free(man, entry); - vmwgfx_ht_remove(&man->resources); kfree(man); } From c00e42f1c9f1a8a38a4a909378fc1479457e9f66 Mon Sep 17 00:00:00 2001 From: Maaz Mombasawala Date: Sat, 22 Oct 2022 00:02:23 -0400 Subject: [PATCH 136/177] drm/vmwgfx: Remove ttm object hashtable [ Upstream commit 931e09d8d5b4aa19bdae0234f2727049f1cd13d9 ] The object_hash hashtable for ttm objects is not being used. Remove it and perform refactoring in ttm_object init function. Signed-off-by: Maaz Mombasawala Reviewed-by: Zack Rusin Reviewed-by: Martin Krastev Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20221022040236.616490-5-zack@kde.org Stable-dep-of: a309c7194e8a ("drm/vmwgfx: Remove rcu locks from user resources") Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/ttm_object.c | 24 ++++++------------------ drivers/gpu/drm/vmwgfx/ttm_object.h | 6 ++---- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +- 3 files changed, 9 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.c b/drivers/gpu/drm/vmwgfx/ttm_object.c index 26a55fef1ab5..9546b121bc22 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_object.c +++ b/drivers/gpu/drm/vmwgfx/ttm_object.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * - * Copyright (c) 2009-2013 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2022 VMware, Inc., Palo Alto, CA., USA * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -44,13 +44,14 @@ #define pr_fmt(fmt) "[TTM] " fmt +#include "ttm_object.h" +#include "vmwgfx_drv.h" + #include #include #include #include #include -#include "ttm_object.h" -#include "vmwgfx_drv.h" MODULE_IMPORT_NS(DMA_BUF); @@ -81,9 +82,7 @@ struct ttm_object_file { /* * struct ttm_object_device * - * @object_lock: lock that protects the object_hash hash table. - * - * @object_hash: hash table for fast lookup of object global names. + * @object_lock: lock that protects idr. * * @object_count: Per device object count. * @@ -92,7 +91,6 @@ struct ttm_object_file { struct ttm_object_device { spinlock_t object_lock; - struct vmwgfx_open_hash object_hash; atomic_t object_count; struct dma_buf_ops ops; void (*dmabuf_release)(struct dma_buf *dma_buf); @@ -449,20 +447,15 @@ out_err: } struct ttm_object_device * -ttm_object_device_init(unsigned int hash_order, - const struct dma_buf_ops *ops) +ttm_object_device_init(const struct dma_buf_ops *ops) { struct ttm_object_device *tdev = kmalloc(sizeof(*tdev), GFP_KERNEL); - int ret; if (unlikely(tdev == NULL)) return NULL; spin_lock_init(&tdev->object_lock); atomic_set(&tdev->object_count, 0); - ret = vmwgfx_ht_create(&tdev->object_hash, hash_order); - if (ret != 0) - goto out_no_object_hash; /* * Our base is at VMWGFX_NUM_MOB + 1 because we want to create @@ -477,10 +470,6 @@ ttm_object_device_init(unsigned int hash_order, tdev->dmabuf_release = tdev->ops.release; tdev->ops.release = ttm_prime_dmabuf_release; return tdev; - -out_no_object_hash: - kfree(tdev); - return NULL; } void ttm_object_device_release(struct ttm_object_device **p_tdev) @@ -491,7 +480,6 @@ void ttm_object_device_release(struct ttm_object_device **p_tdev) WARN_ON_ONCE(!idr_is_empty(&tdev->idr)); idr_destroy(&tdev->idr); - vmwgfx_ht_remove(&tdev->object_hash); kfree(tdev); } diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.h b/drivers/gpu/drm/vmwgfx/ttm_object.h index 1a2fa0f83f5f..6870f951b677 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_object.h +++ b/drivers/gpu/drm/vmwgfx/ttm_object.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2006-2022 VMware, Inc., Palo Alto, CA., USA * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -262,7 +262,6 @@ extern void ttm_object_file_release(struct ttm_object_file **p_tfile); /** * ttm_object device init - initialize a struct ttm_object_device * - * @hash_order: Order of hash table used to hash the base objects. * @ops: DMA buf ops for prime objects of this device. * * This function is typically called on device initialization to prepare @@ -270,8 +269,7 @@ extern void ttm_object_file_release(struct ttm_object_file **p_tfile); */ extern struct ttm_object_device * -ttm_object_device_init(unsigned int hash_order, - const struct dma_buf_ops *ops); +ttm_object_device_init(const struct dma_buf_ops *ops); /** * ttm_object_device_release - release data held by a ttm_object_device diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 45028e25d490..13b90273eb77 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -994,7 +994,7 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) goto out_err0; } - dev_priv->tdev = ttm_object_device_init(12, &vmw_prime_dmabuf_ops); + dev_priv->tdev = ttm_object_device_init(&vmw_prime_dmabuf_ops); if (unlikely(dev_priv->tdev == NULL)) { drm_err(&dev_priv->drm, From e98d62090b6d6326196e444375c1e2104d2e583f Mon Sep 17 00:00:00 2001 From: Maaz Mombasawala Date: Sat, 22 Oct 2022 00:02:24 -0400 Subject: [PATCH 137/177] drm/vmwgfx: Refactor resource validation hashtable to use linux/hashtable implementation. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 9e931f2e09701e25744f3d186a4ba13b5342b136 ] Vmwgfx's hashtab implementation needs to be replaced with linux/hashtable to reduce maintenence burden. As part of this effort, refactor the res_ht hashtable used for resource validation during execbuf execution to use linux/hashtable implementation. This also refactors vmw_validation_context to use vmw_sw_context as the container for the hashtable, whereas before it used a vmwgfx_open_hash directly. This makes vmw_validation_context less generic, but there is no functional change since res_ht is the only instance where validation context used a hashtable in vmwgfx driver. Signed-off-by: Maaz Mombasawala Reviewed-by: Thomas Hellström Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20221022040236.616490-6-zack@kde.org Stable-dep-of: a309c7194e8a ("drm/vmwgfx: Remove rcu locks from user resources") Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 24 ++++++++-- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 5 +- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 14 ++---- drivers/gpu/drm/vmwgfx/vmwgfx_validation.c | 55 +++++++++++----------- drivers/gpu/drm/vmwgfx/vmwgfx_validation.h | 26 +++------- 5 files changed, 58 insertions(+), 66 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 13b90273eb77..8d77e79bd904 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -830,6 +830,22 @@ static void vmw_write_driver_id(struct vmw_private *dev) } } +static void vmw_sw_context_init(struct vmw_private *dev_priv) +{ + struct vmw_sw_context *sw_context = &dev_priv->ctx; + + hash_init(sw_context->res_ht); +} + +static void vmw_sw_context_fini(struct vmw_private *dev_priv) +{ + struct vmw_sw_context *sw_context = &dev_priv->ctx; + + vfree(sw_context->cmd_bounce); + if (sw_context->staged_bindings) + vmw_binding_state_free(sw_context->staged_bindings); +} + static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) { int ret; @@ -839,6 +855,8 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) dev_priv->drm.dev_private = dev_priv; + vmw_sw_context_init(dev_priv); + mutex_init(&dev_priv->cmdbuf_mutex); mutex_init(&dev_priv->binding_mutex); spin_lock_init(&dev_priv->resource_lock); @@ -1168,9 +1186,7 @@ static void vmw_driver_unload(struct drm_device *dev) unregister_pm_notifier(&dev_priv->pm_nb); - if (dev_priv->ctx.res_ht_initialized) - vmwgfx_ht_remove(&dev_priv->ctx.res_ht); - vfree(dev_priv->ctx.cmd_bounce); + vmw_sw_context_fini(dev_priv); if (dev_priv->enable_fb) { vmw_fb_off(dev_priv); vmw_fb_close(dev_priv); @@ -1198,8 +1214,6 @@ static void vmw_driver_unload(struct drm_device *dev) vmw_irq_uninstall(&dev_priv->drm); ttm_object_device_release(&dev_priv->tdev); - if (dev_priv->ctx.staged_bindings) - vmw_binding_state_free(dev_priv->ctx.staged_bindings); for (i = vmw_res_context; i < vmw_res_max; ++i) idr_destroy(&dev_priv->res_idr[i]); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 09e2d738aa87..d87aeedb78d0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -30,6 +30,7 @@ #include #include +#include #include #include @@ -93,6 +94,7 @@ #define VMW_RES_STREAM ttm_driver_type2 #define VMW_RES_FENCE ttm_driver_type3 #define VMW_RES_SHADER ttm_driver_type4 +#define VMW_RES_HT_ORDER 12 #define MKSSTAT_CAPACITY_LOG2 5U #define MKSSTAT_CAPACITY (1U << MKSSTAT_CAPACITY_LOG2) @@ -425,8 +427,7 @@ struct vmw_ctx_validation_info; * @ctx: The validation context */ struct vmw_sw_context{ - struct vmwgfx_open_hash res_ht; - bool res_ht_initialized; + DECLARE_HASHTABLE(res_ht, VMW_RES_HT_ORDER); bool kernel; struct vmw_fpriv *fp; struct drm_file *filp; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index f085dbd4736d..c943ab801ca7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2009 - 2015 VMware, Inc., Palo Alto, CA., USA + * Copyright 2009 - 2022 VMware, Inc., Palo Alto, CA., USA * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -25,6 +25,7 @@ * **************************************************************************/ #include +#include #include "vmwgfx_drv.h" #include "vmwgfx_reg.h" @@ -34,7 +35,6 @@ #include "vmwgfx_binding.h" #include "vmwgfx_mksstat.h" -#define VMW_RES_HT_ORDER 12 /* * Helper macro to get dx_ctx_node if available otherwise print an error @@ -4101,7 +4101,7 @@ int vmw_execbuf_process(struct drm_file *file_priv, int ret; int32_t out_fence_fd = -1; struct sync_file *sync_file = NULL; - DECLARE_VAL_CONTEXT(val_ctx, &sw_context->res_ht, 1); + DECLARE_VAL_CONTEXT(val_ctx, sw_context, 1); if (flags & DRM_VMW_EXECBUF_FLAG_EXPORT_FENCE_FD) { out_fence_fd = get_unused_fd_flags(O_CLOEXEC); @@ -4164,14 +4164,6 @@ int vmw_execbuf_process(struct drm_file *file_priv, if (sw_context->staged_bindings) vmw_binding_state_reset(sw_context->staged_bindings); - if (!sw_context->res_ht_initialized) { - ret = vmwgfx_ht_create(&sw_context->res_ht, VMW_RES_HT_ORDER); - if (unlikely(ret != 0)) - goto out_unlock; - - sw_context->res_ht_initialized = true; - } - INIT_LIST_HEAD(&sw_context->staged_cmd_res); sw_context->ctx = &val_ctx; ret = vmw_execbuf_tie_context(dev_priv, sw_context, dx_context_handle); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c index f46891012be3..f5c4a40fb16d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright © 2018 VMware, Inc., Palo Alto, CA., USA + * Copyright © 2018 - 2022 VMware, Inc., Palo Alto, CA., USA * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -180,11 +180,16 @@ vmw_validation_find_bo_dup(struct vmw_validation_context *ctx, if (!ctx->merge_dups) return NULL; - if (ctx->ht) { + if (ctx->sw_context) { struct vmwgfx_hash_item *hash; + unsigned long key = (unsigned long) vbo; - if (!vmwgfx_ht_find_item(ctx->ht, (unsigned long) vbo, &hash)) - bo_node = container_of(hash, typeof(*bo_node), hash); + hash_for_each_possible_rcu(ctx->sw_context->res_ht, hash, head, key) { + if (hash->key == key) { + bo_node = container_of(hash, typeof(*bo_node), hash); + break; + } + } } else { struct vmw_validation_bo_node *entry; @@ -217,11 +222,16 @@ vmw_validation_find_res_dup(struct vmw_validation_context *ctx, if (!ctx->merge_dups) return NULL; - if (ctx->ht) { + if (ctx->sw_context) { struct vmwgfx_hash_item *hash; + unsigned long key = (unsigned long) res; - if (!vmwgfx_ht_find_item(ctx->ht, (unsigned long) res, &hash)) - res_node = container_of(hash, typeof(*res_node), hash); + hash_for_each_possible_rcu(ctx->sw_context->res_ht, hash, head, key) { + if (hash->key == key) { + res_node = container_of(hash, typeof(*res_node), hash); + break; + } + } } else { struct vmw_validation_res_node *entry; @@ -269,20 +279,15 @@ int vmw_validation_add_bo(struct vmw_validation_context *ctx, } } else { struct ttm_validate_buffer *val_buf; - int ret; bo_node = vmw_validation_mem_alloc(ctx, sizeof(*bo_node)); if (!bo_node) return -ENOMEM; - if (ctx->ht) { + if (ctx->sw_context) { bo_node->hash.key = (unsigned long) vbo; - ret = vmwgfx_ht_insert_item(ctx->ht, &bo_node->hash); - if (ret) { - DRM_ERROR("Failed to initialize a buffer " - "validation entry.\n"); - return ret; - } + hash_add_rcu(ctx->sw_context->res_ht, &bo_node->hash.head, + bo_node->hash.key); } val_buf = &bo_node->base; val_buf->bo = ttm_bo_get_unless_zero(&vbo->base); @@ -316,7 +321,6 @@ int vmw_validation_add_resource(struct vmw_validation_context *ctx, bool *first_usage) { struct vmw_validation_res_node *node; - int ret; node = vmw_validation_find_res_dup(ctx, res); if (node) { @@ -330,14 +334,9 @@ int vmw_validation_add_resource(struct vmw_validation_context *ctx, return -ENOMEM; } - if (ctx->ht) { + if (ctx->sw_context) { node->hash.key = (unsigned long) res; - ret = vmwgfx_ht_insert_item(ctx->ht, &node->hash); - if (ret) { - DRM_ERROR("Failed to initialize a resource validation " - "entry.\n"); - return ret; - } + hash_add_rcu(ctx->sw_context->res_ht, &node->hash.head, node->hash.key); } node->res = vmw_resource_reference_unless_doomed(res); if (!node->res) @@ -681,19 +680,19 @@ void vmw_validation_drop_ht(struct vmw_validation_context *ctx) struct vmw_validation_bo_node *entry; struct vmw_validation_res_node *val; - if (!ctx->ht) + if (!ctx->sw_context) return; list_for_each_entry(entry, &ctx->bo_list, base.head) - (void) vmwgfx_ht_remove_item(ctx->ht, &entry->hash); + hash_del_rcu(&entry->hash.head); list_for_each_entry(val, &ctx->resource_list, head) - (void) vmwgfx_ht_remove_item(ctx->ht, &val->hash); + hash_del_rcu(&val->hash.head); list_for_each_entry(val, &ctx->resource_ctx_list, head) - (void) vmwgfx_ht_remove_item(ctx->ht, &val->hash); + hash_del_rcu(&entry->hash.head); - ctx->ht = NULL; + ctx->sw_context = NULL; } /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h index f21df053882b..ab9ec226f433 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * - * Copyright © 2018 VMware, Inc., Palo Alto, CA., USA + * Copyright © 2018 - 2022 VMware, Inc., Palo Alto, CA., USA * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -29,12 +29,11 @@ #define _VMWGFX_VALIDATION_H_ #include +#include #include #include -#include "vmwgfx_hashtab.h" - #define VMW_RES_DIRTY_NONE 0 #define VMW_RES_DIRTY_SET BIT(0) #define VMW_RES_DIRTY_CLEAR BIT(1) @@ -59,7 +58,7 @@ * @total_mem: Amount of reserved memory. */ struct vmw_validation_context { - struct vmwgfx_open_hash *ht; + struct vmw_sw_context *sw_context; struct list_head resource_list; struct list_head resource_ctx_list; struct list_head bo_list; @@ -82,16 +81,16 @@ struct vmw_fence_obj; /** * DECLARE_VAL_CONTEXT - Declare a validation context with initialization * @_name: The name of the variable - * @_ht: The hash table used to find dups or NULL if none + * @_sw_context: Contains the hash table used to find dups or NULL if none * @_merge_dups: Whether to merge duplicate buffer object- or resource * entries. If set to true, ideally a hash table pointer should be supplied * as well unless the number of resources and buffer objects per validation * is known to be very small */ #endif -#define DECLARE_VAL_CONTEXT(_name, _ht, _merge_dups) \ +#define DECLARE_VAL_CONTEXT(_name, _sw_context, _merge_dups) \ struct vmw_validation_context _name = \ - { .ht = _ht, \ + { .sw_context = _sw_context, \ .resource_list = LIST_HEAD_INIT((_name).resource_list), \ .resource_ctx_list = LIST_HEAD_INIT((_name).resource_ctx_list), \ .bo_list = LIST_HEAD_INIT((_name).bo_list), \ @@ -114,19 +113,6 @@ vmw_validation_has_bos(struct vmw_validation_context *ctx) return !list_empty(&ctx->bo_list); } -/** - * vmw_validation_set_ht - Register a hash table for duplicate finding - * @ctx: The validation context - * @ht: Pointer to a hash table to use for duplicate finding - * This function is intended to be used if the hash table wasn't - * available at validation context declaration time - */ -static inline void vmw_validation_set_ht(struct vmw_validation_context *ctx, - struct vmwgfx_open_hash *ht) -{ - ctx->ht = ht; -} - /** * vmw_validation_bo_reserve - Reserve buffer objects registered with a * validation context From 7f3d691dedfe73741d609707ed417200479f0b0c Mon Sep 17 00:00:00 2001 From: Maaz Mombasawala Date: Sat, 22 Oct 2022 00:02:29 -0400 Subject: [PATCH 138/177] drm/vmwgfx: Refactor ttm reference object hashtable to use linux/hashtable. [ Upstream commit 76a9e07f270cf5fb556ac237dbf11f5dacd61fef ] This is part of an effort to move from the vmwgfx_open_hash hashtable to linux/hashtable implementation. Refactor the ref_hash hashtable, used for fast lookup of reference objects associated with a ttm file. This also exposed a problem related to inconsistently using 32-bit and 64-bit keys with this hashtable. The hash function used changes depending on the size of the type, and results are not consistent across numbers, for example, hash_32(329) = 329, but hash_long(329) = 328. This would cause the lookup to fail for objects already in the hashtable, since keys of different sizes were being passed during adding and lookup. This was not an issue before because vmwgfx_open_hash always used hash_long. Fix this by always using 64-bit keys for this hashtable, which means that hash_long is always used. Signed-off-by: Maaz Mombasawala Reviewed-by: Zack Rusin Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20221022040236.616490-11-zack@kde.org Stable-dep-of: a309c7194e8a ("drm/vmwgfx: Remove rcu locks from user resources") Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/ttm_object.c | 91 ++++++++++++++++------------- drivers/gpu/drm/vmwgfx/ttm_object.h | 12 ++-- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 2 +- 3 files changed, 56 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.c b/drivers/gpu/drm/vmwgfx/ttm_object.c index 9546b121bc22..c07b81fbc495 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_object.c +++ b/drivers/gpu/drm/vmwgfx/ttm_object.c @@ -52,9 +52,12 @@ #include #include #include +#include MODULE_IMPORT_NS(DMA_BUF); +#define VMW_TTM_OBJECT_REF_HT_ORDER 10 + /** * struct ttm_object_file * @@ -75,7 +78,7 @@ struct ttm_object_file { struct ttm_object_device *tdev; spinlock_t lock; struct list_head ref_list; - struct vmwgfx_open_hash ref_hash; + DECLARE_HASHTABLE(ref_hash, VMW_TTM_OBJECT_REF_HT_ORDER); struct kref refcount; }; @@ -136,6 +139,36 @@ ttm_object_file_ref(struct ttm_object_file *tfile) return tfile; } +static int ttm_tfile_find_ref_rcu(struct ttm_object_file *tfile, + uint64_t key, + struct vmwgfx_hash_item **p_hash) +{ + struct vmwgfx_hash_item *hash; + + hash_for_each_possible_rcu(tfile->ref_hash, hash, head, key) { + if (hash->key == key) { + *p_hash = hash; + return 0; + } + } + return -EINVAL; +} + +static int ttm_tfile_find_ref(struct ttm_object_file *tfile, + uint64_t key, + struct vmwgfx_hash_item **p_hash) +{ + struct vmwgfx_hash_item *hash; + + hash_for_each_possible(tfile->ref_hash, hash, head, key) { + if (hash->key == key) { + *p_hash = hash; + return 0; + } + } + return -EINVAL; +} + static void ttm_object_file_destroy(struct kref *kref) { struct ttm_object_file *tfile = @@ -238,14 +271,13 @@ void ttm_base_object_unref(struct ttm_base_object **p_base) * Return: A pointer to the object if successful or NULL otherwise. */ struct ttm_base_object * -ttm_base_object_noref_lookup(struct ttm_object_file *tfile, uint32_t key) +ttm_base_object_noref_lookup(struct ttm_object_file *tfile, uint64_t key) { struct vmwgfx_hash_item *hash; - struct vmwgfx_open_hash *ht = &tfile->ref_hash; int ret; rcu_read_lock(); - ret = vmwgfx_ht_find_item_rcu(ht, key, &hash); + ret = ttm_tfile_find_ref_rcu(tfile, key, &hash); if (ret) { rcu_read_unlock(); return NULL; @@ -257,15 +289,14 @@ ttm_base_object_noref_lookup(struct ttm_object_file *tfile, uint32_t key) EXPORT_SYMBOL(ttm_base_object_noref_lookup); struct ttm_base_object *ttm_base_object_lookup(struct ttm_object_file *tfile, - uint32_t key) + uint64_t key) { struct ttm_base_object *base = NULL; struct vmwgfx_hash_item *hash; - struct vmwgfx_open_hash *ht = &tfile->ref_hash; int ret; rcu_read_lock(); - ret = vmwgfx_ht_find_item_rcu(ht, key, &hash); + ret = ttm_tfile_find_ref_rcu(tfile, key, &hash); if (likely(ret == 0)) { base = drm_hash_entry(hash, struct ttm_ref_object, hash)->obj; @@ -278,7 +309,7 @@ struct ttm_base_object *ttm_base_object_lookup(struct ttm_object_file *tfile, } struct ttm_base_object * -ttm_base_object_lookup_for_ref(struct ttm_object_device *tdev, uint32_t key) +ttm_base_object_lookup_for_ref(struct ttm_object_device *tdev, uint64_t key) { struct ttm_base_object *base; @@ -297,7 +328,6 @@ int ttm_ref_object_add(struct ttm_object_file *tfile, bool *existed, bool require_existed) { - struct vmwgfx_open_hash *ht = &tfile->ref_hash; struct ttm_ref_object *ref; struct vmwgfx_hash_item *hash; int ret = -EINVAL; @@ -310,7 +340,7 @@ int ttm_ref_object_add(struct ttm_object_file *tfile, while (ret == -EINVAL) { rcu_read_lock(); - ret = vmwgfx_ht_find_item_rcu(ht, base->handle, &hash); + ret = ttm_tfile_find_ref_rcu(tfile, base->handle, &hash); if (ret == 0) { ref = drm_hash_entry(hash, struct ttm_ref_object, hash); @@ -335,21 +365,14 @@ int ttm_ref_object_add(struct ttm_object_file *tfile, kref_init(&ref->kref); spin_lock(&tfile->lock); - ret = vmwgfx_ht_insert_item_rcu(ht, &ref->hash); - - if (likely(ret == 0)) { - list_add_tail(&ref->head, &tfile->ref_list); - kref_get(&base->refcount); - spin_unlock(&tfile->lock); - if (existed != NULL) - *existed = false; - break; - } + hash_add_rcu(tfile->ref_hash, &ref->hash.head, ref->hash.key); + ret = 0; + list_add_tail(&ref->head, &tfile->ref_list); + kref_get(&base->refcount); spin_unlock(&tfile->lock); - BUG_ON(ret != -EINVAL); - - kfree(ref); + if (existed != NULL) + *existed = false; } return ret; @@ -361,10 +384,8 @@ ttm_ref_object_release(struct kref *kref) struct ttm_ref_object *ref = container_of(kref, struct ttm_ref_object, kref); struct ttm_object_file *tfile = ref->tfile; - struct vmwgfx_open_hash *ht; - ht = &tfile->ref_hash; - (void)vmwgfx_ht_remove_item_rcu(ht, &ref->hash); + hash_del_rcu(&ref->hash.head); list_del(&ref->head); spin_unlock(&tfile->lock); @@ -376,13 +397,12 @@ ttm_ref_object_release(struct kref *kref) int ttm_ref_object_base_unref(struct ttm_object_file *tfile, unsigned long key) { - struct vmwgfx_open_hash *ht = &tfile->ref_hash; struct ttm_ref_object *ref; struct vmwgfx_hash_item *hash; int ret; spin_lock(&tfile->lock); - ret = vmwgfx_ht_find_item(ht, key, &hash); + ret = ttm_tfile_find_ref(tfile, key, &hash); if (unlikely(ret != 0)) { spin_unlock(&tfile->lock); return -EINVAL; @@ -414,16 +434,13 @@ void ttm_object_file_release(struct ttm_object_file **p_tfile) } spin_unlock(&tfile->lock); - vmwgfx_ht_remove(&tfile->ref_hash); ttm_object_file_unref(&tfile); } -struct ttm_object_file *ttm_object_file_init(struct ttm_object_device *tdev, - unsigned int hash_order) +struct ttm_object_file *ttm_object_file_init(struct ttm_object_device *tdev) { struct ttm_object_file *tfile = kmalloc(sizeof(*tfile), GFP_KERNEL); - int ret; if (unlikely(tfile == NULL)) return NULL; @@ -433,17 +450,9 @@ struct ttm_object_file *ttm_object_file_init(struct ttm_object_device *tdev, kref_init(&tfile->refcount); INIT_LIST_HEAD(&tfile->ref_list); - ret = vmwgfx_ht_create(&tfile->ref_hash, hash_order); - if (ret) - goto out_err; + hash_init(tfile->ref_hash); return tfile; -out_err: - vmwgfx_ht_remove(&tfile->ref_hash); - - kfree(tfile); - - return NULL; } struct ttm_object_device * diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.h b/drivers/gpu/drm/vmwgfx/ttm_object.h index 6870f951b677..67f30d589e27 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_object.h +++ b/drivers/gpu/drm/vmwgfx/ttm_object.h @@ -104,7 +104,7 @@ struct ttm_base_object { struct ttm_object_file *tfile; struct kref refcount; void (*refcount_release) (struct ttm_base_object **base); - u32 handle; + u64 handle; enum ttm_object_type object_type; u32 shareable; }; @@ -164,7 +164,7 @@ extern int ttm_base_object_init(struct ttm_object_file *tfile, */ extern struct ttm_base_object *ttm_base_object_lookup(struct ttm_object_file - *tfile, uint32_t key); + *tfile, uint64_t key); /** * ttm_base_object_lookup_for_ref @@ -178,7 +178,7 @@ extern struct ttm_base_object *ttm_base_object_lookup(struct ttm_object_file */ extern struct ttm_base_object * -ttm_base_object_lookup_for_ref(struct ttm_object_device *tdev, uint32_t key); +ttm_base_object_lookup_for_ref(struct ttm_object_device *tdev, uint64_t key); /** * ttm_base_object_unref @@ -237,14 +237,12 @@ extern int ttm_ref_object_base_unref(struct ttm_object_file *tfile, * ttm_object_file_init - initialize a struct ttm_object file * * @tdev: A struct ttm_object device this file is initialized on. - * @hash_order: Order of the hash table used to hold the reference objects. * * This is typically called by the file_ops::open function. */ extern struct ttm_object_file *ttm_object_file_init(struct ttm_object_device - *tdev, - unsigned int hash_order); + *tdev); /** * ttm_object_file_release - release data held by a ttm_object_file @@ -312,7 +310,7 @@ extern int ttm_prime_handle_to_fd(struct ttm_object_file *tfile, kfree_rcu(__obj, __prime.base.rhead) struct ttm_base_object * -ttm_base_object_noref_lookup(struct ttm_object_file *tfile, uint32_t key); +ttm_base_object_noref_lookup(struct ttm_object_file *tfile, uint64_t key); /** * ttm_base_object_noref_release - release a base object pointer looked up diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 8d77e79bd904..b909a3ce9af3 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1242,7 +1242,7 @@ static int vmw_driver_open(struct drm_device *dev, struct drm_file *file_priv) if (unlikely(!vmw_fp)) return ret; - vmw_fp->tfile = ttm_object_file_init(dev_priv->tdev, 10); + vmw_fp->tfile = ttm_object_file_init(dev_priv->tdev); if (unlikely(vmw_fp->tfile == NULL)) goto out_no_tfile; From a3be7e2afc4360312c97e573bb6f12f248a8d1b0 Mon Sep 17 00:00:00 2001 From: Maaz Mombasawala Date: Sat, 22 Oct 2022 00:02:30 -0400 Subject: [PATCH 139/177] drm/vmwgfx: Remove vmwgfx_hashtab [ Upstream commit 9da30cdd6a318595199319708c143ae318f804ef ] The vmwgfx driver has migrated from using the hashtable in vmwgfx_hashtab to the linux/hashtable implementation. Remove the vmwgfx_hashtab from the driver. Signed-off-by: Maaz Mombasawala Reviewed-by: Martin Krastev Reviewed-by: Zack Rusin Signed-off-by: Zack Rusin Link: https://patchwork.freedesktop.org/patch/msgid/20221022040236.616490-12-zack@kde.org Stable-dep-of: a309c7194e8a ("drm/vmwgfx: Remove rcu locks from user resources") Signed-off-by: Sasha Levin --- Documentation/gpu/todo.rst | 11 -- drivers/gpu/drm/vmwgfx/Makefile | 2 +- drivers/gpu/drm/vmwgfx/ttm_object.c | 8 +- drivers/gpu/drm/vmwgfx/ttm_object.h | 2 - drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c | 4 +- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 6 +- drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.c | 199 --------------------- drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.h | 83 --------- 8 files changed, 12 insertions(+), 303 deletions(-) delete mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.c delete mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.h diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index 393d218e4a0c..b2c6aaf1edf2 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -651,17 +651,6 @@ See drivers/gpu/drm/amd/display/TODO for tasks. Contact: Harry Wentland, Alex Deucher -vmwgfx: Replace hashtable with Linux' implementation ----------------------------------------------------- - -The vmwgfx driver uses its own hashtable implementation. Replace the -code with Linux' implementation and update the callers. It's mostly a -refactoring task, but the interfaces are different. - -Contact: Zack Rusin, Thomas Zimmermann - -Level: Intermediate - Bootsplash ========== diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile index eee73b9aa404..68e350f410ad 100644 --- a/drivers/gpu/drm/vmwgfx/Makefile +++ b/drivers/gpu/drm/vmwgfx/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_hashtab.o vmwgfx_kms.o vmwgfx_drv.o \ +vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \ vmwgfx_ioctl.o vmwgfx_resource.o vmwgfx_ttm_buffer.o \ vmwgfx_cmd.o vmwgfx_irq.o vmwgfx_ldu.o vmwgfx_ttm_glue.o \ vmwgfx_overlay.o vmwgfx_gmrid_manager.o vmwgfx_fence.o \ diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.c b/drivers/gpu/drm/vmwgfx/ttm_object.c index c07b81fbc495..932b125ebf3d 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_object.c +++ b/drivers/gpu/drm/vmwgfx/ttm_object.c @@ -284,7 +284,7 @@ ttm_base_object_noref_lookup(struct ttm_object_file *tfile, uint64_t key) } __release(RCU); - return drm_hash_entry(hash, struct ttm_ref_object, hash)->obj; + return hlist_entry(hash, struct ttm_ref_object, hash)->obj; } EXPORT_SYMBOL(ttm_base_object_noref_lookup); @@ -299,7 +299,7 @@ struct ttm_base_object *ttm_base_object_lookup(struct ttm_object_file *tfile, ret = ttm_tfile_find_ref_rcu(tfile, key, &hash); if (likely(ret == 0)) { - base = drm_hash_entry(hash, struct ttm_ref_object, hash)->obj; + base = hlist_entry(hash, struct ttm_ref_object, hash)->obj; if (!kref_get_unless_zero(&base->refcount)) base = NULL; } @@ -343,7 +343,7 @@ int ttm_ref_object_add(struct ttm_object_file *tfile, ret = ttm_tfile_find_ref_rcu(tfile, base->handle, &hash); if (ret == 0) { - ref = drm_hash_entry(hash, struct ttm_ref_object, hash); + ref = hlist_entry(hash, struct ttm_ref_object, hash); if (kref_get_unless_zero(&ref->kref)) { rcu_read_unlock(); break; @@ -407,7 +407,7 @@ int ttm_ref_object_base_unref(struct ttm_object_file *tfile, spin_unlock(&tfile->lock); return -EINVAL; } - ref = drm_hash_entry(hash, struct ttm_ref_object, hash); + ref = hlist_entry(hash, struct ttm_ref_object, hash); kref_put(&ref->kref, ttm_ref_object_release); spin_unlock(&tfile->lock); return 0; diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.h b/drivers/gpu/drm/vmwgfx/ttm_object.h index 67f30d589e27..f0ebbe340ad6 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_object.h +++ b/drivers/gpu/drm/vmwgfx/ttm_object.h @@ -42,8 +42,6 @@ #include #include -#include "vmwgfx_hashtab.h" - /** * enum ttm_object_type * diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c index 142aef686fcd..47bc0b411055 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c @@ -88,7 +88,7 @@ vmw_cmdbuf_res_lookup(struct vmw_cmdbuf_res_manager *man, hash_for_each_possible_rcu(man->resources, hash, head, key) { if (hash->key == key) - return drm_hash_entry(hash, struct vmw_cmdbuf_res, hash)->res; + return hlist_entry(hash, struct vmw_cmdbuf_res, hash)->res; } return ERR_PTR(-EINVAL); } @@ -243,7 +243,7 @@ int vmw_cmdbuf_res_remove(struct vmw_cmdbuf_res_manager *man, hash_for_each_possible_rcu(man->resources, hash, head, key) { if (hash->key == key) { - entry = drm_hash_entry(hash, struct vmw_cmdbuf_res, hash); + entry = hlist_entry(hash, struct vmw_cmdbuf_res, hash); break; } } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index d87aeedb78d0..7c45c3de0dcf 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -43,7 +43,6 @@ #include "ttm_object.h" #include "vmwgfx_fence.h" -#include "vmwgfx_hashtab.h" #include "vmwgfx_reg.h" #include "vmwgfx_validation.h" @@ -104,6 +103,11 @@ struct vmw_fpriv { bool gb_aware; /* user-space is guest-backed aware */ }; +struct vmwgfx_hash_item { + struct hlist_node head; + unsigned long key; +}; + /** * struct vmw_buffer_object - TTM buffer object with vmwgfx additions * @base: The TTM buffer object diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.c b/drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.c deleted file mode 100644 index 06aebc12774e..000000000000 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.c +++ /dev/null @@ -1,199 +0,0 @@ -/* - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND. USA. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/* - * Simple open hash tab implementation. - * - * Authors: - * Thomas Hellström - */ - -#include -#include -#include -#include -#include -#include - -#include - -#include "vmwgfx_hashtab.h" - -int vmwgfx_ht_create(struct vmwgfx_open_hash *ht, unsigned int order) -{ - unsigned int size = 1 << order; - - ht->order = order; - ht->table = NULL; - if (size <= PAGE_SIZE / sizeof(*ht->table)) - ht->table = kcalloc(size, sizeof(*ht->table), GFP_KERNEL); - else - ht->table = vzalloc(array_size(size, sizeof(*ht->table))); - if (!ht->table) { - DRM_ERROR("Out of memory for hash table\n"); - return -ENOMEM; - } - return 0; -} - -void vmwgfx_ht_verbose_list(struct vmwgfx_open_hash *ht, unsigned long key) -{ - struct vmwgfx_hash_item *entry; - struct hlist_head *h_list; - unsigned int hashed_key; - int count = 0; - - hashed_key = hash_long(key, ht->order); - DRM_DEBUG("Key is 0x%08lx, Hashed key is 0x%08x\n", key, hashed_key); - h_list = &ht->table[hashed_key]; - hlist_for_each_entry(entry, h_list, head) - DRM_DEBUG("count %d, key: 0x%08lx\n", count++, entry->key); -} - -static struct hlist_node *vmwgfx_ht_find_key(struct vmwgfx_open_hash *ht, unsigned long key) -{ - struct vmwgfx_hash_item *entry; - struct hlist_head *h_list; - unsigned int hashed_key; - - hashed_key = hash_long(key, ht->order); - h_list = &ht->table[hashed_key]; - hlist_for_each_entry(entry, h_list, head) { - if (entry->key == key) - return &entry->head; - if (entry->key > key) - break; - } - return NULL; -} - -static struct hlist_node *vmwgfx_ht_find_key_rcu(struct vmwgfx_open_hash *ht, unsigned long key) -{ - struct vmwgfx_hash_item *entry; - struct hlist_head *h_list; - unsigned int hashed_key; - - hashed_key = hash_long(key, ht->order); - h_list = &ht->table[hashed_key]; - hlist_for_each_entry_rcu(entry, h_list, head) { - if (entry->key == key) - return &entry->head; - if (entry->key > key) - break; - } - return NULL; -} - -int vmwgfx_ht_insert_item(struct vmwgfx_open_hash *ht, struct vmwgfx_hash_item *item) -{ - struct vmwgfx_hash_item *entry; - struct hlist_head *h_list; - struct hlist_node *parent; - unsigned int hashed_key; - unsigned long key = item->key; - - hashed_key = hash_long(key, ht->order); - h_list = &ht->table[hashed_key]; - parent = NULL; - hlist_for_each_entry(entry, h_list, head) { - if (entry->key == key) - return -EINVAL; - if (entry->key > key) - break; - parent = &entry->head; - } - if (parent) - hlist_add_behind_rcu(&item->head, parent); - else - hlist_add_head_rcu(&item->head, h_list); - return 0; -} - -/* - * Just insert an item and return any "bits" bit key that hasn't been - * used before. - */ -int vmwgfx_ht_just_insert_please(struct vmwgfx_open_hash *ht, struct vmwgfx_hash_item *item, - unsigned long seed, int bits, int shift, - unsigned long add) -{ - int ret; - unsigned long mask = (1UL << bits) - 1; - unsigned long first, unshifted_key; - - unshifted_key = hash_long(seed, bits); - first = unshifted_key; - do { - item->key = (unshifted_key << shift) + add; - ret = vmwgfx_ht_insert_item(ht, item); - if (ret) - unshifted_key = (unshifted_key + 1) & mask; - } while (ret && (unshifted_key != first)); - - if (ret) { - DRM_ERROR("Available key bit space exhausted\n"); - return -EINVAL; - } - return 0; -} - -int vmwgfx_ht_find_item(struct vmwgfx_open_hash *ht, unsigned long key, - struct vmwgfx_hash_item **item) -{ - struct hlist_node *list; - - list = vmwgfx_ht_find_key_rcu(ht, key); - if (!list) - return -EINVAL; - - *item = hlist_entry(list, struct vmwgfx_hash_item, head); - return 0; -} - -int vmwgfx_ht_remove_key(struct vmwgfx_open_hash *ht, unsigned long key) -{ - struct hlist_node *list; - - list = vmwgfx_ht_find_key(ht, key); - if (list) { - hlist_del_init_rcu(list); - return 0; - } - return -EINVAL; -} - -int vmwgfx_ht_remove_item(struct vmwgfx_open_hash *ht, struct vmwgfx_hash_item *item) -{ - hlist_del_init_rcu(&item->head); - return 0; -} - -void vmwgfx_ht_remove(struct vmwgfx_open_hash *ht) -{ - if (ht->table) { - kvfree(ht->table); - ht->table = NULL; - } -} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.h b/drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.h deleted file mode 100644 index a9ce12922e21..000000000000 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_hashtab.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright 2006 Tungsten Graphics, Inc., Bismack, ND. USA. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/* - * Simple open hash tab implementation. - * - * Authors: - * Thomas Hellström - */ - -/* - * TODO: Replace this hashtable with Linux' generic implementation - * from . - */ - -#ifndef VMWGFX_HASHTAB_H -#define VMWGFX_HASHTAB_H - -#include - -#define drm_hash_entry(_ptr, _type, _member) container_of(_ptr, _type, _member) - -struct vmwgfx_hash_item { - struct hlist_node head; - unsigned long key; -}; - -struct vmwgfx_open_hash { - struct hlist_head *table; - u8 order; -}; - -int vmwgfx_ht_create(struct vmwgfx_open_hash *ht, unsigned int order); -int vmwgfx_ht_insert_item(struct vmwgfx_open_hash *ht, struct vmwgfx_hash_item *item); -int vmwgfx_ht_just_insert_please(struct vmwgfx_open_hash *ht, struct vmwgfx_hash_item *item, - unsigned long seed, int bits, int shift, - unsigned long add); -int vmwgfx_ht_find_item(struct vmwgfx_open_hash *ht, unsigned long key, - struct vmwgfx_hash_item **item); - -void vmwgfx_ht_verbose_list(struct vmwgfx_open_hash *ht, unsigned long key); -int vmwgfx_ht_remove_key(struct vmwgfx_open_hash *ht, unsigned long key); -int vmwgfx_ht_remove_item(struct vmwgfx_open_hash *ht, struct vmwgfx_hash_item *item); -void vmwgfx_ht_remove(struct vmwgfx_open_hash *ht); - -/* - * RCU-safe interface - * - * The user of this API needs to make sure that two or more instances of the - * hash table manipulation functions are never run simultaneously. - * The lookup function vmwgfx_ht_find_item_rcu may, however, run simultaneously - * with any of the manipulation functions as long as it's called from within - * an RCU read-locked section. - */ -#define vmwgfx_ht_insert_item_rcu vmwgfx_ht_insert_item -#define vmwgfx_ht_just_insert_please_rcu vmwgfx_ht_just_insert_please -#define vmwgfx_ht_remove_key_rcu vmwgfx_ht_remove_key -#define vmwgfx_ht_remove_item_rcu vmwgfx_ht_remove_item -#define vmwgfx_ht_find_item_rcu vmwgfx_ht_find_item - -#endif From 7ac9578e45b20e3f3c0c8eb71f5417a499a7226a Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Wed, 7 Dec 2022 12:29:07 -0500 Subject: [PATCH 140/177] drm/vmwgfx: Remove rcu locks from user resources [ Upstream commit a309c7194e8a2f8bd4539b9449917913f6c2cd50 ] User resource lookups used rcu to avoid two extra atomics. Unfortunately the rcu paths were buggy and it was easy to make the driver crash by submitting command buffers from two different threads. Because the lookups never show up in performance profiles replace them with a regular spin lock which fixes the races in accesses to those shared resources. Fixes kernel oops'es in IGT's vmwgfx execution_buffer stress test and seen crashes with apps using shared resources. Fixes: e14c02e6b699 ("drm/vmwgfx: Look up objects without taking a reference") Signed-off-by: Zack Rusin Reviewed-by: Martin Krastev Reviewed-by: Maaz Mombasawala Link: https://patchwork.freedesktop.org/patch/msgid/20221207172907.959037-1-zack@kde.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/vmwgfx/ttm_object.c | 41 +---- drivers/gpu/drm/vmwgfx/ttm_object.h | 14 -- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 38 ----- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 18 +-- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 182 +++++++++++------------ drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 33 ---- 6 files changed, 90 insertions(+), 236 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.c b/drivers/gpu/drm/vmwgfx/ttm_object.c index 932b125ebf3d..ddf8373c1d77 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_object.c +++ b/drivers/gpu/drm/vmwgfx/ttm_object.c @@ -254,40 +254,6 @@ void ttm_base_object_unref(struct ttm_base_object **p_base) kref_put(&base->refcount, ttm_release_base); } -/** - * ttm_base_object_noref_lookup - look up a base object without reference - * @tfile: The struct ttm_object_file the object is registered with. - * @key: The object handle. - * - * This function looks up a ttm base object and returns a pointer to it - * without refcounting the pointer. The returned pointer is only valid - * until ttm_base_object_noref_release() is called, and the object - * pointed to by the returned pointer may be doomed. Any persistent usage - * of the object requires a refcount to be taken using kref_get_unless_zero(). - * Iff this function returns successfully it needs to be paired with - * ttm_base_object_noref_release() and no sleeping- or scheduling functions - * may be called inbetween these function callse. - * - * Return: A pointer to the object if successful or NULL otherwise. - */ -struct ttm_base_object * -ttm_base_object_noref_lookup(struct ttm_object_file *tfile, uint64_t key) -{ - struct vmwgfx_hash_item *hash; - int ret; - - rcu_read_lock(); - ret = ttm_tfile_find_ref_rcu(tfile, key, &hash); - if (ret) { - rcu_read_unlock(); - return NULL; - } - - __release(RCU); - return hlist_entry(hash, struct ttm_ref_object, hash)->obj; -} -EXPORT_SYMBOL(ttm_base_object_noref_lookup); - struct ttm_base_object *ttm_base_object_lookup(struct ttm_object_file *tfile, uint64_t key) { @@ -295,15 +261,16 @@ struct ttm_base_object *ttm_base_object_lookup(struct ttm_object_file *tfile, struct vmwgfx_hash_item *hash; int ret; - rcu_read_lock(); - ret = ttm_tfile_find_ref_rcu(tfile, key, &hash); + spin_lock(&tfile->lock); + ret = ttm_tfile_find_ref(tfile, key, &hash); if (likely(ret == 0)) { base = hlist_entry(hash, struct ttm_ref_object, hash)->obj; if (!kref_get_unless_zero(&base->refcount)) base = NULL; } - rcu_read_unlock(); + spin_unlock(&tfile->lock); + return base; } diff --git a/drivers/gpu/drm/vmwgfx/ttm_object.h b/drivers/gpu/drm/vmwgfx/ttm_object.h index f0ebbe340ad6..8098a3846bae 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_object.h +++ b/drivers/gpu/drm/vmwgfx/ttm_object.h @@ -307,18 +307,4 @@ extern int ttm_prime_handle_to_fd(struct ttm_object_file *tfile, #define ttm_prime_object_kfree(__obj, __prime) \ kfree_rcu(__obj, __prime.base.rhead) -struct ttm_base_object * -ttm_base_object_noref_lookup(struct ttm_object_file *tfile, uint64_t key); - -/** - * ttm_base_object_noref_release - release a base object pointer looked up - * without reference - * - * Releases a base object pointer looked up with ttm_base_object_noref_lookup(). - */ -static inline void ttm_base_object_noref_release(void) -{ - __acquire(RCU); - rcu_read_unlock(); -} #endif diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index 822251aaab0a..973a0a52462e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -715,44 +715,6 @@ int vmw_user_bo_lookup(struct drm_file *filp, return 0; } -/** - * vmw_user_bo_noref_lookup - Look up a vmw user buffer object without reference - * @filp: The TTM object file the handle is registered with. - * @handle: The user buffer object handle. - * - * This function looks up a struct vmw_bo and returns a pointer to the - * struct vmw_buffer_object it derives from without refcounting the pointer. - * The returned pointer is only valid until vmw_user_bo_noref_release() is - * called, and the object pointed to by the returned pointer may be doomed. - * Any persistent usage of the object requires a refcount to be taken using - * ttm_bo_reference_unless_doomed(). Iff this function returns successfully it - * needs to be paired with vmw_user_bo_noref_release() and no sleeping- - * or scheduling functions may be called in between these function calls. - * - * Return: A struct vmw_buffer_object pointer if successful or negative - * error pointer on failure. - */ -struct vmw_buffer_object * -vmw_user_bo_noref_lookup(struct drm_file *filp, u32 handle) -{ - struct vmw_buffer_object *vmw_bo; - struct ttm_buffer_object *bo; - struct drm_gem_object *gobj = drm_gem_object_lookup(filp, handle); - - if (!gobj) { - DRM_ERROR("Invalid buffer object handle 0x%08lx.\n", - (unsigned long)handle); - return ERR_PTR(-ESRCH); - } - vmw_bo = gem_to_vmw_bo(gobj); - bo = ttm_bo_get_unless_zero(&vmw_bo->base); - vmw_bo = vmw_buffer_object(bo); - drm_gem_object_put(gobj); - - return vmw_bo; -} - - /** * vmw_bo_fence_single - Utility function to fence a single TTM buffer * object without unreserving it. diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 7c45c3de0dcf..0bc1ebc43002 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -826,12 +826,7 @@ extern int vmw_user_resource_lookup_handle( uint32_t handle, const struct vmw_user_resource_conv *converter, struct vmw_resource **p_res); -extern struct vmw_resource * -vmw_user_resource_noref_lookup_handle(struct vmw_private *dev_priv, - struct ttm_object_file *tfile, - uint32_t handle, - const struct vmw_user_resource_conv * - converter); + extern int vmw_stream_claim_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); extern int vmw_stream_unref_ioctl(struct drm_device *dev, void *data, @@ -870,15 +865,6 @@ static inline bool vmw_resource_mob_attached(const struct vmw_resource *res) return !RB_EMPTY_NODE(&res->mob_node); } -/** - * vmw_user_resource_noref_release - release a user resource pointer looked up - * without reference - */ -static inline void vmw_user_resource_noref_release(void) -{ - ttm_base_object_noref_release(); -} - /** * Buffer object helper functions - vmwgfx_bo.c */ @@ -930,8 +916,6 @@ extern void vmw_bo_unmap(struct vmw_buffer_object *vbo); extern void vmw_bo_move_notify(struct ttm_buffer_object *bo, struct ttm_resource *mem); extern void vmw_bo_swap_notify(struct ttm_buffer_object *bo); -extern struct vmw_buffer_object * -vmw_user_bo_noref_lookup(struct drm_file *filp, u32 handle); /** * vmw_bo_adjust_prio - Adjust the buffer object eviction priority diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index c943ab801ca7..70cfed4fdba0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -290,20 +290,26 @@ static void vmw_execbuf_rcache_update(struct vmw_res_cache_entry *rcache, rcache->valid_handle = 0; } +enum vmw_val_add_flags { + vmw_val_add_flag_none = 0, + vmw_val_add_flag_noctx = 1 << 0, +}; + /** - * vmw_execbuf_res_noref_val_add - Add a resource described by an unreferenced - * rcu-protected pointer to the validation list. + * vmw_execbuf_res_val_add - Add a resource to the validation list. * * @sw_context: Pointer to the software context. * @res: Unreferenced rcu-protected pointer to the resource. * @dirty: Whether to change dirty status. + * @flags: specifies whether to use the context or not * * Returns: 0 on success. Negative error code on failure. Typical error codes * are %-EINVAL on inconsistency and %-ESRCH if the resource was doomed. */ -static int vmw_execbuf_res_noref_val_add(struct vmw_sw_context *sw_context, - struct vmw_resource *res, - u32 dirty) +static int vmw_execbuf_res_val_add(struct vmw_sw_context *sw_context, + struct vmw_resource *res, + u32 dirty, + u32 flags) { struct vmw_private *dev_priv = res->dev_priv; int ret; @@ -318,24 +324,30 @@ static int vmw_execbuf_res_noref_val_add(struct vmw_sw_context *sw_context, if (dirty) vmw_validation_res_set_dirty(sw_context->ctx, rcache->private, dirty); - vmw_user_resource_noref_release(); return 0; } - priv_size = vmw_execbuf_res_size(dev_priv, res_type); - ret = vmw_validation_add_resource(sw_context->ctx, res, priv_size, - dirty, (void **)&ctx_info, - &first_usage); - vmw_user_resource_noref_release(); - if (ret) - return ret; - - if (priv_size && first_usage) { - ret = vmw_cmd_ctx_first_setup(dev_priv, sw_context, res, - ctx_info); - if (ret) { - VMW_DEBUG_USER("Failed first usage context setup.\n"); + if ((flags & vmw_val_add_flag_noctx) != 0) { + ret = vmw_validation_add_resource(sw_context->ctx, res, 0, dirty, + (void **)&ctx_info, NULL); + if (ret) return ret; + + } else { + priv_size = vmw_execbuf_res_size(dev_priv, res_type); + ret = vmw_validation_add_resource(sw_context->ctx, res, priv_size, + dirty, (void **)&ctx_info, + &first_usage); + if (ret) + return ret; + + if (priv_size && first_usage) { + ret = vmw_cmd_ctx_first_setup(dev_priv, sw_context, res, + ctx_info); + if (ret) { + VMW_DEBUG_USER("Failed first usage context setup.\n"); + return ret; + } } } @@ -343,43 +355,6 @@ static int vmw_execbuf_res_noref_val_add(struct vmw_sw_context *sw_context, return 0; } -/** - * vmw_execbuf_res_noctx_val_add - Add a non-context resource to the resource - * validation list if it's not already on it - * - * @sw_context: Pointer to the software context. - * @res: Pointer to the resource. - * @dirty: Whether to change dirty status. - * - * Returns: Zero on success. Negative error code on failure. - */ -static int vmw_execbuf_res_noctx_val_add(struct vmw_sw_context *sw_context, - struct vmw_resource *res, - u32 dirty) -{ - struct vmw_res_cache_entry *rcache; - enum vmw_res_type res_type = vmw_res_type(res); - void *ptr; - int ret; - - rcache = &sw_context->res_cache[res_type]; - if (likely(rcache->valid && rcache->res == res)) { - if (dirty) - vmw_validation_res_set_dirty(sw_context->ctx, - rcache->private, dirty); - return 0; - } - - ret = vmw_validation_add_resource(sw_context->ctx, res, 0, dirty, - &ptr, NULL); - if (ret) - return ret; - - vmw_execbuf_rcache_update(rcache, res, ptr); - - return 0; -} - /** * vmw_view_res_val_add - Add a view and the surface it's pointing to to the * validation list @@ -398,13 +373,13 @@ static int vmw_view_res_val_add(struct vmw_sw_context *sw_context, * First add the resource the view is pointing to, otherwise it may be * swapped out when the view is validated. */ - ret = vmw_execbuf_res_noctx_val_add(sw_context, vmw_view_srf(view), - vmw_view_dirtying(view)); + ret = vmw_execbuf_res_val_add(sw_context, vmw_view_srf(view), + vmw_view_dirtying(view), vmw_val_add_flag_noctx); if (ret) return ret; - return vmw_execbuf_res_noctx_val_add(sw_context, view, - VMW_RES_DIRTY_NONE); + return vmw_execbuf_res_val_add(sw_context, view, VMW_RES_DIRTY_NONE, + vmw_val_add_flag_noctx); } /** @@ -475,8 +450,9 @@ static int vmw_resource_context_res_add(struct vmw_private *dev_priv, if (IS_ERR(res)) continue; - ret = vmw_execbuf_res_noctx_val_add(sw_context, res, - VMW_RES_DIRTY_SET); + ret = vmw_execbuf_res_val_add(sw_context, res, + VMW_RES_DIRTY_SET, + vmw_val_add_flag_noctx); if (unlikely(ret != 0)) return ret; } @@ -490,9 +466,9 @@ static int vmw_resource_context_res_add(struct vmw_private *dev_priv, if (vmw_res_type(entry->res) == vmw_res_view) ret = vmw_view_res_val_add(sw_context, entry->res); else - ret = vmw_execbuf_res_noctx_val_add - (sw_context, entry->res, - vmw_binding_dirtying(entry->bt)); + ret = vmw_execbuf_res_val_add(sw_context, entry->res, + vmw_binding_dirtying(entry->bt), + vmw_val_add_flag_noctx); if (unlikely(ret != 0)) break; } @@ -658,7 +634,8 @@ vmw_cmd_res_check(struct vmw_private *dev_priv, { struct vmw_res_cache_entry *rcache = &sw_context->res_cache[res_type]; struct vmw_resource *res; - int ret; + int ret = 0; + bool needs_unref = false; if (p_res) *p_res = NULL; @@ -683,17 +660,18 @@ vmw_cmd_res_check(struct vmw_private *dev_priv, if (ret) return ret; - res = vmw_user_resource_noref_lookup_handle - (dev_priv, sw_context->fp->tfile, *id_loc, converter); - if (IS_ERR(res)) { + ret = vmw_user_resource_lookup_handle + (dev_priv, sw_context->fp->tfile, *id_loc, converter, &res); + if (ret != 0) { VMW_DEBUG_USER("Could not find/use resource 0x%08x.\n", (unsigned int) *id_loc); - return PTR_ERR(res); - } - - ret = vmw_execbuf_res_noref_val_add(sw_context, res, dirty); - if (unlikely(ret != 0)) return ret; + } + needs_unref = true; + + ret = vmw_execbuf_res_val_add(sw_context, res, dirty, vmw_val_add_flag_none); + if (unlikely(ret != 0)) + goto res_check_done; if (rcache->valid && rcache->res == res) { rcache->valid_handle = true; @@ -708,7 +686,11 @@ vmw_cmd_res_check(struct vmw_private *dev_priv, if (p_res) *p_res = res; - return 0; +res_check_done: + if (needs_unref) + vmw_resource_unreference(&res); + + return ret; } /** @@ -1171,9 +1153,9 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv, int ret; vmw_validation_preload_bo(sw_context->ctx); - vmw_bo = vmw_user_bo_noref_lookup(sw_context->filp, handle); - if (IS_ERR(vmw_bo)) { - VMW_DEBUG_USER("Could not find or use MOB buffer.\n"); + ret = vmw_user_bo_lookup(sw_context->filp, handle, &vmw_bo); + if (ret != 0) { + drm_dbg(&dev_priv->drm, "Could not find or use MOB buffer.\n"); return PTR_ERR(vmw_bo); } ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo, true, false); @@ -1225,9 +1207,9 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv, int ret; vmw_validation_preload_bo(sw_context->ctx); - vmw_bo = vmw_user_bo_noref_lookup(sw_context->filp, handle); - if (IS_ERR(vmw_bo)) { - VMW_DEBUG_USER("Could not find or use GMR region.\n"); + ret = vmw_user_bo_lookup(sw_context->filp, handle, &vmw_bo); + if (ret != 0) { + drm_dbg(&dev_priv->drm, "Could not find or use GMR region.\n"); return PTR_ERR(vmw_bo); } ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo, false, false); @@ -2025,8 +2007,9 @@ static int vmw_cmd_set_shader(struct vmw_private *dev_priv, res = vmw_shader_lookup(vmw_context_res_man(ctx), cmd->body.shid, cmd->body.type); if (!IS_ERR(res)) { - ret = vmw_execbuf_res_noctx_val_add(sw_context, res, - VMW_RES_DIRTY_NONE); + ret = vmw_execbuf_res_val_add(sw_context, res, + VMW_RES_DIRTY_NONE, + vmw_val_add_flag_noctx); if (unlikely(ret != 0)) return ret; @@ -2273,8 +2256,9 @@ static int vmw_cmd_dx_set_shader(struct vmw_private *dev_priv, return PTR_ERR(res); } - ret = vmw_execbuf_res_noctx_val_add(sw_context, res, - VMW_RES_DIRTY_NONE); + ret = vmw_execbuf_res_val_add(sw_context, res, + VMW_RES_DIRTY_NONE, + vmw_val_add_flag_noctx); if (ret) return ret; } @@ -2777,8 +2761,8 @@ static int vmw_cmd_dx_bind_shader(struct vmw_private *dev_priv, return PTR_ERR(res); } - ret = vmw_execbuf_res_noctx_val_add(sw_context, res, - VMW_RES_DIRTY_NONE); + ret = vmw_execbuf_res_val_add(sw_context, res, VMW_RES_DIRTY_NONE, + vmw_val_add_flag_noctx); if (ret) { VMW_DEBUG_USER("Error creating resource validation node.\n"); return ret; @@ -3098,8 +3082,8 @@ static int vmw_cmd_dx_bind_streamoutput(struct vmw_private *dev_priv, vmw_dx_streamoutput_set_size(res, cmd->body.sizeInBytes); - ret = vmw_execbuf_res_noctx_val_add(sw_context, res, - VMW_RES_DIRTY_NONE); + ret = vmw_execbuf_res_val_add(sw_context, res, VMW_RES_DIRTY_NONE, + vmw_val_add_flag_noctx); if (ret) { DRM_ERROR("Error creating resource validation node.\n"); return ret; @@ -3148,8 +3132,8 @@ static int vmw_cmd_dx_set_streamoutput(struct vmw_private *dev_priv, return 0; } - ret = vmw_execbuf_res_noctx_val_add(sw_context, res, - VMW_RES_DIRTY_NONE); + ret = vmw_execbuf_res_val_add(sw_context, res, VMW_RES_DIRTY_NONE, + vmw_val_add_flag_noctx); if (ret) { DRM_ERROR("Error creating resource validation node.\n"); return ret; @@ -4067,22 +4051,26 @@ static int vmw_execbuf_tie_context(struct vmw_private *dev_priv, if (ret) return ret; - res = vmw_user_resource_noref_lookup_handle + ret = vmw_user_resource_lookup_handle (dev_priv, sw_context->fp->tfile, handle, - user_context_converter); - if (IS_ERR(res)) { + user_context_converter, &res); + if (ret != 0) { VMW_DEBUG_USER("Could not find or user DX context 0x%08x.\n", (unsigned int) handle); - return PTR_ERR(res); + return ret; } - ret = vmw_execbuf_res_noref_val_add(sw_context, res, VMW_RES_DIRTY_SET); - if (unlikely(ret != 0)) + ret = vmw_execbuf_res_val_add(sw_context, res, VMW_RES_DIRTY_SET, + vmw_val_add_flag_none); + if (unlikely(ret != 0)) { + vmw_resource_unreference(&res); return ret; + } sw_context->dx_ctx_node = vmw_execbuf_info_from_res(sw_context, res); sw_context->man = vmw_context_res_man(res); + vmw_resource_unreference(&res); return 0; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index f66caa540e14..c7d645e5ec7b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -281,39 +281,6 @@ out_bad_resource: return ret; } -/** - * vmw_user_resource_noref_lookup_handle - lookup a struct resource from a - * TTM user-space handle and perform basic type checks - * - * @dev_priv: Pointer to a device private struct - * @tfile: Pointer to a struct ttm_object_file identifying the caller - * @handle: The TTM user-space handle - * @converter: Pointer to an object describing the resource type - * - * If the handle can't be found or is associated with an incorrect resource - * type, -EINVAL will be returned. - */ -struct vmw_resource * -vmw_user_resource_noref_lookup_handle(struct vmw_private *dev_priv, - struct ttm_object_file *tfile, - uint32_t handle, - const struct vmw_user_resource_conv - *converter) -{ - struct ttm_base_object *base; - - base = ttm_base_object_noref_lookup(tfile, handle); - if (!base) - return ERR_PTR(-ESRCH); - - if (unlikely(ttm_base_object_type(base) != converter->object_type)) { - ttm_base_object_noref_release(); - return ERR_PTR(-EINVAL); - } - - return converter->base_obj_to_res(base); -} - /* * Helper function that looks either a surface or bo. * From 8a97b544b98e44f596219ebb290fd2ba2fd5d644 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sat, 7 Jan 2023 19:10:04 +0200 Subject: [PATCH 141/177] net/sched: act_mpls: Fix warning during failed attribute validation [ Upstream commit 9e17f99220d111ea031b44153fdfe364b0024ff2 ] The 'TCA_MPLS_LABEL' attribute is of 'NLA_U32' type, but has a validation type of 'NLA_VALIDATE_FUNCTION'. This is an invalid combination according to the comment above 'struct nla_policy': " Meaning of `validate' field, use via NLA_POLICY_VALIDATE_FN: NLA_BINARY Validation function called for the attribute. All other Unused - but note that it's a union " This can trigger the warning [1] in nla_get_range_unsigned() when validation of the attribute fails. Despite being of 'NLA_U32' type, the associated 'min'/'max' fields in the policy are negative as they are aliased by the 'validate' field. Fix by changing the attribute type to 'NLA_BINARY' which is consistent with the above comment and all other users of NLA_POLICY_VALIDATE_FN(). As a result, move the length validation to the validation function. No regressions in MPLS tests: # ./tdc.py -f tc-tests/actions/mpls.json [...] # echo $? 0 [1] WARNING: CPU: 0 PID: 17743 at lib/nlattr.c:118 nla_get_range_unsigned+0x1d8/0x1e0 lib/nlattr.c:117 Modules linked in: CPU: 0 PID: 17743 Comm: syz-executor.0 Not tainted 6.1.0-rc8 #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-48-gd9c812dda519-prebuilt.qemu.org 04/01/2014 RIP: 0010:nla_get_range_unsigned+0x1d8/0x1e0 lib/nlattr.c:117 [...] Call Trace: __netlink_policy_dump_write_attr+0x23d/0x990 net/netlink/policy.c:310 netlink_policy_dump_write_attr+0x22/0x30 net/netlink/policy.c:411 netlink_ack_tlv_fill net/netlink/af_netlink.c:2454 [inline] netlink_ack+0x546/0x760 net/netlink/af_netlink.c:2506 netlink_rcv_skb+0x1b7/0x240 net/netlink/af_netlink.c:2546 rtnetlink_rcv+0x18/0x20 net/core/rtnetlink.c:6109 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0x5e9/0x6b0 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x739/0x860 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg net/socket.c:734 [inline] ____sys_sendmsg+0x38f/0x500 net/socket.c:2482 ___sys_sendmsg net/socket.c:2536 [inline] __sys_sendmsg+0x197/0x230 net/socket.c:2565 __do_sys_sendmsg net/socket.c:2574 [inline] __se_sys_sendmsg net/socket.c:2572 [inline] __x64_sys_sendmsg+0x42/0x50 net/socket.c:2572 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Link: https://lore.kernel.org/netdev/CAO4mrfdmjvRUNbDyP0R03_DrD_eFCLCguz6OxZ2TYRSv0K9gxA@mail.gmail.com/ Fixes: 2a2ea50870ba ("net: sched: add mpls manipulation actions to TC") Reported-by: Wei Chen Tested-by: Wei Chen Signed-off-by: Ido Schimmel Reviewed-by: Alexander Duyck Link: https://lore.kernel.org/r/20230107171004.608436-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/sched/act_mpls.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index 8ad25cc8ccd5..ea5959094adb 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -132,6 +132,11 @@ static int valid_label(const struct nlattr *attr, { const u32 *label = nla_data(attr); + if (nla_len(attr) != sizeof(*label)) { + NL_SET_ERR_MSG_MOD(extack, "Invalid MPLS label length"); + return -EINVAL; + } + if (*label & ~MPLS_LABEL_MASK || *label == MPLS_LABEL_IMPLNULL) { NL_SET_ERR_MSG_MOD(extack, "MPLS label out of range"); return -EINVAL; @@ -143,7 +148,8 @@ static int valid_label(const struct nlattr *attr, static const struct nla_policy mpls_policy[TCA_MPLS_MAX + 1] = { [TCA_MPLS_PARMS] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_mpls)), [TCA_MPLS_PROTO] = { .type = NLA_U16 }, - [TCA_MPLS_LABEL] = NLA_POLICY_VALIDATE_FN(NLA_U32, valid_label), + [TCA_MPLS_LABEL] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, + valid_label), [TCA_MPLS_TC] = NLA_POLICY_RANGE(NLA_U8, 0, 7), [TCA_MPLS_TTL] = NLA_POLICY_MIN(NLA_U8, 1), [TCA_MPLS_BOS] = NLA_POLICY_RANGE(NLA_U8, 0, 1), From 7697de4ad708099cda1427aff4a73af1a25ab162 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 8 Jan 2023 20:37:53 +0100 Subject: [PATCH 142/177] Revert "r8169: disable detection of chip version 36" [ Upstream commit 2ea26b4de6f42b74a5f1701de41efa6bc9f12666 ] This reverts commit 42666b2c452ce87894786aae05e3fad3cfc6cb59. This chip version seems to be very rare, but it exits in consumer devices, see linked report. Link: https://stackoverflow.com/questions/75049473/cant-setup-a-wired-network-in-archlinux-fresh-install Fixes: 42666b2c452c ("r8169: disable detection of chip version 36") Signed-off-by: Heiner Kallweit Link: https://lore.kernel.org/r/42e9674c-d5d0-a65a-f578-e5c74f244739@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/realtek/r8169_main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index a73d061d9fcb..fe8dc8e0522b 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -1996,10 +1996,7 @@ static enum mac_version rtl8169_get_mac_version(u16 xid, bool gmii) /* 8168F family. */ { 0x7c8, 0x488, RTL_GIGA_MAC_VER_38 }, - /* It seems this chip version never made it to - * the wild. Let's disable detection. - * { 0x7cf, 0x481, RTL_GIGA_MAC_VER_36 }, - */ + { 0x7cf, 0x481, RTL_GIGA_MAC_VER_36 }, { 0x7cf, 0x480, RTL_GIGA_MAC_VER_35 }, /* 8168E family. */ From 39124c14af45b4c0b393c9d271697b17721ad84b Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Tue, 16 Aug 2022 23:19:11 +0300 Subject: [PATCH 143/177] net/mlx5: check attr pointer validity before dereferencing it [ Upstream commit e0bf81bf0d3d4747c146e0bf44774d3d881d7137 ] Fix attr pointer validity checks after it was already dereferenced. Fixes: cb0d54cbf948 ("net/mlx5e: Fix wrong source vport matching on tunnel rule") Signed-off-by: Ariel Levkovich Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 8c6c9bcb3dc3..b4e263e8cfb8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -142,7 +142,7 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, if (mlx5_esw_indir_table_decap_vport(attr)) vport = mlx5_esw_indir_table_decap_vport(attr); - if (attr && !attr->chain && esw_attr->int_port) + if (!attr->chain && esw_attr && esw_attr->int_port) metadata = mlx5e_tc_int_port_get_metadata_for_match(esw_attr->int_port); else From 1f232fb61913f4ef527528330749d0b23d31c806 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Tue, 30 Aug 2022 00:32:30 +0300 Subject: [PATCH 144/177] net/mlx5e: TC, Keep mod hdr actions after mod hdr alloc [ Upstream commit 5e72f3f1c558019082cfeedeed73748f35d780c6 ] When offloading TC NIC rule which has mod_hdr action, the mod_hdr actions list is freed upon mod_hdr allocation. In the new format of handling multi table actions and CT in particular, the mod_hdr actions list is still relevant when setting the pre and post rules and therefore, freeing the list may cause adding rules which don't set the FTE_ID. Therefore, the mod_hdr actions list needs to be kept for the pre/post flows as well and should be left for these handler to be freed. Fixes: 8300f225268b ("net/mlx5e: Create new flow attr for multi table actions") Signed-off-by: Ariel Levkovich Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index bd9936af4582..4c313b7424bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1283,7 +1283,6 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); - mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts); if (err) return err; } @@ -1341,8 +1340,10 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, } mutex_unlock(&tc->t_lock); - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts); mlx5e_detach_mod_hdr(priv, flow); + } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) mlx5_fc_destroy(priv->mdev, attr->counter); From ddf458641a511e7dff19f3bf0cbbc5dd9fe08ce5 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 28 Nov 2022 19:05:47 +0200 Subject: [PATCH 145/177] net/mlx5: Fix command stats access after free [ Upstream commit da2e552b469a0cd130ff70a88ccc4139da428a65 ] Command may fail while driver is reloading and can't accept FW commands till command interface is reinitialized. Such command failure is being logged to command stats. This results in NULL pointer access as command stats structure is being freed and reallocated during mlx5 devlink reload (see kernel log below). Fix it by making command stats statically allocated on driver probe. Kernel log: [ 2394.808802] BUG: unable to handle kernel paging request at 000000000002a9c0 [ 2394.810610] PGD 0 P4D 0 [ 2394.811811] Oops: 0002 [#1] SMP NOPTI ... [ 2394.815482] RIP: 0010:native_queued_spin_lock_slowpath+0x183/0x1d0 ... [ 2394.829505] Call Trace: [ 2394.830667] _raw_spin_lock_irq+0x23/0x26 [ 2394.831858] cmd_status_err+0x55/0x110 [mlx5_core] [ 2394.833020] mlx5_access_reg+0xe7/0x150 [mlx5_core] [ 2394.834175] mlx5_query_port_ptys+0x78/0xa0 [mlx5_core] [ 2394.835337] mlx5e_ethtool_get_link_ksettings+0x74/0x590 [mlx5_core] [ 2394.836454] ? kmem_cache_alloc_trace+0x140/0x1c0 [ 2394.837562] __rh_call_get_link_ksettings+0x33/0x100 [ 2394.838663] ? __rtnl_unlock+0x25/0x50 [ 2394.839755] __ethtool_get_link_ksettings+0x72/0x150 [ 2394.840862] duplex_show+0x6e/0xc0 [ 2394.841963] dev_attr_show+0x1c/0x40 [ 2394.843048] sysfs_kf_seq_show+0x9b/0x100 [ 2394.844123] seq_read+0x153/0x410 [ 2394.845187] vfs_read+0x91/0x140 [ 2394.846226] ksys_read+0x4f/0xb0 [ 2394.847234] do_syscall_64+0x5b/0x1a0 [ 2394.848228] entry_SYSCALL_64_after_hwframe+0x65/0xca Fixes: 34f46ae0d4b3 ("net/mlx5: Add command failures data to debugfs") Signed-off-by: Moshe Shemesh Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 13 ++----------- include/linux/mlx5/driver.h | 2 +- 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index e7a894ba5c3e..723891eb86ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -2177,15 +2177,9 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) return -EINVAL; } - cmd->stats = kvcalloc(MLX5_CMD_OP_MAX, sizeof(*cmd->stats), GFP_KERNEL); - if (!cmd->stats) - return -ENOMEM; - cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0); - if (!cmd->pool) { - err = -ENOMEM; - goto dma_pool_err; - } + if (!cmd->pool) + return -ENOMEM; err = alloc_cmd_page(dev, cmd); if (err) @@ -2269,8 +2263,6 @@ err_free_page: err_free_pool: dma_pool_destroy(cmd->pool); -dma_pool_err: - kvfree(cmd->stats); return err; } @@ -2283,7 +2275,6 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) destroy_msg_cache(dev); free_cmd_page(dev, cmd); dma_pool_destroy(cmd->pool); - kvfree(cmd->stats); } void mlx5_cmd_set_state(struct mlx5_core_dev *dev, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 06cbad166225..ad55470a9fb9 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -315,7 +315,7 @@ struct mlx5_cmd { struct mlx5_cmd_debug dbg; struct cmd_msg_cache cache[MLX5_NUM_COMMAND_CACHES]; int checksum_disabled; - struct mlx5_cmd_stats *stats; + struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX]; }; struct mlx5_cmd_mailbox { From 798604a1bd6eb0a0d8ac539628d0f5ad1f50d112 Mon Sep 17 00:00:00 2001 From: Roy Novich Date: Wed, 4 Jan 2023 11:16:21 +0200 Subject: [PATCH 146/177] net/mlx5e: Verify dev is present for fix features ndo [ Upstream commit ab4b01bfdaa69492fb36484026b0a0f0af02d75a ] The native NIC port net device instance is being used as Uplink representor. While changing profiles private resources are not available, fix features ndo does not check if the netdev is present. Add driver protection to verify private resources are ready. Fixes: 7a9fb35e8c3a ("net/mlx5e: Do not reload ethernet ports when changing eswitch mode") Signed-off-by: Roy Novich Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 951ede433813..4dc149ef618c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4071,6 +4071,9 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, struct mlx5e_vlan_table *vlan; struct mlx5e_params *params; + if (!netif_device_present(netdev)) + return features; + vlan = mlx5e_fs_get_vlan(priv->fs); mutex_lock(&priv->state_lock); params = &priv->channels.params; From b5f108e202b74b3641dfcfcbacea096e159760dc Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 15 Dec 2022 13:02:38 +0200 Subject: [PATCH 147/177] net/mlx5e: IPoIB, Block queue count configuration when sub interfaces are present [ Upstream commit 806a8df7126a8c05d60411eeb81057c2a8bbe7a7 ] PKEY sub interfaces share the receive queues with the parent interface. While setting the sub interface queue count is not supported, it is currently possible to change the number of queues of the parent interface. Thus we can end up with inconsistent queue sizes between the parent and its sub interfaces. This change disallows setting the queue count on the parent interface when sub interfaces are present. This is achieved by introducing an explicit reference to the parent netdev in the mlx5i_priv of the child interface. An additional counter is also required on the parent side to detect when sub interfaces are attached and for proper cleanup. The rtnl lock is taken during the ethtool op and the sub interface ndo_init/uninit ops. There is no race here around counting the sub interfaces, reading the sub interfaces and setting the number of channels. The ASSERT_RTNL was added to document that. Fixes: be98737a4faa ("net/mlx5e: Use dynamic per-channel allocations in stats") Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../mellanox/mlx5/core/ipoib/ethtool.c | 16 +++++++- .../ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 38 +++++++++++++++++++ .../ethernet/mellanox/mlx5/core/ipoib/ipoib.h | 6 +++ .../mellanox/mlx5/core/ipoib/ipoib_vlan.c | 9 ++--- 4 files changed, 62 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index c247cca154e9..eff92dc0927c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -90,9 +90,21 @@ static void mlx5i_get_ringparam(struct net_device *dev, static int mlx5i_set_channels(struct net_device *dev, struct ethtool_channels *ch) { - struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct mlx5i_priv *ipriv = netdev_priv(dev); + struct mlx5e_priv *epriv = mlx5i_epriv(dev); - return mlx5e_ethtool_set_channels(priv, ch); + /* rtnl lock protects from race between this ethtool op and sub + * interface ndo_init/uninit. + */ + ASSERT_RTNL(); + if (ipriv->num_sub_interfaces > 0) { + mlx5_core_warn(epriv->mdev, + "can't change number of channels for interfaces with sub interfaces (%u)\n", + ipriv->num_sub_interfaces); + return -EINVAL; + } + + return mlx5e_ethtool_set_channels(epriv, ch); } static void mlx5i_get_channels(struct net_device *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 84f5352b0ce1..038ae0fcf9d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -160,6 +160,44 @@ void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) stats->tx_dropped = sstats->tx_queue_dropped; } +struct net_device *mlx5i_parent_get(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv, *parent_ipriv; + struct net_device *parent_dev; + int parent_ifindex; + + ipriv = priv->ppriv; + + parent_ifindex = netdev->netdev_ops->ndo_get_iflink(netdev); + parent_dev = dev_get_by_index(dev_net(netdev), parent_ifindex); + if (!parent_dev) + return NULL; + + parent_ipriv = netdev_priv(parent_dev); + + ASSERT_RTNL(); + parent_ipriv->num_sub_interfaces++; + + ipriv->parent_dev = parent_dev; + + return parent_dev; +} + +void mlx5i_parent_put(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5i_priv *ipriv, *parent_ipriv; + + ipriv = priv->ppriv; + parent_ipriv = netdev_priv(ipriv->parent_dev); + + ASSERT_RTNL(); + parent_ipriv->num_sub_interfaces--; + + dev_put(ipriv->parent_dev); +} + int mlx5i_init_underlay_qp(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index 99d46fda9f82..f3f2af972020 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -54,9 +54,11 @@ struct mlx5i_priv { struct rdma_netdev rn; /* keep this first */ u32 qpn; bool sub_interface; + u32 num_sub_interfaces; u32 qkey; u16 pkey_index; struct mlx5i_pkey_qpn_ht *qpn_htbl; + struct net_device *parent_dev; char *mlx5e_priv[]; }; @@ -117,5 +119,9 @@ void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_av *av, u32 dqpn, u32 dqkey, bool xmit_more); void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); +/* Reference management for child to parent interfaces. */ +struct net_device *mlx5i_parent_get(struct net_device *netdev); +void mlx5i_parent_put(struct net_device *netdev); + #endif /* CONFIG_MLX5_CORE_IPOIB */ #endif /* __MLX5E_IPOB_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index 0227a521d301..3d31c59e69d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -158,21 +158,19 @@ static int mlx5i_pkey_dev_init(struct net_device *dev) struct mlx5e_priv *priv = mlx5i_epriv(dev); struct mlx5i_priv *ipriv, *parent_ipriv; struct net_device *parent_dev; - int parent_ifindex; ipriv = priv->ppriv; - /* Get QPN to netdevice hash table from parent */ - parent_ifindex = dev->netdev_ops->ndo_get_iflink(dev); - parent_dev = dev_get_by_index(dev_net(dev), parent_ifindex); + /* Link to parent */ + parent_dev = mlx5i_parent_get(dev); if (!parent_dev) { mlx5_core_warn(priv->mdev, "failed to get parent device\n"); return -EINVAL; } + /* Get QPN to netdevice hash table from parent */ parent_ipriv = netdev_priv(parent_dev); ipriv->qpn_htbl = parent_ipriv->qpn_htbl; - dev_put(parent_dev); return mlx5i_dev_init(dev); } @@ -184,6 +182,7 @@ static int mlx5i_pkey_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) static void mlx5i_pkey_dev_cleanup(struct net_device *netdev) { + mlx5i_parent_put(netdev); return mlx5i_dev_cleanup(netdev); } From 5844a46f09f768da866d6b0ffbf1a9073266bf24 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Fri, 25 Nov 2022 17:51:19 +0200 Subject: [PATCH 148/177] net/mlx5e: IPoIB, Block PKEY interfaces with less rx queues than parent [ Upstream commit 31c70bfe58ef09fe36327ddcced9143a16e9e83d ] A user is able to configure an arbitrary number of rx queues when creating an interface via netlink. This doesn't work for child PKEY interfaces because the child interface uses the parent receive channels. Although the child shares the parent's receive channels, the number of rx queues is important for the channel_stats array: the parent's rx channel index is used to access the child's channel_stats. So the array has to be at least as large as the parent's rx queue size for the counting to work correctly and to prevent out of bound accesses. This patch checks for the mentioned scenario and returns an error when trying to create the interface. The error is propagated to the user. Fixes: be98737a4faa ("net/mlx5e: Use dynamic per-channel allocations in stats") Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index 3d31c59e69d4..0cf4eaf852d2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -168,6 +168,15 @@ static int mlx5i_pkey_dev_init(struct net_device *dev) return -EINVAL; } + if (dev->num_rx_queues < parent_dev->real_num_rx_queues) { + mlx5_core_warn(priv->mdev, + "failed to create child device with rx queues [%d] less than parent's [%d]\n", + dev->num_rx_queues, + parent_dev->real_num_rx_queues); + mlx5i_parent_put(dev); + return -EINVAL; + } + /* Get QPN to netdevice hash table from parent */ parent_ipriv = netdev_priv(parent_dev); ipriv->qpn_htbl = parent_ipriv->qpn_htbl; From 0fa15a13670d91ed15c6d0b552404db3fc79642b Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Wed, 23 Nov 2022 16:59:13 +0200 Subject: [PATCH 149/177] net/mlx5e: IPoIB, Fix child PKEY interface stats on rx path [ Upstream commit b5e23931c45a2f99f60a2f2b98a9e4d5a62a5b13 ] The current code always does the accounting using the stats from the parent interface (linked in the rq). This doesn't work when there are child interfaces configured. Fix this behavior by always using the stats from the child interface priv. This will also work for parent only interfaces: the child (netdev) and parent netdev (rq->netdev) will point to the same thing. Fixes: be98737a4faa ("net/mlx5e: Use dynamic per-channel allocations in stats") Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index a61a43fc8d5c..56d1bd22c7c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -2300,7 +2300,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, priv = mlx5i_epriv(netdev); tstamp = &priv->tstamp; - stats = rq->stats; + stats = &priv->channel_stats[rq->ix]->rq; flags_rqpn = be32_to_cpu(cqe->flags_rqpn); g = (flags_rqpn >> 28) & 3; From f594f740c3523abe1067c684a832c47eb4f01745 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Mon, 5 Dec 2022 14:26:09 -0800 Subject: [PATCH 150/177] net/mlx5: Fix ptp max frequency adjustment range [ Upstream commit fe91d57277eef8bb4aca05acfa337b4a51d0bba4 ] .max_adj of ptp_clock_info acts as an absolute value for the amount in ppb that can be set for a single call of .adjfine. This means that a single call to .getfine cannot be greater than .max_adj or less than -(.max_adj). Provides correct value for max frequency adjustment value supported by devices. Fixes: 3d8c38af1493 ("net/mlx5e: Add PTP Hardware Clock (PHC) support") Signed-off-by: Rahul Rameshbabu Reviewed-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index d3a9ae80fd30..d7ddfc489536 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -691,7 +691,7 @@ static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin, static const struct ptp_clock_info mlx5_ptp_clock_info = { .owner = THIS_MODULE, .name = "mlx5_ptp", - .max_adj = 100000000, + .max_adj = 50000000, .n_alarm = 0, .n_ext_ts = 0, .n_per_out = 0, From e02190787f1141935784c1015b964fa39370c686 Mon Sep 17 00:00:00 2001 From: Gavin Li Date: Tue, 27 Dec 2022 04:54:09 +0200 Subject: [PATCH 151/177] net/mlx5e: Don't support encap rules with gbp option [ Upstream commit d515d63cae2cd186acf40deaa8ef33067bb7f637 ] Previously, encap rules with gbp option would be offloaded by mistake but driver does not support gbp option offload. To fix this issue, check if the encap rule has gbp option and don't offload the rule Fixes: d8f9dfae49ce ("net: sched: allow flower to match vxlan options") Signed-off-by: Gavin Li Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c index fd07c4cbfd1d..1f62c702b625 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_vxlan.c @@ -88,6 +88,8 @@ static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[], struct udphdr *udp = (struct udphdr *)(buf); struct vxlanhdr *vxh; + if (tun_key->tun_flags & TUNNEL_VXLAN_OPT) + return -EOPNOTSUPP; vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr)); *ip_proto = IPPROTO_UDP; From 9be4e25bdc1fb6bacde61e3e35e276c821a485a0 Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Wed, 14 Dec 2022 16:34:13 +0200 Subject: [PATCH 152/177] net/mlx5e: Fix macsec ssci attribute handling in offload path [ Upstream commit f5e1ed04aa2ea665a796f0109091ca3f2b01024a ] Currently when macsec offload is set with extended packet number (epn) enabled, the driver wrongly deduce the short secure channel identifier (ssci) from the salt instead of the stand alone ssci attribute as it should, consequently creating a mismatch between the kernel and driver's ssci values. Fix by using the ssci value from the relevant attribute. Fixes: 4411a6c0abd3 ("net/mlx5e: Support MACsec offload extended packet number (EPN)") Signed-off-by: Emeel Hakim Reviewed-by: Raed Salem Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index f900709639f6..7c0085ba2fc5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -62,6 +62,7 @@ struct mlx5e_macsec_sa { u32 enc_key_id; u32 next_pn; sci_t sci; + ssci_t ssci; salt_t salt; struct rhash_head hash; @@ -499,10 +500,11 @@ mlx5e_macsec_get_macsec_device_context(const struct mlx5e_macsec *macsec, } static void update_macsec_epn(struct mlx5e_macsec_sa *sa, const struct macsec_key *key, - const pn_t *next_pn_halves) + const pn_t *next_pn_halves, ssci_t ssci) { struct mlx5e_macsec_epn_state *epn_state = &sa->epn_state; + sa->ssci = ssci; sa->salt = key->salt; epn_state->epn_enabled = 1; epn_state->epn_msb = next_pn_halves->upper; @@ -550,7 +552,8 @@ static int mlx5e_macsec_add_txsa(struct macsec_context *ctx) tx_sa->assoc_num = assoc_num; if (secy->xpn) - update_macsec_epn(tx_sa, &ctx_tx_sa->key, &ctx_tx_sa->next_pn_halves); + update_macsec_epn(tx_sa, &ctx_tx_sa->key, &ctx_tx_sa->next_pn_halves, + ctx_tx_sa->ssci); err = mlx5_create_encryption_key(mdev, ctx->sa.key, secy->key_len, MLX5_ACCEL_OBJ_MACSEC_KEY, @@ -945,7 +948,8 @@ static int mlx5e_macsec_add_rxsa(struct macsec_context *ctx) rx_sa->fs_id = rx_sc->sc_xarray_element->fs_id; if (ctx->secy->xpn) - update_macsec_epn(rx_sa, &ctx_rx_sa->key, &ctx_rx_sa->next_pn_halves); + update_macsec_epn(rx_sa, &ctx_rx_sa->key, &ctx_rx_sa->next_pn_halves, + ctx_rx_sa->ssci); err = mlx5_create_encryption_key(mdev, ctx->sa.key, ctx->secy->key_len, MLX5_ACCEL_OBJ_MACSEC_KEY, From 514d9c6a39213d8200884e70f60ce7faef1ee597 Mon Sep 17 00:00:00 2001 From: Emeel Hakim Date: Sun, 11 Dec 2022 13:22:23 +0200 Subject: [PATCH 153/177] net/mlx5e: Fix macsec possible null dereference when updating MAC security entity (SecY) [ Upstream commit 9828994ac492e8e7de47fe66097b7e665328f348 ] Upon updating MAC security entity (SecY) in hw offload path, the macsec security association (SA) initialization routine is called. In case of extended packet number (epn) is enabled the salt and ssci attributes are retrieved using the MACsec driver rx_sa context which is unavailable when updating a SecY property such as encoding-sa hence the null dereference. Fix by using the provided SA to set those attributes. Fixes: 4411a6c0abd3 ("net/mlx5e: Support MACsec offload extended packet number (EPN)") Signed-off-by: Emeel Hakim Reviewed-by: Raed Salem Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 7c0085ba2fc5..b92d541b5286 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -359,7 +359,6 @@ static int mlx5e_macsec_init_sa(struct macsec_context *ctx, struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_macsec_obj_attrs obj_attrs; union mlx5e_macsec_rule *macsec_rule; - struct macsec_key *key; int err; obj_attrs.next_pn = sa->next_pn; @@ -369,13 +368,9 @@ static int mlx5e_macsec_init_sa(struct macsec_context *ctx, obj_attrs.aso_pdn = macsec->aso.pdn; obj_attrs.epn_state = sa->epn_state; - key = (is_tx) ? &ctx->sa.tx_sa->key : &ctx->sa.rx_sa->key; - if (sa->epn_state.epn_enabled) { - obj_attrs.ssci = (is_tx) ? cpu_to_be32((__force u32)ctx->sa.tx_sa->ssci) : - cpu_to_be32((__force u32)ctx->sa.rx_sa->ssci); - - memcpy(&obj_attrs.salt, &key->salt, sizeof(key->salt)); + obj_attrs.ssci = cpu_to_be32((__force u32)sa->ssci); + memcpy(&obj_attrs.salt, &sa->salt, sizeof(sa->salt)); } obj_attrs.replay_window = ctx->secy->replay_window; From 1621f9a5394880e765f28f1e3401ba582b2ab2ee Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sun, 8 Jan 2023 16:45:41 +0100 Subject: [PATCH 154/177] selftests/net: l2_tos_ttl_inherit.sh: Set IPv6 addresses with "nodad". [ Upstream commit e59370b2e96eb8e7e057a2a16e999ff385a3f2fb ] The ping command can run before DAD completes. In that case, ping may fail and break the selftest. We don't need DAD here since we're working on isolated device pairs. Fixes: b690842d12fd ("selftests/net: test l2 tunnel TOS/TTL inheriting") Signed-off-by: Guillaume Nault Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- tools/testing/selftests/net/l2_tos_ttl_inherit.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh index dca1e6f777a8..e2574b08eabc 100755 --- a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh +++ b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh @@ -137,8 +137,8 @@ setup() { if [ "$type" = "gre" ]; then type="ip6gretap" fi - ip addr add fdd1:ced0:5d88:3fce::1/64 dev veth0 - $ns ip addr add fdd1:ced0:5d88:3fce::2/64 dev veth1 + ip addr add fdd1:ced0:5d88:3fce::1/64 dev veth0 nodad + $ns ip addr add fdd1:ced0:5d88:3fce::2/64 dev veth1 nodad ip link add name tep0 type $type $local_addr1 \ remote fdd1:ced0:5d88:3fce::2 tos $test_tos ttl $test_ttl \ $vxlan $geneve @@ -170,8 +170,8 @@ setup() { ip addr add 198.19.0.1/24 brd + dev ${parent}0 $ns ip addr add 198.19.0.2/24 brd + dev ${parent}1 elif [ "$inner" = "6" ]; then - ip addr add fdd4:96cf:4eae:443b::1/64 dev ${parent}0 - $ns ip addr add fdd4:96cf:4eae:443b::2/64 dev ${parent}1 + ip addr add fdd4:96cf:4eae:443b::1/64 dev ${parent}0 nodad + $ns ip addr add fdd4:96cf:4eae:443b::2/64 dev ${parent}1 nodad fi } From 2440f74a9967460b87b6ba74203a81515e049cf7 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sun, 8 Jan 2023 16:45:46 +0100 Subject: [PATCH 155/177] selftests/net: l2_tos_ttl_inherit.sh: Run tests in their own netns. [ Upstream commit c53cb00f7983a5474f2d36967f84908b85af9159 ] This selftest currently runs half in the current namespace and half in a netns of its own. Therefore, the test can fail if the current namespace is already configured with incompatible parameters (for example if it already has a veth0 interface). Adapt the script to put both ends of the veth pair in their own netns. Now veth0 is created in NS0 instead of the current namespace, while veth1 is set up in NS1 (instead of the 'testing' netns). The user visible netns names are randomised to minimise the risk of conflicts with already existing namespaces. The cleanup() function doesn't need to remove the virtual interface anymore: deleting NS0 and NS1 automatically removes the virtual interfaces they contained. We can remove $ns, which was only used to run ip commands in the 'testing' netns (let's use the builtin "-netns" option instead). However, we still need a similar functionality as ping and tcpdump now need to run in NS0. So we now have $RUN_NS0 for that. Fixes: b690842d12fd ("selftests/net: test l2 tunnel TOS/TTL inheriting") Reported-by: Mirsad Goran Todorovac Tested-by: Mirsad Goran Todorovac Signed-off-by: Guillaume Nault Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../selftests/net/l2_tos_ttl_inherit.sh | 162 ++++++++++-------- 1 file changed, 93 insertions(+), 69 deletions(-) diff --git a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh index e2574b08eabc..cf56680d598f 100755 --- a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh +++ b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh @@ -25,6 +25,11 @@ expected_tos="0x00" expected_ttl="0" failed=false +readonly NS0=$(mktemp -u ns0-XXXXXXXX) +readonly NS1=$(mktemp -u ns1-XXXXXXXX) + +RUN_NS0="ip netns exec ${NS0}" + get_random_tos() { # Get a random hex tos value between 0x00 and 0xfc, a multiple of 4 echo "0x$(tr -dc '0-9a-f' < /dev/urandom | head -c 1)\ @@ -61,7 +66,6 @@ setup() { local vlan="$5" local test_tos="0x00" local test_ttl="0" - local ns="ip netns exec testing" # We don't want a test-tos of 0x00, # because this is the value that we get when no tos is set. @@ -94,14 +98,15 @@ setup() { printf "│%7s │%6s │%6s │%13s │%13s │%6s │" \ "$type" "$outer" "$inner" "$tos" "$ttl" "$vlan" - # Create 'testing' netns, veth pair and connect main ns with testing ns - ip netns add testing - ip link add type veth - ip link set veth1 netns testing - ip link set veth0 up - $ns ip link set veth1 up - ip addr flush dev veth0 - $ns ip addr flush dev veth1 + # Create netns NS0 and NS1 and connect them with a veth pair + ip netns add "${NS0}" + ip netns add "${NS1}" + ip link add name veth0 netns "${NS0}" type veth \ + peer name veth1 netns "${NS1}" + ip -netns "${NS0}" link set dev veth0 up + ip -netns "${NS1}" link set dev veth1 up + ip -netns "${NS0}" address flush dev veth0 + ip -netns "${NS1}" address flush dev veth1 local local_addr1="" local local_addr2="" @@ -127,51 +132,59 @@ setup() { if [ "$type" = "gre" ]; then type="gretap" fi - ip addr add 198.18.0.1/24 dev veth0 - $ns ip addr add 198.18.0.2/24 dev veth1 - ip link add name tep0 type $type $local_addr1 remote \ - 198.18.0.2 tos $test_tos ttl $test_ttl $vxlan $geneve - $ns ip link add name tep1 type $type $local_addr2 remote \ - 198.18.0.1 tos $test_tos ttl $test_ttl $vxlan $geneve + ip -netns "${NS0}" address add 198.18.0.1/24 dev veth0 + ip -netns "${NS1}" address add 198.18.0.2/24 dev veth1 + ip -netns "${NS0}" link add name tep0 type $type $local_addr1 \ + remote 198.18.0.2 tos $test_tos ttl $test_ttl \ + $vxlan $geneve + ip -netns "${NS1}" link add name tep1 type $type $local_addr2 \ + remote 198.18.0.1 tos $test_tos ttl $test_ttl \ + $vxlan $geneve elif [ "$outer" = "6" ]; then if [ "$type" = "gre" ]; then type="ip6gretap" fi - ip addr add fdd1:ced0:5d88:3fce::1/64 dev veth0 nodad - $ns ip addr add fdd1:ced0:5d88:3fce::2/64 dev veth1 nodad - ip link add name tep0 type $type $local_addr1 \ - remote fdd1:ced0:5d88:3fce::2 tos $test_tos ttl $test_ttl \ - $vxlan $geneve - $ns ip link add name tep1 type $type $local_addr2 \ - remote fdd1:ced0:5d88:3fce::1 tos $test_tos ttl $test_ttl \ - $vxlan $geneve + ip -netns "${NS0}" address add fdd1:ced0:5d88:3fce::1/64 \ + dev veth0 nodad + ip -netns "${NS1}" address add fdd1:ced0:5d88:3fce::2/64 \ + dev veth1 nodad + ip -netns "${NS0}" link add name tep0 type $type $local_addr1 \ + remote fdd1:ced0:5d88:3fce::2 tos $test_tos \ + ttl $test_ttl $vxlan $geneve + ip -netns "${NS1}" link add name tep1 type $type $local_addr2 \ + remote fdd1:ced0:5d88:3fce::1 tos $test_tos \ + ttl $test_ttl $vxlan $geneve fi # Bring L2-tunnel link up and create VLAN on top - ip link set tep0 up - $ns ip link set tep1 up - ip addr flush dev tep0 - $ns ip addr flush dev tep1 + ip -netns "${NS0}" link set tep0 up + ip -netns "${NS1}" link set tep1 up + ip -netns "${NS0}" address flush dev tep0 + ip -netns "${NS1}" address flush dev tep1 local parent if $vlan; then parent="vlan99-" - ip link add link tep0 name ${parent}0 type vlan id 99 - $ns ip link add link tep1 name ${parent}1 type vlan id 99 - ip link set ${parent}0 up - $ns ip link set ${parent}1 up - ip addr flush dev ${parent}0 - $ns ip addr flush dev ${parent}1 + ip -netns "${NS0}" link add link tep0 name ${parent}0 \ + type vlan id 99 + ip -netns "${NS1}" link add link tep1 name ${parent}1 \ + type vlan id 99 + ip -netns "${NS0}" link set dev ${parent}0 up + ip -netns "${NS1}" link set dev ${parent}1 up + ip -netns "${NS0}" address flush dev ${parent}0 + ip -netns "${NS1}" address flush dev ${parent}1 else parent="tep" fi # Assign inner IPv4/IPv6 addresses if [ "$inner" = "4" ] || [ "$inner" = "other" ]; then - ip addr add 198.19.0.1/24 brd + dev ${parent}0 - $ns ip addr add 198.19.0.2/24 brd + dev ${parent}1 + ip -netns "${NS0}" address add 198.19.0.1/24 brd + dev ${parent}0 + ip -netns "${NS1}" address add 198.19.0.2/24 brd + dev ${parent}1 elif [ "$inner" = "6" ]; then - ip addr add fdd4:96cf:4eae:443b::1/64 dev ${parent}0 nodad - $ns ip addr add fdd4:96cf:4eae:443b::2/64 dev ${parent}1 nodad + ip -netns "${NS0}" address add fdd4:96cf:4eae:443b::1/64 \ + dev ${parent}0 nodad + ip -netns "${NS1}" address add fdd4:96cf:4eae:443b::2/64 \ + dev ${parent}1 nodad fi } @@ -192,10 +205,10 @@ verify() { ping_dst="198.19.0.3" # Generates ARPs which are not IPv4/IPv6 fi if [ "$tos_ttl" = "inherit" ]; then - ping -i 0.1 $ping_dst -Q "$expected_tos" -t "$expected_ttl" \ - 2>/dev/null 1>&2 & ping_pid="$!" + ${RUN_NS0} ping -i 0.1 $ping_dst -Q "$expected_tos" \ + -t "$expected_ttl" 2>/dev/null 1>&2 & ping_pid="$!" else - ping -i 0.1 $ping_dst 2>/dev/null 1>&2 & ping_pid="$!" + ${RUN_NS0} ping -i 0.1 $ping_dst 2>/dev/null 1>&2 & ping_pid="$!" fi local tunnel_type_offset tunnel_type_proto req_proto_offset req_offset if [ "$type" = "gre" ]; then @@ -216,10 +229,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip[$tunnel_type_offset] = $tunnel_type_proto and \ - ip[$req_proto_offset] = 0x01 and \ - ip[$req_offset] = 0x08 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x01 and \ + ip[$req_offset] = 0x08 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "6" ]; then req_proto_offset="44" req_offset="78" @@ -231,10 +246,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip[$tunnel_type_offset] = $tunnel_type_proto and \ - ip[$req_proto_offset] = 0x3a and \ - ip[$req_offset] = 0x80 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x3a and \ + ip[$req_offset] = 0x80 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "other" ]; then req_proto_offset="36" req_offset="45" @@ -250,11 +267,13 @@ verify() { expected_tos="0x00" expected_ttl="64" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip[$tunnel_type_offset] = $tunnel_type_proto and \ - ip[$req_proto_offset] = 0x08 and \ - ip[$((req_proto_offset + 1))] = 0x06 and \ - ip[$req_offset] = 0x01 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip[$tunnel_type_offset] = $tunnel_type_proto and \ + ip[$req_proto_offset] = 0x08 and \ + ip[$((req_proto_offset + 1))] = 0x06 and \ + ip[$req_offset] = 0x01 2>/dev/null \ + | head -n 1)" fi elif [ "$outer" = "6" ]; then if [ "$type" = "gre" ]; then @@ -273,10 +292,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip6[$tunnel_type_offset] = $tunnel_type_proto and \ - ip6[$req_proto_offset] = 0x01 and \ - ip6[$req_offset] = 0x08 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x01 and \ + ip6[$req_offset] = 0x08 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "6" ]; then local req_proto_offset="72" local req_offset="106" @@ -288,10 +309,12 @@ verify() { req_proto_offset="$((req_proto_offset + 4))" req_offset="$((req_offset + 4))" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip6[$tunnel_type_offset] = $tunnel_type_proto and \ - ip6[$req_proto_offset] = 0x3a and \ - ip6[$req_offset] = 0x80 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x3a and \ + ip6[$req_offset] = 0x80 2>/dev/null \ + | head -n 1)" elif [ "$inner" = "other" ]; then local req_proto_offset="64" local req_offset="73" @@ -307,11 +330,13 @@ verify() { expected_tos="0x00" expected_ttl="64" fi - out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \ - ip6[$tunnel_type_offset] = $tunnel_type_proto and \ - ip6[$req_proto_offset] = 0x08 and \ - ip6[$((req_proto_offset + 1))] = 0x06 and \ - ip6[$req_offset] = 0x01 2>/dev/null | head -n 1)" + out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \ + -i veth0 -n \ + ip6[$tunnel_type_offset] = $tunnel_type_proto and \ + ip6[$req_proto_offset] = 0x08 and \ + ip6[$((req_proto_offset + 1))] = 0x06 and \ + ip6[$req_offset] = 0x01 2>/dev/null \ + | head -n 1)" fi fi kill -9 $ping_pid @@ -351,9 +376,8 @@ verify() { } cleanup() { - ip link del veth0 2>/dev/null - ip netns del testing 2>/dev/null - ip link del tep0 2>/dev/null + ip netns del "${NS0}" 2>/dev/null + ip netns del "${NS1}" 2>/dev/null } printf "┌────────┬───────┬───────┬──────────────┬" From 0daf5f4a1eef482be8f59c6d0b39a413dbaf235e Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Sun, 8 Jan 2023 16:45:50 +0100 Subject: [PATCH 156/177] selftests/net: l2_tos_ttl_inherit.sh: Ensure environment cleanup on failure. [ Upstream commit d68ff8ad3351b8fc8d6f14b9a4f5cc8ba3e8bd13 ] Use 'set -e' and an exit handler to stop the script if a command fails and ensure the test environment is cleaned up in any case. Also, handle the case where the script is interrupted by SIGINT. The only command that's expected to fail is 'wait $ping_pid', since it's killed by the script. Handle this case with '|| true' to make it play well with 'set -e'. Finally, return the Kselftest SKIP code (4) when the script breaks because of an environment problem or a command line failure. The 0 and 1 return codes should now reliably indicate that all tests have been run (0: all tests run and passed, 1: all tests run but at least one failed, 4: test script didn't run completely). Fixes: b690842d12fd ("selftests/net: test l2 tunnel TOS/TTL inheriting") Reported-by: Mirsad Goran Todorovac Tested-by: Mirsad Goran Todorovac Signed-off-by: Guillaume Nault Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../selftests/net/l2_tos_ttl_inherit.sh | 40 +++++++++++++++++-- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh index cf56680d598f..f11756e7df2f 100755 --- a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh +++ b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh @@ -12,13 +12,16 @@ # In addition this script also checks if forcing a specific field in the # outer header is working. +# Return 4 by default (Kselftest SKIP code) +ERR=4 + if [ "$(id -u)" != "0" ]; then echo "Please run as root." - exit 0 + exit $ERR fi if ! which tcpdump > /dev/null 2>&1; then echo "No tcpdump found. Required for this test." - exit 0 + exit $ERR fi expected_tos="0x00" @@ -340,7 +343,7 @@ verify() { fi fi kill -9 $ping_pid - wait $ping_pid 2>/dev/null + wait $ping_pid 2>/dev/null || true result="FAIL" if [ "$outer" = "4" ]; then captured_ttl="$(get_field "ttl" "$out")" @@ -380,6 +383,31 @@ cleanup() { ip netns del "${NS1}" 2>/dev/null } +exit_handler() { + # Don't exit immediately if one of the intermediate commands fails. + # We might be called at the end of the script, when the network + # namespaces have already been deleted. So cleanup() may fail, but we + # still need to run until 'exit $ERR' or the script won't return the + # correct error code. + set +e + + cleanup + + exit $ERR +} + +# Restore the default SIGINT handler (just in case) and exit. +# The exit handler will take care of cleaning everything up. +interrupted() { + trap - INT + + exit $ERR +} + +set -e +trap exit_handler EXIT +trap interrupted INT + printf "┌────────┬───────┬───────┬──────────────┬" printf "──────────────┬───────┬────────┐\n" for type in gre vxlan geneve; do @@ -409,6 +437,10 @@ done printf "└────────┴───────┴───────┴──────────────┴" printf "──────────────┴───────┴────────┘\n" +# All tests done. +# Set ERR appropriately: it will be returned by the exit handler. if $failed; then - exit 1 + ERR=1 +else + ERR=0 fi From c8ca0ad10df08ea36bcac1288062d567d22604c9 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Mon, 9 Jan 2023 11:43:25 +0530 Subject: [PATCH 157/177] octeontx2-pf: Fix resource leakage in VF driver unbind [ Upstream commit 53da7aec32982f5ee775b69dce06d63992ce4af3 ] resources allocated like mcam entries to support the Ntuple feature and hash tables for the tc feature are not getting freed in driver unbind. This patch fixes the issue. Fixes: 2da489432747 ("octeontx2-pf: devlink params support to set mcam entry count") Signed-off-by: Hariprasad Kelam Signed-off-by: Sunil Kovvuri Goutham Link: https://lore.kernel.org/r/20230109061325.21395-1-hkelam@marvell.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 86653bb8e403..7f8ffbf79cf7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -758,6 +758,8 @@ static void otx2vf_remove(struct pci_dev *pdev) if (vf->otx2_wq) destroy_workqueue(vf->otx2_wq); otx2_ptp_destroy(vf); + otx2_mcam_flow_del(vf); + otx2_shutdown_tc(vf); otx2vf_disable_mbox_intr(vf); otx2_detach_resources(&vf->mbox); if (test_bit(CN10K_LMTST, &vf->hw.cap_flag)) From d4bc9f017f3163348bfa7c2f76b2ab2058d12e72 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 6 Jan 2023 07:13:19 -0800 Subject: [PATCH 158/177] perf build: Properly guard libbpf includes [ Upstream commit d891f2b724b39a2a41e3ad7b57110193993242ff ] Including libbpf header files should be guarded by HAVE_LIBBPF_SUPPORT. In bpf_counter.h, move the skeleton utilities under HAVE_BPF_SKEL. Fixes: d6a735ef3277c45f ("perf bpf_counter: Move common functions to bpf_counter.h") Reported-by: Mike Leach Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Tested-by: Jiri Olsa Tested-by: Mike Leach Cc: Alexander Shishkin Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20230105172243.7238-1-mike.leach@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-trace.c | 2 ++ tools/perf/util/bpf_counter.h | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 3dcf6aed1ef7..97b17f8941dc 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -17,7 +17,9 @@ #include "util/record.h" #include #include +#ifdef HAVE_LIBBPF_SUPPORT #include +#endif #include "util/bpf_map.h" #include "util/rlimit.h" #include "builtin.h" diff --git a/tools/perf/util/bpf_counter.h b/tools/perf/util/bpf_counter.h index 4dbf26408b69..c6d21c07b14c 100644 --- a/tools/perf/util/bpf_counter.h +++ b/tools/perf/util/bpf_counter.h @@ -4,9 +4,12 @@ #include #include + +#ifdef HAVE_LIBBPF_SUPPORT #include #include #include +#endif struct evsel; struct target; @@ -87,6 +90,8 @@ static inline void set_max_rlimit(void) setrlimit(RLIMIT_MEMLOCK, &rinf); } +#ifdef HAVE_BPF_SKEL + static inline __u32 bpf_link_get_id(int fd) { struct bpf_link_info link_info = { .id = 0, }; @@ -127,5 +132,6 @@ static inline int bperf_trigger_reading(int prog_fd, int cpu) return bpf_prog_test_run_opts(prog_fd, &opts); } +#endif /* HAVE_BPF_SKEL */ #endif /* __PERF_BPF_COUNTER_H */ From 4e81a8a47dc667574afe2d1ee825b9a130b9fe0f Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Sun, 8 Jan 2023 14:23:59 +0800 Subject: [PATCH 159/177] perf kmem: Support legacy tracepoints [ Upstream commit b3719108ae60169eda5c941ca5e1be1faa371c57 ] Commit 11e9734bcb6a7361 ("mm/slab_common: unify NUMA and UMA version of tracepoints") removed tracepoints 'kmalloc_node' and 'kmem_cache_alloc_node', we need to consider the tool should be backward compatible. If it detect the tracepoint "kmem:kmalloc_node", this patch enables the legacy tracepoints, otherwise, it will ignore them. Fixes: 11e9734bcb6a7361 ("mm/slab_common: unify NUMA and UMA version of tracepoints") Reported-by: Ravi Bangoria Reviewed-by: James Clark Signed-off-by: Leo Yan Cc: Alexander Shishkin Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Vlastimil Babka Link: https://lore.kernel.org/r/20230108062400.250690-1-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-kmem.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index ebfab2ca1702..63c759edb8bc 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -1823,6 +1823,19 @@ static int parse_line_opt(const struct option *opt __maybe_unused, return 0; } +static bool slab_legacy_tp_is_exposed(void) +{ + /* + * The tracepoints "kmem:kmalloc_node" and + * "kmem:kmem_cache_alloc_node" have been removed on the latest + * kernel, if the tracepoint "kmem:kmalloc_node" is existed it + * means the tool is running on an old kernel, we need to + * rollback to support these legacy tracepoints. + */ + return IS_ERR(trace_event__tp_format("kmem", "kmalloc_node")) ? + false : true; +} + static int __cmd_record(int argc, const char **argv) { const char * const record_args[] = { @@ -1830,22 +1843,28 @@ static int __cmd_record(int argc, const char **argv) }; const char * const slab_events[] = { "-e", "kmem:kmalloc", - "-e", "kmem:kmalloc_node", "-e", "kmem:kfree", "-e", "kmem:kmem_cache_alloc", - "-e", "kmem:kmem_cache_alloc_node", "-e", "kmem:kmem_cache_free", }; + const char * const slab_legacy_events[] = { + "-e", "kmem:kmalloc_node", + "-e", "kmem:kmem_cache_alloc_node", + }; const char * const page_events[] = { "-e", "kmem:mm_page_alloc", "-e", "kmem:mm_page_free", }; unsigned int rec_argc, i, j; const char **rec_argv; + unsigned int slab_legacy_tp_exposed = slab_legacy_tp_is_exposed(); rec_argc = ARRAY_SIZE(record_args) + argc - 1; - if (kmem_slab) + if (kmem_slab) { rec_argc += ARRAY_SIZE(slab_events); + if (slab_legacy_tp_exposed) + rec_argc += ARRAY_SIZE(slab_legacy_events); + } if (kmem_page) rec_argc += ARRAY_SIZE(page_events) + 1; /* for -g */ @@ -1860,6 +1879,10 @@ static int __cmd_record(int argc, const char **argv) if (kmem_slab) { for (j = 0; j < ARRAY_SIZE(slab_events); j++, i++) rec_argv[i] = strdup(slab_events[j]); + if (slab_legacy_tp_exposed) { + for (j = 0; j < ARRAY_SIZE(slab_legacy_events); j++, i++) + rec_argv[i] = strdup(slab_legacy_events[j]); + } } if (kmem_page) { rec_argv[i++] = strdup("-g"); From 8c3dc300617cc7ceae0335c8363d4e03a0a92e54 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Sun, 8 Jan 2023 14:24:00 +0800 Subject: [PATCH 160/177] perf kmem: Support field "node" in evsel__process_alloc_event() coping with recent tracepoint restructuring [ Upstream commit dce088ab0d51ae3b14fb2bd608e9c649aadfe5dc ] Commit 11e9734bcb6a7361 ("mm/slab_common: unify NUMA and UMA version of tracepoints") adds the field "node" into the tracepoints 'kmalloc' and 'kmem_cache_alloc', so this patch modifies the event process function to support the field "node". If field "node" is detected by checking function evsel__field(), it stats the cross allocation. When the "node" value is NUMA_NO_NODE (-1), it means the memory can be allocated from any memory node, in this case, we don't account it as a cross allocation. Fixes: 11e9734bcb6a7361 ("mm/slab_common: unify NUMA and UMA version of tracepoints") Reported-by: Ravi Bangoria Reviewed-by: James Clark Signed-off-by: Leo Yan Cc: Alexander Shishkin Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com> Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Vlastimil Babka Link: https://lore.kernel.org/r/20230108062400.250690-2-leo.yan@linaro.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-kmem.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 63c759edb8bc..40dd52acc48a 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -26,6 +26,7 @@ #include "util/string2.h" #include +#include #include #include #include @@ -184,22 +185,33 @@ static int evsel__process_alloc_event(struct evsel *evsel, struct perf_sample *s total_allocated += bytes_alloc; nr_allocs++; - return 0; -} -static int evsel__process_alloc_node_event(struct evsel *evsel, struct perf_sample *sample) -{ - int ret = evsel__process_alloc_event(evsel, sample); + /* + * Commit 11e9734bcb6a ("mm/slab_common: unify NUMA and UMA + * version of tracepoints") adds the field "node" into the + * tracepoints 'kmalloc' and 'kmem_cache_alloc'. + * + * The legacy tracepoints 'kmalloc_node' and 'kmem_cache_alloc_node' + * also contain the field "node". + * + * If the tracepoint contains the field "node" the tool stats the + * cross allocation. + */ + if (evsel__field(evsel, "node")) { + int node1, node2; - if (!ret) { - int node1 = cpu__get_node((struct perf_cpu){.cpu = sample->cpu}), - node2 = evsel__intval(evsel, sample, "node"); + node1 = cpu__get_node((struct perf_cpu){.cpu = sample->cpu}); + node2 = evsel__intval(evsel, sample, "node"); - if (node1 != node2) + /* + * If the field "node" is NUMA_NO_NODE (-1), we don't take it + * as a cross allocation. + */ + if ((node2 != NUMA_NO_NODE) && (node1 != node2)) nr_cross_allocs++; } - return ret; + return 0; } static int ptr_cmp(void *, void *); @@ -1368,8 +1380,8 @@ static int __cmd_kmem(struct perf_session *session) /* slab allocator */ { "kmem:kmalloc", evsel__process_alloc_event, }, { "kmem:kmem_cache_alloc", evsel__process_alloc_event, }, - { "kmem:kmalloc_node", evsel__process_alloc_node_event, }, - { "kmem:kmem_cache_alloc_node", evsel__process_alloc_node_event, }, + { "kmem:kmalloc_node", evsel__process_alloc_event, }, + { "kmem:kmem_cache_alloc_node", evsel__process_alloc_event, }, { "kmem:kfree", evsel__process_free_event, }, { "kmem:kmem_cache_free", evsel__process_free_event, }, /* page allocator */ From bbc17e883cbad205d9d45b215b41d3dbad03168f Mon Sep 17 00:00:00 2001 From: Christopher S Hall Date: Wed, 14 Dec 2022 16:10:38 +0800 Subject: [PATCH 161/177] igc: Fix PPS delta between two synchronized end-points [ Upstream commit 5e91c72e560cc85f7163bbe3d14197268de31383 ] This patch fix the pulse per second output delta between two synchronized end-points. Based on Intel Discrete I225 Software User Manual Section 4.2.15 TimeSync Auxiliary Control Register, ST0[Bit 4] and ST1[Bit 7] must be set to ensure that clock output will be toggles based on frequency value defined. This is to ensure that output of the PPS is aligned with the clock. How to test: 1) Running time synchronization on both end points. Ex: ptp4l --step_threshold=1 -m -f gPTP.cfg -i 2) Configure PPS output using below command for both end-points Ex: SDP0 on I225 REV4 SKU variant ./testptp -d /dev/ptp0 -L 0,2 ./testptp -d /dev/ptp0 -p 1000000000 3) Measure the output using analyzer for both end-points Fixes: 87938851b6ef ("igc: enable auxiliary PHC functions for the i225") Signed-off-by: Christopher S Hall Signed-off-by: Muhammad Husaini Zulkifli Acked-by: Sasha Neftin Tested-by: Naama Meir Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igc/igc_defines.h | 2 ++ drivers/net/ethernet/intel/igc/igc_ptp.c | 10 ++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 4ad35fbdc02e..dbfa4b9dee06 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -466,7 +466,9 @@ #define IGC_TSAUXC_EN_TT0 BIT(0) /* Enable target time 0. */ #define IGC_TSAUXC_EN_TT1 BIT(1) /* Enable target time 1. */ #define IGC_TSAUXC_EN_CLK0 BIT(2) /* Enable Configurable Frequency Clock 0. */ +#define IGC_TSAUXC_ST0 BIT(4) /* Start Clock 0 Toggle on Target Time 0. */ #define IGC_TSAUXC_EN_CLK1 BIT(5) /* Enable Configurable Frequency Clock 1. */ +#define IGC_TSAUXC_ST1 BIT(7) /* Start Clock 1 Toggle on Target Time 1. */ #define IGC_TSAUXC_EN_TS0 BIT(8) /* Enable hardware timestamp 0. */ #define IGC_TSAUXC_AUTT0 BIT(9) /* Auxiliary Timestamp Taken. */ #define IGC_TSAUXC_EN_TS1 BIT(10) /* Enable hardware timestamp 0. */ diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 8dbb9f903ca7..c34734d432e0 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -322,7 +322,7 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, ts = ns_to_timespec64(ns); if (rq->perout.index == 1) { if (use_freq) { - tsauxc_mask = IGC_TSAUXC_EN_CLK1; + tsauxc_mask = IGC_TSAUXC_EN_CLK1 | IGC_TSAUXC_ST1; tsim_mask = 0; } else { tsauxc_mask = IGC_TSAUXC_EN_TT1; @@ -333,7 +333,7 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, freqout = IGC_FREQOUT1; } else { if (use_freq) { - tsauxc_mask = IGC_TSAUXC_EN_CLK0; + tsauxc_mask = IGC_TSAUXC_EN_CLK0 | IGC_TSAUXC_ST0; tsim_mask = 0; } else { tsauxc_mask = IGC_TSAUXC_EN_TT0; @@ -347,10 +347,12 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp, tsauxc = rd32(IGC_TSAUXC); tsim = rd32(IGC_TSIM); if (rq->perout.index == 1) { - tsauxc &= ~(IGC_TSAUXC_EN_TT1 | IGC_TSAUXC_EN_CLK1); + tsauxc &= ~(IGC_TSAUXC_EN_TT1 | IGC_TSAUXC_EN_CLK1 | + IGC_TSAUXC_ST1); tsim &= ~IGC_TSICR_TT1; } else { - tsauxc &= ~(IGC_TSAUXC_EN_TT0 | IGC_TSAUXC_EN_CLK0); + tsauxc &= ~(IGC_TSAUXC_EN_TT0 | IGC_TSAUXC_EN_CLK0 | + IGC_TSAUXC_ST0); tsim &= ~IGC_TSICR_TT0; } if (on) { From c2258d55935c23095e545c7e36f08d8b3433969b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= Date: Mon, 9 Jan 2023 16:32:23 +0100 Subject: [PATCH 162/177] net: lan966x: check for ptp to be enabled in lan966x_ptp_deinit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b0e380b5d4275299adf43e249f18309331b6f54f ] If ptp was not enabled due to missing IRQ for instance, lan966x_ptp_deinit() will dereference NULL pointers. Fixes: d096459494a8 ("net: lan966x: Add support for ptp clocks") Signed-off-by: Clément Léger Reviewed-by: Horatiu Vultur Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c index e5a2bbe064f8..8e368318558a 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c @@ -853,6 +853,9 @@ void lan966x_ptp_deinit(struct lan966x *lan966x) struct lan966x_port *port; int i; + if (!lan966x->ptp) + return; + for (i = 0; i < lan966x->num_phys_ports; i++) { port = lan966x->ports[i]; if (!port) From 4fe577ad7b29413f7a64cd9992c51ee1239a27a3 Mon Sep 17 00:00:00 2001 From: Jie Wang Date: Tue, 10 Jan 2023 19:53:59 +0800 Subject: [PATCH 163/177] net: hns3: fix wrong use of rss size during VF rss config [ Upstream commit ae9f29fdfd827ad06c1ae8155c042245a9d00757 ] Currently, it used old rss size to get current tc mode. As a result, the rss size is updated, but the tc mode is still configured based on the old rss size. So this patch fixes it by using the new rss size in both process. Fixes: 93969dc14fcd ("net: hns3: refactor VF rss init APIs with new common rss init APIs") Signed-off-by: Jie Wang Signed-off-by: Hao Lan Reviewed-by: Alexander Duyck Link: https://lore.kernel.org/r/20230110115359.10163-1-lanhao@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 081bd2c3f289..e84e5be8e59e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -3130,7 +3130,7 @@ static int hclgevf_set_channels(struct hnae3_handle *handle, u32 new_tqps_num, hclgevf_update_rss_size(handle, new_tqps_num); - hclge_comm_get_rss_tc_info(cur_rss_size, hdev->hw_tc_map, + hclge_comm_get_rss_tc_info(kinfo->rss_size, hdev->hw_tc_map, tc_offset, tc_valid, tc_size); ret = hclge_comm_set_rss_tc_mode(&hdev->hw.hw, tc_offset, tc_valid, tc_size); From de76fc134f74fa1005b7de2bf3841b503ad1d971 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 10 Jan 2023 20:25:47 -0800 Subject: [PATCH 164/177] bnxt: make sure we return pages to the pool [ Upstream commit 97f5e03a4a27d27ee4fed0cdb1658c81cf2784db ] Before the commit under Fixes the page would have been released from the pool before the napi_alloc_skb() call, so normal page freeing was fine (released page == no longer in the pool). After the change we just mark the page for recycling so it's still in the pool if the skb alloc fails, we need to recycle. Same commit added the same bug in the new bnxt_rx_multi_page_skb(). Fixes: 1dc4c557bfed ("bnxt: adding bnxt_xdp_build_skb to build skb from multibuffer xdp_buff") Reviewed-by: Andy Gospodarek Link: https://lore.kernel.org/r/20230111042547.987749-1-kuba@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f5a8bae8d79a..edca16b5f9e3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -990,7 +990,7 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp, DMA_ATTR_WEAK_ORDERING); skb = build_skb(page_address(page), PAGE_SIZE); if (!skb) { - __free_page(page); + page_pool_recycle_direct(rxr->page_pool, page); return NULL; } skb_mark_for_recycle(skb); @@ -1028,7 +1028,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp, skb = napi_alloc_skb(&rxr->bnapi->napi, payload); if (!skb) { - __free_page(page); + page_pool_recycle_direct(rxr->page_pool, page); return NULL; } From 50b3cdf8239b11545f311c4f7b89e0092e4feedb Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Tue, 20 Dec 2022 18:56:07 +0100 Subject: [PATCH 165/177] platform/surface: aggregator: Add missing call to ssam_request_sync_free() [ Upstream commit c965daac370f08a9b71d573a71d13cda76f2a884 ] Although rare, ssam_request_sync_init() can fail. In that case, the request should be freed via ssam_request_sync_free(). Currently it is leaked instead. Fix this. Fixes: c167b9c7e3d6 ("platform/surface: Add Surface Aggregator subsystem") Signed-off-by: Maximilian Luz Link: https://lore.kernel.org/r/20221220175608.1436273-1-luzmaximilian@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/surface/aggregator/controller.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/platform/surface/aggregator/controller.c b/drivers/platform/surface/aggregator/controller.c index 43e765199137..c6537a1b3a2e 100644 --- a/drivers/platform/surface/aggregator/controller.c +++ b/drivers/platform/surface/aggregator/controller.c @@ -1700,8 +1700,10 @@ int ssam_request_sync(struct ssam_controller *ctrl, return status; status = ssam_request_sync_init(rqst, spec->flags); - if (status) + if (status) { + ssam_request_sync_free(rqst); return status; + } ssam_request_sync_set_resp(rqst, rsp); From 3944162821295993ec89992dec98ab6be6306cc0 Mon Sep 17 00:00:00 2001 From: Miaoqian Lin Date: Thu, 29 Dec 2022 11:25:33 +0400 Subject: [PATCH 166/177] platform/x86/amd: Fix refcount leak in amd_pmc_probe [ Upstream commit ccb32e2be14271a60e9ba89c6d5660cc9998773c ] pci_get_domain_bus_and_slot() takes reference, the caller should release the reference by calling pci_dev_put() after use. Call pci_dev_put() in the error path to fix this. Fixes: 3d7d407dfb05 ("platform/x86: amd-pmc: Add support for AMD Spill to DRAM STB feature") Signed-off-by: Miaoqian Lin Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20221229072534.1381432-1-linmq006@gmail.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Sasha Levin --- drivers/platform/x86/amd/pmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/amd/pmc.c b/drivers/platform/x86/amd/pmc.c index 439d282aafd1..8d924986381b 100644 --- a/drivers/platform/x86/amd/pmc.c +++ b/drivers/platform/x86/amd/pmc.c @@ -932,7 +932,7 @@ static int amd_pmc_probe(struct platform_device *pdev) if (enable_stb && (dev->cpu_id == AMD_CPU_ID_YC || dev->cpu_id == AMD_CPU_ID_CB)) { err = amd_pmc_s2d_init(dev); if (err) - return err; + goto err_pci_dev_put; } platform_set_drvdata(pdev, dev); From a474d4ad59cd4642d1b7e3a6c08cef9eca0992c8 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Fri, 13 Jan 2023 09:53:11 +0100 Subject: [PATCH 167/177] ALSA: usb-audio: Fix possible NULL pointer dereference in snd_usb_pcm_has_fixed_rate() [ Upstream commit 92a9c0ad86d47ff4cce899012e355c400f02cfb8 ] The subs function argument may be NULL, so do not use it before the NULL check. Fixes: 291e9da91403 ("ALSA: usb-audio: Always initialize fixed_rate in snd_usb_find_implicit_fb_sync_format()") Reported-by: coverity-bot Link: https://lore.kernel.org/alsa-devel/202301121424.4A79A485@keescook/ Signed-off-by: Jaroslav Kysela Link: https://lore.kernel.org/r/20230113085311.623325-1-perex@perex.cz Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/usb/pcm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 29838000eee0..2c5765cbed2d 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -160,11 +160,12 @@ find_substream_format(struct snd_usb_substream *subs, bool snd_usb_pcm_has_fixed_rate(struct snd_usb_substream *subs) { const struct audioformat *fp; - struct snd_usb_audio *chip = subs->stream->chip; + struct snd_usb_audio *chip; int rate = -1; if (!subs) return false; + chip = subs->stream->chip; if (!(chip->quirk_flags & QUIRK_FLAG_FIXED_RATE)) return false; list_for_each_entry(fp, &subs->fmt_list, list) { From adc96d30f6503d30dc68670c013716f1d9fcc747 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 19 Dec 2022 10:10:04 +0100 Subject: [PATCH 168/177] efi: fix NULL-deref in init error path [ Upstream commit 703c13fe3c9af557d312f5895ed6a5fda2711104 ] In cases where runtime services are not supported or have been disabled, the runtime services workqueue will never have been allocated. Do not try to destroy the workqueue unconditionally in the unlikely event that EFI initialisation fails to avoid dereferencing a NULL pointer. Fixes: 98086df8b70c ("efi: add missed destroy_workqueue when efisubsys_init fails") Cc: stable@vger.kernel.org Cc: Li Heng Signed-off-by: Johan Hovold Signed-off-by: Ard Biesheuvel Signed-off-by: Sasha Levin --- drivers/firmware/efi/efi.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index f12cc29bd4b8..033aac6be7da 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -374,8 +374,8 @@ static int __init efisubsys_init(void) efi_kobj = kobject_create_and_add("efi", firmware_kobj); if (!efi_kobj) { pr_err("efi: Firmware registration failed.\n"); - destroy_workqueue(efi_rts_wq); - return -ENOMEM; + error = -ENOMEM; + goto err_destroy_wq; } if (efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE | @@ -423,7 +423,10 @@ err_unregister: generic_ops_unregister(); err_put: kobject_put(efi_kobj); - destroy_workqueue(efi_rts_wq); +err_destroy_wq: + if (efi_rts_wq) + destroy_workqueue(efi_rts_wq); + return error; } From 7fc3990dad04a677606337ebc61964094d6cb41b Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 14 Jan 2023 08:41:33 -0700 Subject: [PATCH 169/177] io_uring: lock overflowing for IOPOLL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 544d163d659d45a206d8929370d5a2984e546cb7 upstream. syzbot reports an issue with overflow filling for IOPOLL: WARNING: CPU: 0 PID: 28 at io_uring/io_uring.c:734 io_cqring_event_overflow+0x1c0/0x230 io_uring/io_uring.c:734 CPU: 0 PID: 28 Comm: kworker/u4:1 Not tainted 6.2.0-rc3-syzkaller-16369-g358a161a6a9e #0 Workqueue: events_unbound io_ring_exit_work Call trace:  io_cqring_event_overflow+0x1c0/0x230 io_uring/io_uring.c:734  io_req_cqe_overflow+0x5c/0x70 io_uring/io_uring.c:773  io_fill_cqe_req io_uring/io_uring.h:168 [inline]  io_do_iopoll+0x474/0x62c io_uring/rw.c:1065  io_iopoll_try_reap_events+0x6c/0x108 io_uring/io_uring.c:1513  io_uring_try_cancel_requests+0x13c/0x258 io_uring/io_uring.c:3056  io_ring_exit_work+0xec/0x390 io_uring/io_uring.c:2869  process_one_work+0x2d8/0x504 kernel/workqueue.c:2289  worker_thread+0x340/0x610 kernel/workqueue.c:2436  kthread+0x12c/0x158 kernel/kthread.c:376  ret_from_fork+0x10/0x20 arch/arm64/kernel/entry.S:863 There is no real problem for normal IOPOLL as flush is also called with uring_lock taken, but it's getting more complicated for IOPOLL|SQPOLL, for which __io_cqring_overflow_flush() happens from the CQ waiting path. Reported-and-tested-by: syzbot+6805087452d72929404e@syzkaller.appspotmail.com Cc: stable@vger.kernel.org # 5.10+ Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- io_uring/rw.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/io_uring/rw.c b/io_uring/rw.c index bb47cc4da713..6223472095d2 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -1055,7 +1055,11 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin) continue; req->cqe.flags = io_put_kbuf(req, 0); - __io_fill_cqe_req(req->ctx, req); + if (unlikely(!__io_fill_cqe_req(ctx, req))) { + spin_lock(&ctx->completion_lock); + io_req_cqe_overflow(req); + spin_unlock(&ctx->completion_lock); + } } if (unlikely(!nr_events)) From c06015ebc4367be38904b88582e13cc079672075 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 14 Jan 2023 08:46:14 -0700 Subject: [PATCH 170/177] io_uring/poll: attempt request issue after racy poll wakeup commit 6e5aedb9324aab1c14a23fae3d8eeb64a679c20e upstream. If we have multiple requests waiting on the same target poll waitqueue, then it's quite possible to get a request triggered and get disappointed in not being able to make any progress with it. If we race in doing so, we'll potentially leave the poll request on the internal tables, but removed from the waitqueue. That means that any subsequent trigger of the poll waitqueue will not kick that request into action, causing an application to potentially wait for completion of a request that will never happen. Fix this by adding a new poll return state, IOU_POLL_REISSUE. Rather than have complicated logic for how to re-arm a given type of request, just punt it for a reissue. While in there, move the 'ret' variable to the only section where it gets used. This avoids confusion the scope of it. Cc: stable@vger.kernel.org Fixes: eb0089d629ba ("io_uring: single shot poll removal optimisation") Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- io_uring/poll.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/io_uring/poll.c b/io_uring/poll.c index df42fd8a6ab0..f2f9f174fc62 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -223,22 +223,23 @@ enum { IOU_POLL_DONE = 0, IOU_POLL_NO_ACTION = 1, IOU_POLL_REMOVE_POLL_USE_RES = 2, + IOU_POLL_REISSUE = 3, }; /* * All poll tw should go through this. Checks for poll events, manages * references, does rewait, etc. * - * Returns a negative error on failure. IOU_POLL_NO_ACTION when no action require, - * which is either spurious wakeup or multishot CQE is served. - * IOU_POLL_DONE when it's done with the request, then the mask is stored in req->cqe.res. - * IOU_POLL_REMOVE_POLL_USE_RES indicates to remove multishot poll and that the result - * is stored in req->cqe. + * Returns a negative error on failure. IOU_POLL_NO_ACTION when no action + * require, which is either spurious wakeup or multishot CQE is served. + * IOU_POLL_DONE when it's done with the request, then the mask is stored in + * req->cqe.res. IOU_POLL_REMOVE_POLL_USE_RES indicates to remove multishot + * poll and that the result is stored in req->cqe. */ static int io_poll_check_events(struct io_kiocb *req, bool *locked) { struct io_ring_ctx *ctx = req->ctx; - int v, ret; + int v; /* req->task == current here, checking PF_EXITING is safe */ if (unlikely(req->task->flags & PF_EXITING)) @@ -274,10 +275,15 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) if (!req->cqe.res) { struct poll_table_struct pt = { ._key = req->apoll_events }; req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events; + /* + * We got woken with a mask, but someone else got to + * it first. The above vfs_poll() doesn't add us back + * to the waitqueue, so if we get nothing back, we + * should be safe and attempt a reissue. + */ + if (unlikely(!req->cqe.res)) + return IOU_POLL_REISSUE; } - - if ((unlikely(!req->cqe.res))) - continue; if (req->apoll_events & EPOLLONESHOT) return IOU_POLL_DONE; if (io_is_uring_fops(req->file)) @@ -294,7 +300,7 @@ static int io_poll_check_events(struct io_kiocb *req, bool *locked) return IOU_POLL_REMOVE_POLL_USE_RES; } } else { - ret = io_poll_issue(req, locked); + int ret = io_poll_issue(req, locked); if (ret == IOU_STOP_MULTISHOT) return IOU_POLL_REMOVE_POLL_USE_RES; if (ret < 0) @@ -325,6 +331,11 @@ static void io_poll_task_func(struct io_kiocb *req, bool *locked) if (ret == IOU_POLL_DONE) { struct io_poll *poll = io_kiocb_to_cmd(req, struct io_poll); req->cqe.res = mangle_poll(req->cqe.res & poll->events); + } else if (ret == IOU_POLL_REISSUE) { + io_poll_remove_entries(req); + io_poll_tw_hash_eject(req, locked); + io_req_task_submit(req, locked); + return; } else if (ret != IOU_POLL_REMOVE_POLL_USE_RES) { req->cqe.res = ret; req_set_fail(req); @@ -350,7 +361,7 @@ static void io_apoll_task_func(struct io_kiocb *req, bool *locked) if (ret == IOU_POLL_REMOVE_POLL_USE_RES) io_req_complete_post(req); - else if (ret == IOU_POLL_DONE) + else if (ret == IOU_POLL_DONE || ret == IOU_POLL_REISSUE) io_req_task_submit(req, locked); else io_req_complete_failed(req, ret); From 3925336af00f79fb7f3a390b6ab30d455d32d863 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 25 Oct 2022 21:50:15 +0200 Subject: [PATCH 171/177] drm/i915: Fix CFI violations in gt_sysfs commit a8a4f0467d706fc22d286dfa973946e5944b793c upstream. When booting with CONFIG_CFI_CLANG, there are numerous violations when accessing the files under /sys/devices/pci0000:00/0000:00:02.0/drm/card0/gt/gt0: $ cd /sys/devices/pci0000:00/0000:00:02.0/drm/card0/gt/gt0 $ grep . * id:0 punit_req_freq_mhz:350 rc6_enable:1 rc6_residency_ms:214934 rps_act_freq_mhz:1300 rps_boost_freq_mhz:1300 rps_cur_freq_mhz:350 rps_max_freq_mhz:1300 rps_min_freq_mhz:350 rps_RP0_freq_mhz:1300 rps_RP1_freq_mhz:350 rps_RPn_freq_mhz:350 throttle_reason_pl1:0 throttle_reason_pl2:0 throttle_reason_pl4:0 throttle_reason_prochot:0 throttle_reason_ratl:0 throttle_reason_status:0 throttle_reason_thermal:0 throttle_reason_vr_tdc:0 throttle_reason_vr_thermalert:0 $ sudo dmesg &| grep "CFI failure at" [ 214.595903] CFI failure at kobj_attr_show+0x19/0x30 (target: id_show+0x0/0x70 [i915]; expected type: 0xc527b809) [ 214.596064] CFI failure at kobj_attr_show+0x19/0x30 (target: punit_req_freq_mhz_show+0x0/0x40 [i915]; expected type: 0xc527b809) [ 214.596407] CFI failure at kobj_attr_show+0x19/0x30 (target: rc6_enable_show+0x0/0x40 [i915]; expected type: 0xc527b809) [ 214.596528] CFI failure at kobj_attr_show+0x19/0x30 (target: rc6_residency_ms_show+0x0/0x270 [i915]; expected type: 0xc527b809) [ 214.596682] CFI failure at kobj_attr_show+0x19/0x30 (target: act_freq_mhz_show+0x0/0xe0 [i915]; expected type: 0xc527b809) [ 214.596792] CFI failure at kobj_attr_show+0x19/0x30 (target: boost_freq_mhz_show+0x0/0xe0 [i915]; expected type: 0xc527b809) [ 214.596893] CFI failure at kobj_attr_show+0x19/0x30 (target: cur_freq_mhz_show+0x0/0xe0 [i915]; expected type: 0xc527b809) [ 214.596996] CFI failure at kobj_attr_show+0x19/0x30 (target: max_freq_mhz_show+0x0/0xe0 [i915]; expected type: 0xc527b809) [ 214.597099] CFI failure at kobj_attr_show+0x19/0x30 (target: min_freq_mhz_show+0x0/0xe0 [i915]; expected type: 0xc527b809) [ 214.597198] CFI failure at kobj_attr_show+0x19/0x30 (target: RP0_freq_mhz_show+0x0/0xe0 [i915]; expected type: 0xc527b809) [ 214.597301] CFI failure at kobj_attr_show+0x19/0x30 (target: RP1_freq_mhz_show+0x0/0xe0 [i915]; expected type: 0xc527b809) [ 214.597405] CFI failure at kobj_attr_show+0x19/0x30 (target: RPn_freq_mhz_show+0x0/0xe0 [i915]; expected type: 0xc527b809) [ 214.597538] CFI failure at kobj_attr_show+0x19/0x30 (target: throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809) [ 214.597701] CFI failure at kobj_attr_show+0x19/0x30 (target: throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809) [ 214.597836] CFI failure at kobj_attr_show+0x19/0x30 (target: throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809) [ 214.597952] CFI failure at kobj_attr_show+0x19/0x30 (target: throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809) [ 214.598071] CFI failure at kobj_attr_show+0x19/0x30 (target: throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809) [ 214.598177] CFI failure at kobj_attr_show+0x19/0x30 (target: throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809) [ 214.598307] CFI failure at kobj_attr_show+0x19/0x30 (target: throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809) [ 214.598439] CFI failure at kobj_attr_show+0x19/0x30 (target: throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809) [ 214.598542] CFI failure at kobj_attr_show+0x19/0x30 (target: throttle_reason_bool_show+0x0/0x50 [i915]; expected type: 0xc527b809) With kCFI, indirect calls are validated against their expected type versus actual type and failures occur when the two types do not match. The ultimate issue is that these sysfs functions are expecting to be called via dev_attr_show() but they may also be called via kobj_attr_show(), as certain files are created under two different kobjects that have two different sysfs_ops in intel_gt_sysfs_register(), hence the warnings above. When accessing the gt_ files under /sys/devices/pci0000:00/0000:00:02.0/drm/card0, which are using the same sysfs functions, there are no violations, meaning the functions are being called with the proper type. To make everything work properly, adjust certain functions to match the type of the ->show() and ->store() members in 'struct kobj_attribute'. Add a macro to generate functions for that can be called via both dev_attr_{show,store}() or kobj_attr_{show,store}() so that they can be called through both kobject locations without violating kCFI and adjust the attribute groups to account for this. Link: https://github.com/ClangBuiltLinux/linux/issues/1716 Reviewed-by: Andi Shyti Reviewed-by: Andrzej Hajda Reviewed-by: Kees Cook Signed-off-by: Nathan Chancellor Signed-off-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20221013205909.1282545-1-nathan@kernel.org Signed-off-by: Nathan Chancellor Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gt/intel_gt_sysfs.c | 15 +- drivers/gpu/drm/i915/gt/intel_gt_sysfs.h | 2 +- drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c | 461 +++++++++----------- 3 files changed, 220 insertions(+), 258 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c index d651ccd0ab20..9486dd3bed99 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.c @@ -22,11 +22,9 @@ bool is_object_gt(struct kobject *kobj) return !strncmp(kobj->name, "gt", 2); } -struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev, +struct intel_gt *intel_gt_sysfs_get_drvdata(struct kobject *kobj, const char *name) { - struct kobject *kobj = &dev->kobj; - /* * We are interested at knowing from where the interface * has been called, whether it's called from gt/ or from @@ -38,6 +36,7 @@ struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev, * "struct drm_i915_private *" type. */ if (!is_object_gt(kobj)) { + struct device *dev = kobj_to_dev(kobj); struct drm_i915_private *i915 = kdev_minor_to_i915(dev); return to_gt(i915); @@ -51,18 +50,18 @@ static struct kobject *gt_get_parent_obj(struct intel_gt *gt) return >->i915->drm.primary->kdev->kobj; } -static ssize_t id_show(struct device *dev, - struct device_attribute *attr, +static ssize_t id_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); return sysfs_emit(buf, "%u\n", gt->info.id); } -static DEVICE_ATTR_RO(id); +static struct kobj_attribute attr_id = __ATTR_RO(id); static struct attribute *id_attrs[] = { - &dev_attr_id.attr, + &attr_id.attr, NULL, }; ATTRIBUTE_GROUPS(id); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h index 6232923a420d..c3a123faee98 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs.h @@ -30,7 +30,7 @@ static inline struct intel_gt *kobj_to_gt(struct kobject *kobj) void intel_gt_sysfs_register(struct intel_gt *gt); void intel_gt_sysfs_unregister(struct intel_gt *gt); -struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev, +struct intel_gt *intel_gt_sysfs_get_drvdata(struct kobject *kobj, const char *name); #endif /* SYSFS_GT_H */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c index 180dd6f3ef57..b108f0a8a044 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c @@ -24,14 +24,15 @@ enum intel_gt_sysfs_op { }; static int -sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr, +sysfs_gt_attribute_w_func(struct kobject *kobj, struct attribute *attr, int (func)(struct intel_gt *gt, u32 val), u32 val) { struct intel_gt *gt; int ret; - if (!is_object_gt(&dev->kobj)) { + if (!is_object_gt(kobj)) { int i; + struct device *dev = kobj_to_dev(kobj); struct drm_i915_private *i915 = kdev_minor_to_i915(dev); for_each_gt(gt, i915, i) { @@ -40,7 +41,7 @@ sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr, break; } } else { - gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + gt = intel_gt_sysfs_get_drvdata(kobj, attr->name); ret = func(gt, val); } @@ -48,7 +49,7 @@ sysfs_gt_attribute_w_func(struct device *dev, struct device_attribute *attr, } static u32 -sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr, +sysfs_gt_attribute_r_func(struct kobject *kobj, struct attribute *attr, u32 (func)(struct intel_gt *gt), enum intel_gt_sysfs_op op) { @@ -57,8 +58,9 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr, ret = (op == INTEL_GT_SYSFS_MAX) ? 0 : (u32) -1; - if (!is_object_gt(&dev->kobj)) { + if (!is_object_gt(kobj)) { int i; + struct device *dev = kobj_to_dev(kobj); struct drm_i915_private *i915 = kdev_minor_to_i915(dev); for_each_gt(gt, i915, i) { @@ -77,7 +79,7 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr, } } } else { - gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + gt = intel_gt_sysfs_get_drvdata(kobj, attr->name); ret = func(gt); } @@ -92,6 +94,76 @@ sysfs_gt_attribute_r_func(struct device *dev, struct device_attribute *attr, #define sysfs_gt_attribute_r_max_func(d, a, f) \ sysfs_gt_attribute_r_func(d, a, f, INTEL_GT_SYSFS_MAX) +#define INTEL_GT_SYSFS_SHOW(_name, _attr_type) \ + static ssize_t _name##_show_common(struct kobject *kobj, \ + struct attribute *attr, char *buff) \ + { \ + u32 val = sysfs_gt_attribute_r_##_attr_type##_func(kobj, attr, \ + __##_name##_show); \ + \ + return sysfs_emit(buff, "%u\n", val); \ + } \ + static ssize_t _name##_show(struct kobject *kobj, \ + struct kobj_attribute *attr, char *buff) \ + { \ + return _name ##_show_common(kobj, &attr->attr, buff); \ + } \ + static ssize_t _name##_dev_show(struct device *dev, \ + struct device_attribute *attr, char *buff) \ + { \ + return _name##_show_common(&dev->kobj, &attr->attr, buff); \ + } + +#define INTEL_GT_SYSFS_STORE(_name, _func) \ + static ssize_t _name##_store_common(struct kobject *kobj, \ + struct attribute *attr, \ + const char *buff, size_t count) \ + { \ + int ret; \ + u32 val; \ + \ + ret = kstrtou32(buff, 0, &val); \ + if (ret) \ + return ret; \ + \ + ret = sysfs_gt_attribute_w_func(kobj, attr, _func, val); \ + \ + return ret ?: count; \ + } \ + static ssize_t _name##_store(struct kobject *kobj, \ + struct kobj_attribute *attr, const char *buff, \ + size_t count) \ + { \ + return _name##_store_common(kobj, &attr->attr, buff, count); \ + } \ + static ssize_t _name##_dev_store(struct device *dev, \ + struct device_attribute *attr, \ + const char *buff, size_t count) \ + { \ + return _name##_store_common(&dev->kobj, &attr->attr, buff, count); \ + } + +#define INTEL_GT_SYSFS_SHOW_MAX(_name) INTEL_GT_SYSFS_SHOW(_name, max) +#define INTEL_GT_SYSFS_SHOW_MIN(_name) INTEL_GT_SYSFS_SHOW(_name, min) + +#define INTEL_GT_ATTR_RW(_name) \ + static struct kobj_attribute attr_##_name = __ATTR_RW(_name) + +#define INTEL_GT_ATTR_RO(_name) \ + static struct kobj_attribute attr_##_name = __ATTR_RO(_name) + +#define INTEL_GT_DUAL_ATTR_RW(_name) \ + static struct device_attribute dev_attr_##_name = __ATTR(_name, 0644, \ + _name##_dev_show, \ + _name##_dev_store); \ + INTEL_GT_ATTR_RW(_name) + +#define INTEL_GT_DUAL_ATTR_RO(_name) \ + static struct device_attribute dev_attr_##_name = __ATTR(_name, 0444, \ + _name##_dev_show, \ + NULL); \ + INTEL_GT_ATTR_RO(_name) + #ifdef CONFIG_PM static u32 get_residency(struct intel_gt *gt, i915_reg_t reg) { @@ -104,11 +176,8 @@ static u32 get_residency(struct intel_gt *gt, i915_reg_t reg) return DIV_ROUND_CLOSEST_ULL(res, 1000); } -static ssize_t rc6_enable_show(struct device *dev, - struct device_attribute *attr, - char *buff) +static u8 get_rc6_mask(struct intel_gt *gt) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); u8 mask = 0; if (HAS_RC6(gt->i915)) @@ -118,7 +187,25 @@ static ssize_t rc6_enable_show(struct device *dev, if (HAS_RC6pp(gt->i915)) mask |= BIT(2); - return sysfs_emit(buff, "%x\n", mask); + return mask; +} + +static ssize_t rc6_enable_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buff) +{ + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); + + return sysfs_emit(buff, "%x\n", get_rc6_mask(gt)); +} + +static ssize_t rc6_enable_dev_show(struct device *dev, + struct device_attribute *attr, + char *buff) +{ + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(&dev->kobj, attr->attr.name); + + return sysfs_emit(buff, "%x\n", get_rc6_mask(gt)); } static u32 __rc6_residency_ms_show(struct intel_gt *gt) @@ -126,97 +213,79 @@ static u32 __rc6_residency_ms_show(struct intel_gt *gt) return get_residency(gt, GEN6_GT_GFX_RC6); } -static ssize_t rc6_residency_ms_show(struct device *dev, - struct device_attribute *attr, - char *buff) -{ - u32 rc6_residency = sysfs_gt_attribute_r_min_func(dev, attr, - __rc6_residency_ms_show); - - return sysfs_emit(buff, "%u\n", rc6_residency); -} - static u32 __rc6p_residency_ms_show(struct intel_gt *gt) { return get_residency(gt, GEN6_GT_GFX_RC6p); } -static ssize_t rc6p_residency_ms_show(struct device *dev, - struct device_attribute *attr, - char *buff) -{ - u32 rc6p_residency = sysfs_gt_attribute_r_min_func(dev, attr, - __rc6p_residency_ms_show); - - return sysfs_emit(buff, "%u\n", rc6p_residency); -} - static u32 __rc6pp_residency_ms_show(struct intel_gt *gt) { return get_residency(gt, GEN6_GT_GFX_RC6pp); } -static ssize_t rc6pp_residency_ms_show(struct device *dev, - struct device_attribute *attr, - char *buff) -{ - u32 rc6pp_residency = sysfs_gt_attribute_r_min_func(dev, attr, - __rc6pp_residency_ms_show); - - return sysfs_emit(buff, "%u\n", rc6pp_residency); -} - static u32 __media_rc6_residency_ms_show(struct intel_gt *gt) { return get_residency(gt, VLV_GT_MEDIA_RC6); } -static ssize_t media_rc6_residency_ms_show(struct device *dev, - struct device_attribute *attr, - char *buff) -{ - u32 rc6_residency = sysfs_gt_attribute_r_min_func(dev, attr, - __media_rc6_residency_ms_show); +INTEL_GT_SYSFS_SHOW_MIN(rc6_residency_ms); +INTEL_GT_SYSFS_SHOW_MIN(rc6p_residency_ms); +INTEL_GT_SYSFS_SHOW_MIN(rc6pp_residency_ms); +INTEL_GT_SYSFS_SHOW_MIN(media_rc6_residency_ms); - return sysfs_emit(buff, "%u\n", rc6_residency); -} - -static DEVICE_ATTR_RO(rc6_enable); -static DEVICE_ATTR_RO(rc6_residency_ms); -static DEVICE_ATTR_RO(rc6p_residency_ms); -static DEVICE_ATTR_RO(rc6pp_residency_ms); -static DEVICE_ATTR_RO(media_rc6_residency_ms); +INTEL_GT_DUAL_ATTR_RO(rc6_enable); +INTEL_GT_DUAL_ATTR_RO(rc6_residency_ms); +INTEL_GT_DUAL_ATTR_RO(rc6p_residency_ms); +INTEL_GT_DUAL_ATTR_RO(rc6pp_residency_ms); +INTEL_GT_DUAL_ATTR_RO(media_rc6_residency_ms); static struct attribute *rc6_attrs[] = { + &attr_rc6_enable.attr, + &attr_rc6_residency_ms.attr, + NULL +}; + +static struct attribute *rc6p_attrs[] = { + &attr_rc6p_residency_ms.attr, + &attr_rc6pp_residency_ms.attr, + NULL +}; + +static struct attribute *media_rc6_attrs[] = { + &attr_media_rc6_residency_ms.attr, + NULL +}; + +static struct attribute *rc6_dev_attrs[] = { &dev_attr_rc6_enable.attr, &dev_attr_rc6_residency_ms.attr, NULL }; -static struct attribute *rc6p_attrs[] = { +static struct attribute *rc6p_dev_attrs[] = { &dev_attr_rc6p_residency_ms.attr, &dev_attr_rc6pp_residency_ms.attr, NULL }; -static struct attribute *media_rc6_attrs[] = { +static struct attribute *media_rc6_dev_attrs[] = { &dev_attr_media_rc6_residency_ms.attr, NULL }; static const struct attribute_group rc6_attr_group[] = { { .attrs = rc6_attrs, }, - { .name = power_group_name, .attrs = rc6_attrs, }, + { .name = power_group_name, .attrs = rc6_dev_attrs, }, }; static const struct attribute_group rc6p_attr_group[] = { { .attrs = rc6p_attrs, }, - { .name = power_group_name, .attrs = rc6p_attrs, }, + { .name = power_group_name, .attrs = rc6p_dev_attrs, }, }; static const struct attribute_group media_rc6_attr_group[] = { { .attrs = media_rc6_attrs, }, - { .name = power_group_name, .attrs = media_rc6_attrs, }, + { .name = power_group_name, .attrs = media_rc6_dev_attrs, }, }; static int __intel_gt_sysfs_create_group(struct kobject *kobj, @@ -271,104 +340,34 @@ static u32 __act_freq_mhz_show(struct intel_gt *gt) return intel_rps_read_actual_frequency(>->rps); } -static ssize_t act_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 actual_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __act_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", actual_freq); -} - static u32 __cur_freq_mhz_show(struct intel_gt *gt) { return intel_rps_get_requested_frequency(>->rps); } -static ssize_t cur_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 cur_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __cur_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", cur_freq); -} - static u32 __boost_freq_mhz_show(struct intel_gt *gt) { return intel_rps_get_boost_frequency(>->rps); } -static ssize_t boost_freq_mhz_show(struct device *dev, - struct device_attribute *attr, - char *buff) -{ - u32 boost_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __boost_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", boost_freq); -} - static int __boost_freq_mhz_store(struct intel_gt *gt, u32 val) { return intel_rps_set_boost_frequency(>->rps, val); } -static ssize_t boost_freq_mhz_store(struct device *dev, - struct device_attribute *attr, - const char *buff, size_t count) -{ - ssize_t ret; - u32 val; - - ret = kstrtou32(buff, 0, &val); - if (ret) - return ret; - - return sysfs_gt_attribute_w_func(dev, attr, - __boost_freq_mhz_store, val) ?: count; -} - -static u32 __rp0_freq_mhz_show(struct intel_gt *gt) +static u32 __RP0_freq_mhz_show(struct intel_gt *gt) { return intel_rps_get_rp0_frequency(>->rps); } -static ssize_t RP0_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 rp0_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __rp0_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", rp0_freq); -} - -static u32 __rp1_freq_mhz_show(struct intel_gt *gt) -{ - return intel_rps_get_rp1_frequency(>->rps); -} - -static ssize_t RP1_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 rp1_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __rp1_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", rp1_freq); -} - -static u32 __rpn_freq_mhz_show(struct intel_gt *gt) +static u32 __RPn_freq_mhz_show(struct intel_gt *gt) { return intel_rps_get_rpn_frequency(>->rps); } -static ssize_t RPn_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) +static u32 __RP1_freq_mhz_show(struct intel_gt *gt) { - u32 rpn_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __rpn_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", rpn_freq); + return intel_rps_get_rp1_frequency(>->rps); } static u32 __max_freq_mhz_show(struct intel_gt *gt) @@ -376,71 +375,21 @@ static u32 __max_freq_mhz_show(struct intel_gt *gt) return intel_rps_get_max_frequency(>->rps); } -static ssize_t max_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 max_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __max_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", max_freq); -} - static int __set_max_freq(struct intel_gt *gt, u32 val) { return intel_rps_set_max_frequency(>->rps, val); } -static ssize_t max_freq_mhz_store(struct device *dev, - struct device_attribute *attr, - const char *buff, size_t count) -{ - int ret; - u32 val; - - ret = kstrtou32(buff, 0, &val); - if (ret) - return ret; - - ret = sysfs_gt_attribute_w_func(dev, attr, __set_max_freq, val); - - return ret ?: count; -} - static u32 __min_freq_mhz_show(struct intel_gt *gt) { return intel_rps_get_min_frequency(>->rps); } -static ssize_t min_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 min_freq = sysfs_gt_attribute_r_min_func(dev, attr, - __min_freq_mhz_show); - - return sysfs_emit(buff, "%u\n", min_freq); -} - static int __set_min_freq(struct intel_gt *gt, u32 val) { return intel_rps_set_min_frequency(>->rps, val); } -static ssize_t min_freq_mhz_store(struct device *dev, - struct device_attribute *attr, - const char *buff, size_t count) -{ - int ret; - u32 val; - - ret = kstrtou32(buff, 0, &val); - if (ret) - return ret; - - ret = sysfs_gt_attribute_w_func(dev, attr, __set_min_freq, val); - - return ret ?: count; -} - static u32 __vlv_rpe_freq_mhz_show(struct intel_gt *gt) { struct intel_rps *rps = >->rps; @@ -448,23 +397,31 @@ static u32 __vlv_rpe_freq_mhz_show(struct intel_gt *gt) return intel_gpu_freq(rps, rps->efficient_freq); } -static ssize_t vlv_rpe_freq_mhz_show(struct device *dev, - struct device_attribute *attr, char *buff) -{ - u32 rpe_freq = sysfs_gt_attribute_r_max_func(dev, attr, - __vlv_rpe_freq_mhz_show); +INTEL_GT_SYSFS_SHOW_MAX(act_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(boost_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(cur_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(RP0_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(RP1_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(RPn_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(max_freq_mhz); +INTEL_GT_SYSFS_SHOW_MIN(min_freq_mhz); +INTEL_GT_SYSFS_SHOW_MAX(vlv_rpe_freq_mhz); +INTEL_GT_SYSFS_STORE(boost_freq_mhz, __boost_freq_mhz_store); +INTEL_GT_SYSFS_STORE(max_freq_mhz, __set_max_freq); +INTEL_GT_SYSFS_STORE(min_freq_mhz, __set_min_freq); - return sysfs_emit(buff, "%u\n", rpe_freq); -} +#define INTEL_GT_RPS_SYSFS_ATTR(_name, _mode, _show, _store, _show_dev, _store_dev) \ + static struct device_attribute dev_attr_gt_##_name = __ATTR(gt_##_name, _mode, \ + _show_dev, _store_dev); \ + static struct kobj_attribute attr_rps_##_name = __ATTR(rps_##_name, _mode, \ + _show, _store) -#define INTEL_GT_RPS_SYSFS_ATTR(_name, _mode, _show, _store) \ - static struct device_attribute dev_attr_gt_##_name = __ATTR(gt_##_name, _mode, _show, _store); \ - static struct device_attribute dev_attr_rps_##_name = __ATTR(rps_##_name, _mode, _show, _store) - -#define INTEL_GT_RPS_SYSFS_ATTR_RO(_name) \ - INTEL_GT_RPS_SYSFS_ATTR(_name, 0444, _name##_show, NULL) -#define INTEL_GT_RPS_SYSFS_ATTR_RW(_name) \ - INTEL_GT_RPS_SYSFS_ATTR(_name, 0644, _name##_show, _name##_store) +#define INTEL_GT_RPS_SYSFS_ATTR_RO(_name) \ + INTEL_GT_RPS_SYSFS_ATTR(_name, 0444, _name##_show, NULL, \ + _name##_dev_show, NULL) +#define INTEL_GT_RPS_SYSFS_ATTR_RW(_name) \ + INTEL_GT_RPS_SYSFS_ATTR(_name, 0644, _name##_show, _name##_store, \ + _name##_dev_show, _name##_dev_store) /* The below macros generate static structures */ INTEL_GT_RPS_SYSFS_ATTR_RO(act_freq_mhz); @@ -475,32 +432,31 @@ INTEL_GT_RPS_SYSFS_ATTR_RO(RP1_freq_mhz); INTEL_GT_RPS_SYSFS_ATTR_RO(RPn_freq_mhz); INTEL_GT_RPS_SYSFS_ATTR_RW(max_freq_mhz); INTEL_GT_RPS_SYSFS_ATTR_RW(min_freq_mhz); +INTEL_GT_RPS_SYSFS_ATTR_RO(vlv_rpe_freq_mhz); -static DEVICE_ATTR_RO(vlv_rpe_freq_mhz); - -#define GEN6_ATTR(s) { \ - &dev_attr_##s##_act_freq_mhz.attr, \ - &dev_attr_##s##_cur_freq_mhz.attr, \ - &dev_attr_##s##_boost_freq_mhz.attr, \ - &dev_attr_##s##_max_freq_mhz.attr, \ - &dev_attr_##s##_min_freq_mhz.attr, \ - &dev_attr_##s##_RP0_freq_mhz.attr, \ - &dev_attr_##s##_RP1_freq_mhz.attr, \ - &dev_attr_##s##_RPn_freq_mhz.attr, \ +#define GEN6_ATTR(p, s) { \ + &p##attr_##s##_act_freq_mhz.attr, \ + &p##attr_##s##_cur_freq_mhz.attr, \ + &p##attr_##s##_boost_freq_mhz.attr, \ + &p##attr_##s##_max_freq_mhz.attr, \ + &p##attr_##s##_min_freq_mhz.attr, \ + &p##attr_##s##_RP0_freq_mhz.attr, \ + &p##attr_##s##_RP1_freq_mhz.attr, \ + &p##attr_##s##_RPn_freq_mhz.attr, \ NULL, \ } -#define GEN6_RPS_ATTR GEN6_ATTR(rps) -#define GEN6_GT_ATTR GEN6_ATTR(gt) +#define GEN6_RPS_ATTR GEN6_ATTR(, rps) +#define GEN6_GT_ATTR GEN6_ATTR(dev_, gt) static const struct attribute * const gen6_rps_attrs[] = GEN6_RPS_ATTR; static const struct attribute * const gen6_gt_attrs[] = GEN6_GT_ATTR; -static ssize_t punit_req_freq_mhz_show(struct device *dev, - struct device_attribute *attr, +static ssize_t punit_req_freq_mhz_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); u32 preq = intel_rps_read_punit_req_frequency(>->rps); return sysfs_emit(buff, "%u\n", preq); @@ -508,17 +464,17 @@ static ssize_t punit_req_freq_mhz_show(struct device *dev, struct intel_gt_bool_throttle_attr { struct attribute attr; - ssize_t (*show)(struct device *dev, struct device_attribute *attr, + ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr, char *buf); i915_reg_t reg32; u32 mask; }; -static ssize_t throttle_reason_bool_show(struct device *dev, - struct device_attribute *attr, +static ssize_t throttle_reason_bool_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); struct intel_gt_bool_throttle_attr *t_attr = (struct intel_gt_bool_throttle_attr *) attr; bool val = rps_read_mask_mmio(>->rps, t_attr->reg32, t_attr->mask); @@ -534,7 +490,7 @@ struct intel_gt_bool_throttle_attr attr_##sysfs_func__ = { \ .mask = mask__, \ } -static DEVICE_ATTR_RO(punit_req_freq_mhz); +INTEL_GT_ATTR_RO(punit_req_freq_mhz); static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_status, GT0_PERF_LIMIT_REASONS_MASK); static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_pl1, POWER_LIMIT_1_MASK); static INTEL_GT_RPS_BOOL_ATTR_RO(throttle_reason_pl2, POWER_LIMIT_2_MASK); @@ -597,8 +553,8 @@ static const struct attribute *throttle_reason_attrs[] = { #define U8_8_VAL_MASK 0xffff #define U8_8_SCALE_TO_VALUE "0.00390625" -static ssize_t freq_factor_scale_show(struct device *dev, - struct device_attribute *attr, +static ssize_t freq_factor_scale_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { return sysfs_emit(buff, "%s\n", U8_8_SCALE_TO_VALUE); @@ -610,11 +566,11 @@ static u32 media_ratio_mode_to_factor(u32 mode) return !mode ? mode : 256 / mode; } -static ssize_t media_freq_factor_show(struct device *dev, - struct device_attribute *attr, +static ssize_t media_freq_factor_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); struct intel_guc_slpc *slpc = >->uc.guc.slpc; intel_wakeref_t wakeref; u32 mode; @@ -641,11 +597,11 @@ static ssize_t media_freq_factor_show(struct device *dev, return sysfs_emit(buff, "%u\n", media_ratio_mode_to_factor(mode)); } -static ssize_t media_freq_factor_store(struct device *dev, - struct device_attribute *attr, +static ssize_t media_freq_factor_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buff, size_t count) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); struct intel_guc_slpc *slpc = >->uc.guc.slpc; u32 factor, mode; int err; @@ -670,11 +626,11 @@ static ssize_t media_freq_factor_store(struct device *dev, return err ?: count; } -static ssize_t media_RP0_freq_mhz_show(struct device *dev, - struct device_attribute *attr, +static ssize_t media_RP0_freq_mhz_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); u32 val; int err; @@ -691,11 +647,11 @@ static ssize_t media_RP0_freq_mhz_show(struct device *dev, return sysfs_emit(buff, "%u\n", val); } -static ssize_t media_RPn_freq_mhz_show(struct device *dev, - struct device_attribute *attr, +static ssize_t media_RPn_freq_mhz_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) { - struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name); + struct intel_gt *gt = intel_gt_sysfs_get_drvdata(kobj, attr->attr.name); u32 val; int err; @@ -712,17 +668,17 @@ static ssize_t media_RPn_freq_mhz_show(struct device *dev, return sysfs_emit(buff, "%u\n", val); } -static DEVICE_ATTR_RW(media_freq_factor); -static struct device_attribute dev_attr_media_freq_factor_scale = +INTEL_GT_ATTR_RW(media_freq_factor); +static struct kobj_attribute attr_media_freq_factor_scale = __ATTR(media_freq_factor.scale, 0444, freq_factor_scale_show, NULL); -static DEVICE_ATTR_RO(media_RP0_freq_mhz); -static DEVICE_ATTR_RO(media_RPn_freq_mhz); +INTEL_GT_ATTR_RO(media_RP0_freq_mhz); +INTEL_GT_ATTR_RO(media_RPn_freq_mhz); static const struct attribute *media_perf_power_attrs[] = { - &dev_attr_media_freq_factor.attr, - &dev_attr_media_freq_factor_scale.attr, - &dev_attr_media_RP0_freq_mhz.attr, - &dev_attr_media_RPn_freq_mhz.attr, + &attr_media_freq_factor.attr, + &attr_media_freq_factor_scale.attr, + &attr_media_RP0_freq_mhz.attr, + &attr_media_RPn_freq_mhz.attr, NULL }; @@ -754,20 +710,29 @@ static const struct attribute * const rps_defaults_attrs[] = { NULL }; -static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj, - const struct attribute * const *attrs) +static int intel_sysfs_rps_init(struct intel_gt *gt, struct kobject *kobj) { + const struct attribute * const *attrs; + struct attribute *vlv_attr; int ret; if (GRAPHICS_VER(gt->i915) < 6) return 0; + if (is_object_gt(kobj)) { + attrs = gen6_rps_attrs; + vlv_attr = &attr_rps_vlv_rpe_freq_mhz.attr; + } else { + attrs = gen6_gt_attrs; + vlv_attr = &dev_attr_gt_vlv_rpe_freq_mhz.attr; + } + ret = sysfs_create_files(kobj, attrs); if (ret) return ret; if (IS_VALLEYVIEW(gt->i915) || IS_CHERRYVIEW(gt->i915)) - ret = sysfs_create_file(kobj, &dev_attr_vlv_rpe_freq_mhz.attr); + ret = sysfs_create_file(kobj, vlv_attr); return ret; } @@ -778,9 +743,7 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj) intel_sysfs_rc6_init(gt, kobj); - ret = is_object_gt(kobj) ? - intel_sysfs_rps_init(gt, kobj, gen6_rps_attrs) : - intel_sysfs_rps_init(gt, kobj, gen6_gt_attrs); + ret = intel_sysfs_rps_init(gt, kobj); if (ret) drm_warn(>->i915->drm, "failed to create gt%u RPS sysfs files (%pe)", @@ -790,7 +753,7 @@ void intel_gt_sysfs_pm_init(struct intel_gt *gt, struct kobject *kobj) if (!is_object_gt(kobj)) return; - ret = sysfs_create_file(kobj, &dev_attr_punit_req_freq_mhz.attr); + ret = sysfs_create_file(kobj, &attr_punit_req_freq_mhz.attr); if (ret) drm_warn(>->i915->drm, "failed to create gt%u punit_req_freq_mhz sysfs (%pe)", From 0a14c3ded80c7dd399e68f361ee3338b5271c98e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 2 Jan 2023 16:49:46 -0700 Subject: [PATCH 172/177] io_uring/io-wq: free worker if task_work creation is canceled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit af82425c6a2d2f347c79b63ce74fca6dc6be157f upstream. If we cancel the task_work, the worker will never come into existance. As this is the last reference to it, ensure that we get it freed appropriately. Cc: stable@vger.kernel.org Reported-by: 진호 Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/io-wq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index 6f1d0e5df23a..992dcd9f8c4c 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -1230,6 +1230,7 @@ static void io_wq_cancel_tw_create(struct io_wq *wq) worker = container_of(cb, struct io_worker, create_work); io_worker_cancel_cb(worker); + kfree(worker); } } From e62e6258ab140b55bbef1f0d288263b75e2f0c20 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 8 Jan 2023 10:39:17 -0700 Subject: [PATCH 173/177] io_uring/io-wq: only free worker if it was allocated for creation commit e6db6f9398dadcbc06318a133d4c44a2d3844e61 upstream. We have two types of task_work based creation, one is using an existing worker to setup a new one (eg when going to sleep and we have no free workers), and the other is allocating a new worker. Only the latter should be freed when we cancel task_work creation for a new worker. Fixes: af82425c6a2d ("io_uring/io-wq: free worker if task_work creation is canceled") Reported-by: syzbot+d56ec896af3637bdb7e4@syzkaller.appspotmail.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- io_uring/io-wq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index 992dcd9f8c4c..411bb2d1acd4 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -1230,7 +1230,12 @@ static void io_wq_cancel_tw_create(struct io_wq *wq) worker = container_of(cb, struct io_worker, create_work); io_worker_cancel_cb(worker); - kfree(worker); + /* + * Only the worker continuation helper has worker allocated and + * hence needs freeing. + */ + if (cb->func == create_worker_cont) + kfree(worker); } } From 73a630b359c852d6edc113588c513ddbcd3be47a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 4 Jan 2023 08:51:19 -0700 Subject: [PATCH 174/177] block: handle bio_split_to_limits() NULL return commit 613b14884b8595e20b9fac4126bf627313827fbe upstream. This can't happen right now, but in preparation for allowing bio_split_to_limits() returning NULL if it ended the bio, check for it in all the callers. Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-merge.c | 4 +++- block/blk-mq.c | 5 ++++- drivers/block/drbd/drbd_req.c | 2 ++ drivers/block/ps3vram.c | 2 ++ drivers/md/dm.c | 2 ++ drivers/md/md.c | 2 ++ drivers/nvme/host/multipath.c | 2 ++ drivers/s390/block/dcssblk.c | 2 ++ 8 files changed, 19 insertions(+), 2 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index f46c87ef951d..84f03d066cb3 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -358,11 +358,13 @@ struct bio *__bio_split_to_limits(struct bio *bio, struct queue_limits *lim, default: split = bio_split_rw(bio, lim, nr_segs, bs, get_max_io_size(bio, lim) << SECTOR_SHIFT); + if (IS_ERR(split)) + return NULL; break; } if (split) { - /* there isn't chance to merge the splitted bio */ + /* there isn't chance to merge the split bio */ split->bi_opf |= REQ_NOMERGE; blkcg_bio_issue_init(split); diff --git a/block/blk-mq.c b/block/blk-mq.c index 0b855e033a83..63abbe342b28 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2919,8 +2919,11 @@ void blk_mq_submit_bio(struct bio *bio) blk_status_t ret; bio = blk_queue_bounce(bio, q); - if (bio_may_exceed_limits(bio, &q->limits)) + if (bio_may_exceed_limits(bio, &q->limits)) { bio = __bio_split_to_limits(bio, &q->limits, &nr_segs); + if (!bio) + return; + } if (!bio_integrity_prep(bio)) return; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 7f9bcc82fc9c..d700bf06b534 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1607,6 +1607,8 @@ void drbd_submit_bio(struct bio *bio) struct drbd_device *device = bio->bi_bdev->bd_disk->private_data; bio = bio_split_to_limits(bio); + if (!bio) + return; /* * what we "blindly" assume: diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c index c76e0148eada..574e470b220b 100644 --- a/drivers/block/ps3vram.c +++ b/drivers/block/ps3vram.c @@ -587,6 +587,8 @@ static void ps3vram_submit_bio(struct bio *bio) dev_dbg(&dev->core, "%s\n", __func__); bio = bio_split_to_limits(bio); + if (!bio) + return; spin_lock_irq(&priv->lock); busy = !bio_list_empty(&priv->list); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e30c2d2bc9c7..d49809e9db96 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1755,6 +1755,8 @@ static void dm_split_and_process_bio(struct mapped_device *md, * otherwise associated queue_limits won't be imposed. */ bio = bio_split_to_limits(bio); + if (!bio) + return; } init_clone_info(&ci, md, map, bio, is_abnormal); diff --git a/drivers/md/md.c b/drivers/md/md.c index fd82881761d3..b911085060dc 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -443,6 +443,8 @@ static void md_submit_bio(struct bio *bio) } bio = bio_split_to_limits(bio); + if (!bio) + return; if (mddev->ro == 1 && unlikely(rw == WRITE)) { if (bio_sectors(bio) != 0) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 7e025b8948cb..d09ed0070174 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -351,6 +351,8 @@ static void nvme_ns_head_submit_bio(struct bio *bio) * pool from the original queue to allocate the bvecs from. */ bio = bio_split_to_limits(bio); + if (!bio) + return; srcu_idx = srcu_read_lock(&head->srcu); ns = nvme_find_path(head); diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index b392b9f5482e..c0f85ffb2b62 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -865,6 +865,8 @@ dcssblk_submit_bio(struct bio *bio) unsigned long bytes_done; bio = bio_split_to_limits(bio); + if (!bio) + return; bytes_done = 0; dev_info = bio->bi_bdev->bd_disk->private_data; From 86cd9d9ebda6460a2ec2c9e9e5970da12c6b8fce Mon Sep 17 00:00:00 2001 From: Ferry Toth Date: Thu, 22 Dec 2022 21:53:02 +0100 Subject: [PATCH 175/177] Revert "usb: ulpi: defer ulpi_register on ulpi_read_id timeout" commit b659b613cea2ae39746ca8bd2b69d1985dd9d770 upstream. This reverts commit 8a7b31d545d3a15f0e6f5984ae16f0ca4fd76aac. This patch results in some qemu test failures, specifically xilinx-zynq-a9 machine and zynq-zc702 as well as zynq-zed devicetree files, when trying to boot from USB drive. Link: https://lore.kernel.org/lkml/20221220194334.GA942039@roeck-us.net/ Fixes: 8a7b31d545d3 ("usb: ulpi: defer ulpi_register on ulpi_read_id timeout") Cc: stable@vger.kernel.org Reported-by: Guenter Roeck Signed-off-by: Ferry Toth Link: https://lore.kernel.org/r/20221222205302.45761-1-ftoth@exalondelft.nl Signed-off-by: Greg Kroah-Hartman --- drivers/usb/common/ulpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c index 60e8174686a1..d7c8461976ce 100644 --- a/drivers/usb/common/ulpi.c +++ b/drivers/usb/common/ulpi.c @@ -207,7 +207,7 @@ static int ulpi_read_id(struct ulpi *ulpi) /* Test the interface */ ret = ulpi_write(ulpi, ULPI_SCRATCH, 0xaa); if (ret < 0) - return ret; + goto err; ret = ulpi_read(ulpi, ULPI_SCRATCH); if (ret < 0) From 046c9972dd40775cff4d12294ca14b0b624f2c35 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 13 Oct 2022 08:47:29 -0500 Subject: [PATCH 176/177] pinctrl: amd: Add dynamic debugging for active GPIOs commit 1d66e379731f79ae5039a869c0fde22a4f6a6a91 upstream. Some laptops have been reported to wake up from s2idle when plugging in the AC adapter or by closing the lid. This is a surprising behavior that is further clarified by commit cb3e7d624c3ff ("PM: wakeup: Add extra debugging statement for multiple active IRQs"). With that commit in place the following interaction can be seen when the lid is closed: [ 28.946038] PM: suspend-to-idle [ 28.946083] ACPI: EC: ACPI EC GPE status set [ 28.946101] ACPI: PM: Rearming ACPI SCI for wakeup [ 28.950152] Timekeeping suspended for 3.320 seconds [ 28.950152] PM: Triggering wakeup from IRQ 9 [ 28.950152] ACPI: EC: ACPI EC GPE status set [ 28.950152] ACPI: EC: ACPI EC GPE dispatched [ 28.995057] ACPI: EC: ACPI EC work flushed [ 28.995075] ACPI: PM: Rearming ACPI SCI for wakeup [ 28.995131] PM: Triggering wakeup from IRQ 9 [ 28.995271] ACPI: EC: ACPI EC GPE status set [ 28.995291] ACPI: EC: ACPI EC GPE dispatched [ 29.098556] ACPI: EC: ACPI EC work flushed [ 29.207020] ACPI: EC: ACPI EC work flushed [ 29.207037] ACPI: PM: Rearming ACPI SCI for wakeup [ 29.211095] Timekeeping suspended for 0.739 seconds [ 29.211095] PM: Triggering wakeup from IRQ 9 [ 29.211079] PM: Triggering wakeup from IRQ 7 [ 29.211095] ACPI: PM: ACPI non-EC GPE wakeup [ 29.211095] PM: resume from suspend-to-idle * IRQ9 on this laptop is used for the ACPI SCI. * IRQ7 on this laptop is used for the GPIO controller. What has occurred is when the lid was closed the EC woke up the SoC from it's deepest sleep state and the kernel's s2idle loop processed all EC events. When it was finished processing EC events, it checked for any other reasons to wake (break the s2idle loop). The IRQ for the GPIO controller was active so the loop broke, and then this IRQ was processed. This is not a kernel bug but it is certainly a surprising behavior, and to better debug it we should have a dynamic debugging message that we can enact to catch it. Acked-by: Basavaraj Natikar Acked-by: Kai-Heng Feng Acked-by: Mark Pearson Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20221013134729.5592-2-mario.limonciello@amd.com Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/pinctrl-amd.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 6be896871718..9bc6e3922e78 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -628,13 +628,15 @@ static bool do_amd_gpio_irq_handler(int irq, void *dev_id) /* Each status bit covers four pins */ for (i = 0; i < 4; i++) { regval = readl(regs + i); - /* caused wake on resume context for shared IRQ */ - if (irq < 0 && (regval & BIT(WAKE_STS_OFF))) { + + if (regval & PIN_IRQ_PENDING) dev_dbg(&gpio_dev->pdev->dev, - "Waking due to GPIO %d: 0x%x", + "GPIO %d is active: 0x%x", irqnr + i, regval); + + /* caused wake on resume context for shared IRQ */ + if (irq < 0 && (regval & BIT(WAKE_STS_OFF))) return true; - } if (!(regval & PIN_IRQ_PENDING) || !(regval & BIT(INTERRUPT_MASK_OFF))) From 21e996306a6afaae88295858de0ffb8955173a15 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 18 Jan 2023 11:58:34 +0100 Subject: [PATCH 177/177] Linux 6.1.7 Link: https://lore.kernel.org/r/20230116154803.321528435@linuxfoundation.org Tested-by: Salvatore Bonaccorso Tested-by: Ronald Warsow Tested-by: Conor Dooley Tested-by: Shuah Khan Tested-by: Justin M. Forbes Tested-by: Takeshi Ogasawara Tested-by: Rudi Heitbaum Tested-by: Bagas Sanjaya Tested-by: Ron Economos Tested-by: Linux Kernel Functional Testing Tested-by: Jon Hunter Tested-by: Sudip Mukherjee Tested-by: Allen Pais Link: https://lore.kernel.org/r/20230117124546.116438951@linuxfoundation.org Tested-by: Allen Pais Tested-by: Ronald Warsow Tested-by: Ron Economos Tested-by: Takeshi Ogasawara Tested-by: Guenter Roeck Tested-by: Kelsey Steele Tested-by: Bagas Sanjaya Tested-by: Fenil Jain Tested-by: Linux Kernel Functional Testing Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 19e8c6dec6e5..7eb6793ecfbf 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 6 +SUBLEVEL = 7 EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth