From 5c947a8a6f05509d08d2e6402d36e2544fb1af2d Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Fri, 26 Feb 2021 09:04:40 +0800 Subject: [PATCH 01/43] ALSA: hda/realtek: Enable headset mic of Acer SWIFT with ALC256 commit d0e185616a0331c87ce3aa1d7dfde8df39d6d002 upstream. The Acer SWIFT Swift SF314-54/55 laptops with ALC256 cannot detect both the headset mic and the internal mic. Introduce new fixup to enable the jack sense and the headset mic. However, the internal mic actually connects to Intel SST audio. It still needs Intel SST support to make internal mic capture work. Signed-off-by: Chris Chiu Acked-by: Jian-Hong Pan Cc: Link: https://lore.kernel.org/r/20210226010440.8474-1-chris.chiu@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5f4f8c2d760f..b47504fa8dfd 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6408,6 +6408,7 @@ enum { ALC236_FIXUP_DELL_AIO_HEADSET_MIC, ALC282_FIXUP_ACER_DISABLE_LINEOUT, ALC255_FIXUP_ACER_LIMIT_INT_MIC_BOOST, + ALC256_FIXUP_ACER_HEADSET_MIC, }; static const struct hda_fixup alc269_fixups[] = { @@ -7864,6 +7865,16 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC255_FIXUP_ACER_MIC_NO_PRESENCE, }, + [ALC256_FIXUP_ACER_HEADSET_MIC] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x02a1113c }, /* use as headset mic, without its own jack detect */ + { 0x1a, 0x90a1092f }, /* use as internal mic */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -7890,9 +7901,11 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1246, "Acer Predator Helios 500", ALC299_FIXUP_PREDATOR_SPK), SND_PCI_QUIRK(0x1025, 0x1247, "Acer vCopperbox", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS), SND_PCI_QUIRK(0x1025, 0x1248, "Acer Veriton N4660G", ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1025, 0x1269, "Acer SWIFT SF314-54", ALC256_FIXUP_ACER_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x128f, "Acer Veriton Z6860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x129c, "Acer SWIFT SF314-55", ALC256_FIXUP_ACER_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1330, "Acer TravelMate X514-51T", ALC255_FIXUP_ACER_HEADSET_MIC), From 4330e7a8bf01f6361f2c6d7fa34ddf0e108a188b Mon Sep 17 00:00:00 2001 From: Andrea Fagiani Date: Tue, 19 Jan 2021 08:47:44 +0000 Subject: [PATCH 02/43] ALSA: usb-audio: use Corsair Virtuoso mapping for Corsair Virtuoso SE commit 11302bb69e72d0526bc626ee5c451a3d22cde904 upstream. The Corsair Virtuoso SE RGB Wireless is a USB headset with a mic and a sidetone feature. Assign the Corsair Virtuoso name map to the SE product ids as well, in order to label its mixer appropriately and allow userspace to pick the correct volume controls. Signed-off-by: Andrea Fagiani Cc: Link: https://lore.kernel.org/r/40bbdf55-f854-e2ee-87b4-183e6451352c@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer_maps.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index a7212f16660e..646deb6244b1 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -536,6 +536,16 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = { .id = USB_ID(0x05a7, 0x1020), .map = bose_companion5_map, }, + { + /* Corsair Virtuoso SE (wired mode) */ + .id = USB_ID(0x1b1c, 0x0a3d), + .map = corsair_virtuoso_map, + }, + { + /* Corsair Virtuoso SE (wireless mode) */ + .id = USB_ID(0x1b1c, 0x0a3e), + .map = corsair_virtuoso_map, + }, { /* Corsair Virtuoso (wired mode) */ .id = USB_ID(0x1b1c, 0x0a41), From 86c524934277b279785659f7b453b31c8d143d0e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 27 Feb 2021 11:57:37 +0100 Subject: [PATCH 03/43] ALSA: usb-audio: Drop bogus dB range in too low level commit 21cba9c5359dd9d1bffe355336cfec0b66d1ee52 upstream. Some USB audio firmware seem to report broken dB values for the volume controls, and this screws up applications like PulseAudio who blindly trusts the given data. For example, Edifier G2000 reports a PCM volume from -128dB to -127dB, and this results in barely inaudible sound. This patch adds a sort of sanity check at parsing the dB values in USB-audio driver and disables the dB reporting if the range looks bogus. Here, we assume -96dB as the bottom line of the max dB. Note that, if one can figure out that proper dB range later, it can be patched in the mixer maps. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=211929 Cc: Link: https://lore.kernel.org/r/20210227105737.3656-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/mixer.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 81e987eaf063..375cfb9c9ab7 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1301,6 +1301,17 @@ no_res_check: /* totally crap, return an error */ return -EINVAL; } + } else { + /* if the max volume is too low, it's likely a bogus range; + * here we use -96dB as the threshold + */ + if (cval->dBmax <= -9600) { + usb_audio_info(cval->head.mixer->chip, + "%d:%d: bogus dB values (%d/%d), disabling dB reporting\n", + cval->head.id, mixer_ctrl_intf(cval->head.mixer), + cval->dBmin, cval->dBmax); + cval->dBmin = cval->dBmax = 0; + } } return 0; From bfa2e4ed1d399b2dde49307c127b2d1c48dcd72f Mon Sep 17 00:00:00 2001 From: Lukasz Majczak Date: Tue, 16 Feb 2021 10:17:49 +0200 Subject: [PATCH 04/43] tpm, tpm_tis: Decorate tpm_tis_gen_interrupt() with request_locality() commit d53a6adfb553969809eb2b736a976ebb5146cd95 upstream. This is shown with Samsung Chromebook Pro (Caroline) with TPM 1.2 (SLB 9670): [ 4.324298] TPM returned invalid status [ 4.324806] WARNING: CPU: 2 PID: 1 at drivers/char/tpm/tpm_tis_core.c:275 tpm_tis_status+0x86/0x8f Background ========== TCG PC Client Platform TPM Profile (PTP) Specification, paragraph 6.1 FIFO Interface Locality Usage per Register, Table 39 Register Behavior Based on Locality Setting for FIFO - a read attempt to TPM_STS_x Registers returns 0xFF in case of lack of locality. The fix ======= Decorate tpm_tis_gen_interrupt() with request_locality() and release_locality(). Cc: Laurent Bigonville Cc: James Bottomley Cc: Guenter Roeck Cc: stable@vger.kernel.org Fixes: a3fbfae82b4c ("tpm: take TPM chip power gating out of tpm_transmit()") Signed-off-by: Lukasz Majczak Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_tis_core.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c index 431919d5f48a..ce80e7a2d420 100644 --- a/drivers/char/tpm/tpm_tis_core.c +++ b/drivers/char/tpm/tpm_tis_core.c @@ -707,12 +707,22 @@ static int tpm_tis_gen_interrupt(struct tpm_chip *chip) const char *desc = "attempting to generate an interrupt"; u32 cap2; cap_t cap; + int ret; + /* TPM 2.0 */ if (chip->flags & TPM_CHIP_FLAG_TPM2) return tpm2_get_tpm_pt(chip, 0x100, &cap2, desc); - else - return tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc, - 0); + + /* TPM 1.2 */ + ret = request_locality(chip, 0); + if (ret < 0) + return ret; + + ret = tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc, 0); + + release_locality(chip, 0); + + return ret; } /* Register the IRQ and issue a command that will cause an interrupt. If an From fbefc2d2890021b1c57cb816983e529fb64eebd1 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Sat, 20 Feb 2021 00:55:59 +0200 Subject: [PATCH 05/43] tpm, tpm_tis: Decorate tpm_get_timeouts() with request_locality() commit a5665ec2affdba21bff3b0d4d3aed83b3951e8ff upstream. This is shown with Samsung Chromebook Pro (Caroline) with TPM 1.2 (SLB 9670): [ 4.324298] TPM returned invalid status [ 4.324806] WARNING: CPU: 2 PID: 1 at drivers/char/tpm/tpm_tis_core.c:275 tpm_tis_status+0x86/0x8f Background ========== TCG PC Client Platform TPM Profile (PTP) Specification, paragraph 6.1 FIFO Interface Locality Usage per Register, Table 39 Register Behavior Based on Locality Setting for FIFO - a read attempt to TPM_STS_x Registers returns 0xFF in case of lack of locality. The fix ======= Decorate tpm_get_timeouts() with request_locality() and release_locality(). Fixes: a3fbfae82b4c ("tpm: take TPM chip power gating out of tpm_transmit()") Cc: James Bottomley Cc: Guenter Roeck Cc: Laurent Bigonville Cc: stable@vger.kernel.org Reported-by: Lukasz Majczak Signed-off-by: Jarkko Sakkinen Signed-off-by: Greg Kroah-Hartman --- drivers/char/tpm/tpm_tis_core.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c index ce80e7a2d420..a2e0395cbe61 100644 --- a/drivers/char/tpm/tpm_tis_core.c +++ b/drivers/char/tpm/tpm_tis_core.c @@ -1029,11 +1029,21 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq, init_waitqueue_head(&priv->read_queue); init_waitqueue_head(&priv->int_queue); if (irq != -1) { - /* Before doing irq testing issue a command to the TPM in polling mode + /* + * Before doing irq testing issue a command to the TPM in polling mode * to make sure it works. May as well use that command to set the * proper timeouts for the driver. */ - if (tpm_get_timeouts(chip)) { + + rc = request_locality(chip, 0); + if (rc < 0) + goto out_err; + + rc = tpm_get_timeouts(chip); + + release_locality(chip, 0); + + if (rc) { dev_err(dev, "Could not get TPM timeouts and durations\n"); rc = -ENODEV; goto out_err; From a01415e5e862a60a52b10081532e09f9b3503592 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 25 Jan 2021 16:42:35 -0500 Subject: [PATCH 06/43] btrfs: avoid double put of block group when emptying cluster commit 95c85fba1f64c3249c67f0078a29f8a125078189 upstream. It's wrong calling btrfs_put_block_group in __btrfs_return_cluster_to_free_space if the block group passed is different than the block group the cluster represents. As this means the cluster doesn't have a reference to the passed block group. This results in double put and a use-after-free bug. Fix this by simply bailing if the block group we passed in does not match the block group on the cluster. Fixes: fa9c0d795f7b ("Btrfs: rework allocation clustering") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Josef Bacik Reviewed-by: David Sterba [ update changelog ] Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/free-space-cache.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index ae4059ce2f84..9ebc8e2237e8 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2714,8 +2714,10 @@ static void __btrfs_return_cluster_to_free_space( struct rb_node *node; spin_lock(&cluster->lock); - if (cluster->block_group != block_group) - goto out; + if (cluster->block_group != block_group) { + spin_unlock(&cluster->lock); + return; + } cluster->block_group = NULL; cluster->window_start = 0; @@ -2753,8 +2755,6 @@ static void __btrfs_return_cluster_to_free_space( entry->offset, &entry->offset_index, bitmap); } cluster->root = RB_ROOT; - -out: spin_unlock(&cluster->lock); btrfs_put_block_group(block_group); } From b2a48761321893e07721d8a2528c6db0b8ac75bf Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Wed, 27 Jan 2021 22:15:03 -0800 Subject: [PATCH 07/43] btrfs: fix raid6 qstripe kmap commit d70cef0d46729808dc53f145372c02b145c92604 upstream. When a qstripe is required an extra page is allocated and mapped. There were 3 problems: 1) There is no corresponding call of kunmap() for the qstripe page. 2) There is no reason to map the qstripe page more than once if the number of bits set in rbio->dbitmap is greater than one. 3) There is no reason to map the parity page and unmap it each time through the loop. The page memory can continue to be reused with a single mapping on each iteration by raid6_call.gen_syndrome() without remapping. So map the page for the duration of the loop. Similarly, improve the algorithm by mapping the parity page just 1 time. Fixes: 5a6ac9eacb49 ("Btrfs, raid56: support parity scrub on raid56") CC: stable@vger.kernel.org # 4.4.x: c17af96554a8: btrfs: raid56: simplify tracking of Q stripe presence CC: stable@vger.kernel.org # 4.4.x Signed-off-by: Ira Weiny Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/raid56.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 255490f42b5d..9d33bf0154ab 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -2363,16 +2363,21 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, SetPageUptodate(p_page); if (has_qstripe) { + /* RAID6, allocate and map temp space for the Q stripe */ q_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); if (!q_page) { __free_page(p_page); goto cleanup; } SetPageUptodate(q_page); + pointers[rbio->real_stripes - 1] = kmap(q_page); } atomic_set(&rbio->error, 0); + /* Map the parity stripe just once */ + pointers[nr_data] = kmap(p_page); + for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) { struct page *p; void *parity; @@ -2382,16 +2387,8 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, pointers[stripe] = kmap(p); } - /* then add the parity stripe */ - pointers[stripe++] = kmap(p_page); - if (has_qstripe) { - /* - * raid6, add the qstripe and call the - * library function to fill in our p/q - */ - pointers[stripe++] = kmap(q_page); - + /* RAID6, call the library function to fill in our P/Q */ raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE, pointers); } else { @@ -2412,12 +2409,14 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, for (stripe = 0; stripe < nr_data; stripe++) kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); - kunmap(p_page); } + kunmap(p_page); __free_page(p_page); - if (q_page) + if (q_page) { + kunmap(q_page); __free_page(q_page); + } writeback: /* From 501fdd1cefaeb15506221e36ffec2f76e46fc573 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 5 Feb 2021 12:55:37 +0000 Subject: [PATCH 08/43] btrfs: fix race between writes to swap files and scrub commit 195a49eaf655eb914896c92cecd96bc863c9feb3 upstream. When we active a swap file, at btrfs_swap_activate(), we acquire the exclusive operation lock to prevent the physical location of the swap file extents to be changed by operations such as balance and device replace/resize/remove. We also call there can_nocow_extent() which, among other things, checks if the block group of a swap file extent is currently RO, and if it is we can not use the extent, since a write into it would result in COWing the extent. However we have no protection against a scrub operation running after we activate the swap file, which can result in the swap file extents to be COWed while the scrub is running and operating on the respective block group, because scrub turns a block group into RO before it processes it and then back again to RW mode after processing it. That means an attempt to write into a swap file extent while scrub is processing the respective block group, will result in COWing the extent, changing its physical location on disk. Fix this by making sure that block groups that have extents that are used by active swap files can not be turned into RO mode, therefore making it not possible for a scrub to turn them into RO mode. When a scrub finds a block group that can not be turned to RO due to the existence of extents used by swap files, it proceeds to the next block group and logs a warning message that mentions the block group was skipped due to active swap files - this is the same approach we currently use for balance. Fixes: ed46ff3d42378 ("Btrfs: support swap files") CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Anand Jain Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/block-group.c | 33 ++++++++++++++++++++++++++++++++- fs/btrfs/block-group.h | 9 +++++++++ fs/btrfs/ctree.h | 5 +++++ fs/btrfs/inode.c | 19 ++++++++++++++++++- fs/btrfs/scrub.c | 9 ++++++++- 5 files changed, 72 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 9a5d652c1672..c99e293b50f5 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1229,6 +1229,11 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force) spin_lock(&sinfo->lock); spin_lock(&cache->lock); + if (cache->swap_extents) { + ret = -ETXTBSY; + goto out; + } + if (cache->ro) { cache->ro++; ret = 0; @@ -2274,7 +2279,7 @@ again: } ret = inc_block_group_ro(cache, 0); - if (!do_chunk_alloc) + if (!do_chunk_alloc || ret == -ETXTBSY) goto unlock_out; if (!ret) goto out; @@ -2283,6 +2288,8 @@ again: if (ret < 0) goto out; ret = inc_block_group_ro(cache, 0); + if (ret == -ETXTBSY) + goto unlock_out; out: if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) { alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags); @@ -3363,6 +3370,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) ASSERT(list_empty(&block_group->io_list)); ASSERT(list_empty(&block_group->bg_list)); ASSERT(refcount_read(&block_group->refs) == 1); + ASSERT(block_group->swap_extents == 0); btrfs_put_block_group(block_group); spin_lock(&info->block_group_cache_lock); @@ -3429,3 +3437,26 @@ void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group) __btrfs_remove_free_space_cache(block_group->free_space_ctl); } } + +bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg) +{ + bool ret = true; + + spin_lock(&bg->lock); + if (bg->ro) + ret = false; + else + bg->swap_extents++; + spin_unlock(&bg->lock); + + return ret; +} + +void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount) +{ + spin_lock(&bg->lock); + ASSERT(!bg->ro); + ASSERT(bg->swap_extents >= amount); + bg->swap_extents -= amount; + spin_unlock(&bg->lock); +} diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index adfd7583a17b..4c7614346f72 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -181,6 +181,12 @@ struct btrfs_block_group { */ int needs_free_space; + /* + * Number of extents in this block group used for swap files. + * All accesses protected by the spinlock 'lock'. + */ + int swap_extents; + /* Record locked full stripes for RAID5/6 block group */ struct btrfs_full_stripe_locks_tree full_stripe_locks_root; }; @@ -299,4 +305,7 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start, u64 physical, u64 **logical, int *naddrs, int *stripe_len); #endif +bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg); +void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount); + #endif /* BTRFS_BLOCK_GROUP_H */ diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b6884eda9ff6..bcc6848bb6d6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -522,6 +522,11 @@ struct btrfs_swapfile_pin { * points to a struct btrfs_device. */ bool is_block_group; + /* + * Only used when 'is_block_group' is true and it is the number of + * extents used by a swapfile for this block group ('ptr' field). + */ + int bg_extent_count; }; bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4d85f3a6695d..db7ed44485a5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9993,6 +9993,7 @@ static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr, sp->ptr = ptr; sp->inode = inode; sp->is_block_group = is_block_group; + sp->bg_extent_count = 1; spin_lock(&fs_info->swapfile_pins_lock); p = &fs_info->swapfile_pins.rb_node; @@ -10006,6 +10007,8 @@ static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr, (sp->ptr == entry->ptr && sp->inode > entry->inode)) { p = &(*p)->rb_right; } else { + if (is_block_group) + entry->bg_extent_count++; spin_unlock(&fs_info->swapfile_pins_lock); kfree(sp); return 1; @@ -10031,8 +10034,11 @@ static void btrfs_free_swapfile_pins(struct inode *inode) sp = rb_entry(node, struct btrfs_swapfile_pin, node); if (sp->inode == inode) { rb_erase(&sp->node, &fs_info->swapfile_pins); - if (sp->is_block_group) + if (sp->is_block_group) { + btrfs_dec_block_group_swap_extents(sp->ptr, + sp->bg_extent_count); btrfs_put_block_group(sp->ptr); + } kfree(sp); } node = next; @@ -10247,6 +10253,17 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, goto out; } + if (!btrfs_inc_block_group_swap_extents(bg)) { + btrfs_warn(fs_info, + "block group for swapfile at %llu is read-only%s", + bg->start, + atomic_read(&fs_info->scrubs_running) ? + " (scrub running)" : ""); + btrfs_put_block_group(bg); + ret = -EINVAL; + goto out; + } + ret = btrfs_add_swapfile_pin(inode, bg, true); if (ret) { btrfs_put_block_group(bg); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index e71e7586e9eb..0392c556af60 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3568,6 +3568,13 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, * commit_transactions. */ ro_set = 0; + } else if (ret == -ETXTBSY) { + btrfs_warn(fs_info, + "skipping scrub of block group %llu due to active swapfile", + cache->start); + scrub_pause_off(fs_info); + ret = 0; + goto skip_unfreeze; } else { btrfs_warn(fs_info, "failed setting block group ro: %d", ret); @@ -3657,7 +3664,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, } else { spin_unlock(&cache->lock); } - +skip_unfreeze: btrfs_unfreeze_block_group(cache); btrfs_put_block_group(cache); if (ret) From 6fc9e5866cb9d66bf3e9d05ff6831132733c098a Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 5 Feb 2021 12:55:38 +0000 Subject: [PATCH 09/43] btrfs: fix race between swap file activation and snapshot creation commit dd0734f2a866f9d619d4abf97c3d71bcdee40ea9 upstream. When creating a snapshot we check if the current number of swap files, in the root, is non-zero, and if it is, we error out and warn that we can not create the snapshot because there are active swap files. However this is racy because when a task started activation of a swap file, another task might have started already snapshot creation and might have seen the counter for the number of swap files as zero. This means that after the swap file is activated we may end up with a snapshot of the same root successfully created, and therefore when the first write to the swap file happens it has to fall back into COW mode, which should never happen for active swap files. Basically what can happen is: 1) Task A starts snapshot creation and enters ioctl.c:create_snapshot(). There it sees that root->nr_swapfiles has a value of 0 so it continues; 2) Task B enters btrfs_swap_activate(). It is not aware that another task started snapshot creation but it did not finish yet. It increments root->nr_swapfiles from 0 to 1; 3) Task B checks that the file meets all requirements to be an active swap file - it has NOCOW set, there are no snapshots for the inode's root at the moment, no file holes, no reflinked extents, etc; 4) Task B returns success and now the file is an active swap file; 5) Task A commits the transaction to create the snapshot and finishes. The swap file's extents are now shared between the original root and the snapshot; 6) A write into an extent of the swap file is attempted - there is a snapshot of the file's root, so we fall back to COW mode and therefore the physical location of the extent changes on disk. So fix this by taking the snapshot lock during swap file activation before locking the extent range, as that is the order in which we lock these during buffered writes. Fixes: ed46ff3d42378 ("Btrfs: support swap files") CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Anand Jain Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index db7ed44485a5..9b3df72ceffb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10099,7 +10099,8 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, sector_t *span) { struct inode *inode = file_inode(file); - struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = root->fs_info; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_state *cached_state = NULL; struct extent_map *em = NULL; @@ -10150,13 +10151,27 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, "cannot activate swapfile while exclusive operation is running"); return -EBUSY; } + + /* + * Prevent snapshot creation while we are activating the swap file. + * We do not want to race with snapshot creation. If snapshot creation + * already started before we bumped nr_swapfiles from 0 to 1 and + * completes before the first write into the swap file after it is + * activated, than that write would fallback to COW. + */ + if (!btrfs_drew_try_write_lock(&root->snapshot_lock)) { + btrfs_exclop_finish(fs_info); + btrfs_warn(fs_info, + "cannot activate swapfile because snapshot creation is in progress"); + return -EINVAL; + } /* * Snapshots can create extents which require COW even if NODATACOW is * set. We use this counter to prevent snapshots. We must increment it * before walking the extents because we don't want a concurrent * snapshot to run after we've already checked the extents. */ - atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles); + atomic_inc(&root->nr_swapfiles); isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize); @@ -10302,6 +10317,8 @@ out: if (ret) btrfs_swap_deactivate(file); + btrfs_drew_write_unlock(&root->snapshot_lock); + btrfs_exclop_finish(fs_info); if (ret) From 1559d94fece25604808eae82af03d2c2c354317a Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 16 Feb 2021 11:09:25 +0000 Subject: [PATCH 10/43] btrfs: fix stale data exposure after cloning a hole with NO_HOLES enabled commit 3660d0bcdb82807d434da9d2e57d88b37331182d upstream. When using the NO_HOLES feature, if we clone a file range that spans only a hole into a range that is at or beyond the current i_size of the destination file, we end up not setting the full sync runtime flag on the inode. As a result, if we then fsync the destination file and have a power failure, after log replay we can end up exposing stale data instead of having a hole for that range. The conditions for this to happen are the following: 1) We have a file with a size of, for example, 1280K; 2) There is a written (non-prealloc) extent for the file range from 1024K to 1280K with a length of 256K; 3) This particular file extent layout is durably persisted, so that the existing superblock persisted on disk points to a subvolume root where the file has that exact file extent layout and state; 4) The file is truncated to a smaller size, to an offset lower than the start offset of its last extent, for example to 800K. The truncate sets the full sync runtime flag on the inode; 6) Fsync the file to log it and clear the full sync runtime flag; 7) Clone a region that covers only a hole (implicit hole due to NO_HOLES) into the file with a destination offset that starts at or beyond the 256K file extent item we had - for example to offset 1024K; 8) Since the clone operation does not find extents in the source range, we end up in the if branch at the bottom of btrfs_clone() where we punch a hole for the file range starting at offset 1024K by calling btrfs_replace_file_extents(). There we end up not setting the full sync flag on the inode, because we don't know we are being called in a clone context (and not fallocate's punch hole operation), and neither do we create an extent map to represent a hole because the requested range is beyond eof; 9) A further fsync to the file will be a fast fsync, since the clone operation did not set the full sync flag, and therefore it relies on modified extent maps to correctly log the file layout. But since it does not find any extent map marking the range from 1024K (the previous eof) to the new eof, it does not log a file extent item for that range representing the hole; 10) After a power failure no hole for the range starting at 1024K is punched and we end up exposing stale data from the old 256K extent. Turning this into exact steps: $ mkfs.btrfs -f -O no-holes /dev/sdi $ mount /dev/sdi /mnt # Create our test file with 3 extents of 256K and a 256K hole at offset # 256K. The file has a size of 1280K. $ xfs_io -f -s \ -c "pwrite -S 0xab -b 256K 0 256K" \ -c "pwrite -S 0xcd -b 256K 512K 256K" \ -c "pwrite -S 0xef -b 256K 768K 256K" \ -c "pwrite -S 0x73 -b 256K 1024K 256K" \ /mnt/sdi/foobar # Make sure it's durably persisted. We want the last committed super # block to point to this particular file extent layout. sync # Now truncate our file to a smaller size, falling within a position of # the second extent. This sets the full sync runtime flag on the inode. # Then fsync the file to log it and clear the full sync flag from the # inode. The third extent is no longer part of the file and therefore # it is not logged. $ xfs_io -c "truncate 800K" -c "fsync" /mnt/foobar # Now do a clone operation that only clones the hole and sets back the # file size to match the size it had before the truncate operation # (1280K). $ xfs_io \ -c "reflink /mnt/foobar 256K 1024K 256K" \ -c "fsync" \ /mnt/foobar # File data before power failure: $ od -A d -t x1 /mnt/foobar 0000000 ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab * 0262144 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 0524288 cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd * 0786432 ef ef ef ef ef ef ef ef ef ef ef ef ef ef ef ef * 0819200 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 1310720 # Mount the fs again to replay the log tree. $ mount /dev/sdi /mnt # File data after power failure: $ od -A d -t x1 /mnt/foobar 0000000 ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab * 0262144 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 0524288 cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd cd * 0786432 ef ef ef ef ef ef ef ef ef ef ef ef ef ef ef ef * 0819200 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 1048576 73 73 73 73 73 73 73 73 73 73 73 73 73 73 73 73 * 1310720 The range from 1024K to 1280K should correspond to a hole but instead it points to stale data, to the 256K extent that should not exist after the truncate operation. The issue does not exists when not using NO_HOLES, because for that case we use file extent items to represent holes, these are found and copied during the loop that iterates over extents at btrfs_clone(), and that causes btrfs_replace_file_extents() to be called with a non-NULL extent_info argument and therefore set the full sync runtime flag on the inode. So fix this by making the code that deals with a trailing hole during cloning, at btrfs_clone(), to set the full sync flag on the inode, if the range starts at or beyond the current i_size. A test case for fstests will follow soon. Backporting notes: for kernel 5.4 the change goes to ioctl.c into btrfs_clone before the last call to btrfs_punch_hole_range. CC: stable@vger.kernel.org # 5.4+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/reflink.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c index a646af95dd10..c4f87df53283 100644 --- a/fs/btrfs/reflink.c +++ b/fs/btrfs/reflink.c @@ -548,6 +548,24 @@ process_slot: btrfs_release_path(path); path->leave_spinning = 0; + /* + * When using NO_HOLES and we are cloning a range that covers + * only a hole (no extents) into a range beyond the current + * i_size, punching a hole in the target range will not create + * an extent map defining a hole, because the range starts at or + * beyond current i_size. If the file previously had an i_size + * greater than the new i_size set by this clone operation, we + * need to make sure the next fsync is a full fsync, so that it + * detects and logs a hole covering a range from the current + * i_size to the new i_size. If the clone range covers extents, + * besides a hole, then we know the full sync flag was already + * set by previous calls to btrfs_replace_file_extents() that + * replaced file extent items. + */ + if (last_dest_end >= i_size_read(inode)) + set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, + &BTRFS_I(inode)->runtime_flags); + ret = btrfs_replace_file_extents(inode, path, last_dest_end, destoff + len - 1, NULL, &trans); if (ret) From e82407d24968531d636fc5d544715c86675f62e6 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 8 Feb 2021 10:26:54 +0200 Subject: [PATCH 11/43] btrfs: fix race between extent freeing/allocation when using bitmaps commit 3c17916510428dbccdf657de050c34e208347089 upstream. During allocation the allocator will try to allocate an extent using cluster policy. Once the current cluster is exhausted it will remove the entry under btrfs_free_cluster::lock and subsequently acquire btrfs_free_space_ctl::tree_lock to dispose of the already-deleted entry and adjust btrfs_free_space_ctl::total_bitmap. This poses a problem because there exists a race condition between removing the entry under one lock and doing the necessary accounting holding a different lock since extent freeing only uses the 2nd lock. This can result in the following situation: T1: T2: btrfs_alloc_from_cluster insert_into_bitmap if (entry->bytes == 0) if (block_group && !list_empty(&block_group->cluster_list)) { rb_erase(entry) spin_unlock(&cluster->lock); (total_bitmaps is still 4) spin_lock(&cluster->lock); root> spin_lock(&ctl->tree_lock); recalculate_thresholds To fix this ensure that once depleted, the cluster entry is deleted when both cluster lock and tree locks are held in the allocator (T1), this ensures that even if there is a race with a concurrent insert_into_bitmap call it will correctly find the entry in the cluster and add the new space to it. CC: # 4.4+ Reviewed-by: Josef Bacik Signed-off-by: Nikolay Borisov Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/free-space-cache.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 9ebc8e2237e8..ba280707d5ec 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -3034,8 +3034,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group, entry->bytes -= bytes; } - if (entry->bytes == 0) - rb_erase(&entry->offset_index, &cluster->root); break; } out: @@ -3052,7 +3050,10 @@ out: ctl->free_space -= bytes; if (!entry->bitmap && !btrfs_free_space_trimmed(entry)) ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes; + + spin_lock(&cluster->lock); if (entry->bytes == 0) { + rb_erase(&entry->offset_index, &cluster->root); ctl->free_extents--; if (entry->bitmap) { kmem_cache_free(btrfs_free_space_bitmap_cachep, @@ -3065,6 +3066,7 @@ out: kmem_cache_free(btrfs_free_space_cachep, entry); } + spin_unlock(&cluster->lock); spin_unlock(&ctl->tree_lock); return ret; From a64ad80223f2173df2737a49148d2566b808f484 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 17 Feb 2021 09:04:34 +0300 Subject: [PATCH 12/43] btrfs: validate qgroup inherit for SNAP_CREATE_V2 ioctl commit 5011c5a663b9c6d6aff3d394f11049b371199627 upstream. The problem is we're copying "inherit" from user space but we don't necessarily know that we're copying enough data for a 64 byte struct. Then the next problem is that 'inherit' has a variable size array at the end, and we have to verify that array is the size we expected. Fixes: 6f72c7e20dba ("Btrfs: add qgroup inheritance") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Dan Carpenter Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index bd46e107f955..f5135314e4b3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1914,7 +1914,10 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, if (vol_args->flags & BTRFS_SUBVOL_RDONLY) readonly = true; if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) { - if (vol_args->size > PAGE_SIZE) { + u64 nums; + + if (vol_args->size < sizeof(*inherit) || + vol_args->size > PAGE_SIZE) { ret = -EINVAL; goto free_args; } @@ -1923,6 +1926,20 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, ret = PTR_ERR(inherit); goto free_args; } + + if (inherit->num_qgroups > PAGE_SIZE || + inherit->num_ref_copies > PAGE_SIZE || + inherit->num_excl_copies > PAGE_SIZE) { + ret = -EINVAL; + goto free_inherit; + } + + nums = inherit->num_qgroups + 2 * inherit->num_ref_copies + + 2 * inherit->num_excl_copies; + if (vol_args->size != struct_size(inherit, qgroups, nums)) { + ret = -EINVAL; + goto free_inherit; + } } ret = __btrfs_ioctl_snap_create(file, vol_args->name, vol_args->fd, From 37ffce966821eb5098bda58eef91786cc99c602c Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 22 Feb 2021 18:40:42 +0200 Subject: [PATCH 13/43] btrfs: free correct amount of space in btrfs_delayed_inode_reserve_metadata commit 0f9c03d824f6f522d3bc43629635c9765546ebc5 upstream. Following commit f218ea6c4792 ("btrfs: delayed-inode: Remove wrong qgroup meta reservation calls") this function now reserves num_bytes, rather than the fixed amount of nodesize. As such this requires the same amount to be freed in case of failure. Fix this by adjusting the amount we are freeing. Fixes: f218ea6c4792 ("btrfs: delayed-inode: Remove wrong qgroup meta reservation calls") CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Qu Wenruo Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/delayed-inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 5aba81e16113..36e0de34ec68 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -649,7 +649,7 @@ static int btrfs_delayed_inode_reserve_metadata( btrfs_ino(inode), num_bytes, 1); } else { - btrfs_qgroup_free_meta_prealloc(root, fs_info->nodesize); + btrfs_qgroup_free_meta_prealloc(root, num_bytes); } return ret; } From e6ba61aaff88ec5b8b2b8035431f75071d8b1690 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Tue, 23 Feb 2021 15:20:42 +0200 Subject: [PATCH 14/43] btrfs: unlock extents in btrfs_zero_range in case of quota reservation errors commit 4f6a49de64fd1b1dba5229c02047376da7cf24fd upstream. If btrfs_qgroup_reserve_data returns an error (i.e quota limit reached) the handling logic directly goes to the 'out' label without first unlocking the extent range between lockstart, lockend. This results in deadlocks as other processes try to lock the same extent. Fixes: a7f8b1c2ac21 ("btrfs: file: reserve qgroup space after the hole punch range is locked") CC: stable@vger.kernel.org # 5.10+ Reviewed-by: Qu Wenruo Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/file.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 4373da7bcc0d..c81a20cc10dc 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3236,8 +3236,11 @@ reserve_space: goto out; ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), &data_reserved, alloc_start, bytes_to_reserve); - if (ret) + if (ret) { + unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, + lockend, &cached_state); goto out; + } ret = btrfs_prealloc_file_range(inode, mode, alloc_start, alloc_end - alloc_start, i_blocksize(inode), From ae971992e9fe5603d8934843097963e499353c6d Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 26 Feb 2021 17:51:44 +0000 Subject: [PATCH 15/43] btrfs: fix warning when creating a directory with smack enabled commit fd57a98d6f0c98fa295813087f13afb26c224e73 upstream. When we have smack enabled, during the creation of a directory smack may attempt to add a "smack transmute" xattr on the inode, which results in the following warning and trace: WARNING: CPU: 3 PID: 2548 at fs/btrfs/transaction.c:537 start_transaction+0x489/0x4f0 Modules linked in: nft_objref nf_conntrack_netbios_ns (...) CPU: 3 PID: 2548 Comm: mkdir Not tainted 5.9.0-rc2smack+ #81 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014 RIP: 0010:start_transaction+0x489/0x4f0 Code: e9 be fc ff ff (...) RSP: 0018:ffffc90001887d10 EFLAGS: 00010202 RAX: ffff88816f1e0000 RBX: 0000000000000201 RCX: 0000000000000003 RDX: 0000000000000201 RSI: 0000000000000002 RDI: ffff888177849000 RBP: ffff888177849000 R08: 0000000000000001 R09: 0000000000000004 R10: ffffffff825e8f7a R11: 0000000000000003 R12: ffffffffffffffe2 R13: 0000000000000000 R14: ffff88803d884270 R15: ffff8881680d8000 FS: 00007f67317b8440(0000) GS:ffff88817bcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f67247a22a8 CR3: 000000004bfbc002 CR4: 0000000000370ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? slab_free_freelist_hook+0xea/0x1b0 ? trace_hardirqs_on+0x1c/0xe0 btrfs_setxattr_trans+0x3c/0xf0 __vfs_setxattr+0x63/0x80 smack_d_instantiate+0x2d3/0x360 security_d_instantiate+0x29/0x40 d_instantiate_new+0x38/0x90 btrfs_mkdir+0x1cf/0x1e0 vfs_mkdir+0x14f/0x200 do_mkdirat+0x6d/0x110 do_syscall_64+0x2d/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f673196ae6b Code: 8b 05 11 (...) RSP: 002b:00007ffc3c679b18 EFLAGS: 00000246 ORIG_RAX: 0000000000000053 RAX: ffffffffffffffda RBX: 00000000000001ff RCX: 00007f673196ae6b RDX: 0000000000000000 RSI: 00000000000001ff RDI: 00007ffc3c67a30d RBP: 00007ffc3c67a30d R08: 00000000000001ff R09: 0000000000000000 R10: 000055d3e39fe930 R11: 0000000000000246 R12: 0000000000000000 R13: 00007ffc3c679cd8 R14: 00007ffc3c67a30d R15: 00007ffc3c679ce0 irq event stamp: 11029 hardirqs last enabled at (11037): [] console_unlock+0x486/0x670 hardirqs last disabled at (11044): [] console_unlock+0xa1/0x670 softirqs last enabled at (8864): [] asm_call_on_stack+0xf/0x20 softirqs last disabled at (8851): [] asm_call_on_stack+0xf/0x20 This happens because at btrfs_mkdir() we call d_instantiate_new() while holding a transaction handle, which results in the following call chain: btrfs_mkdir() trans = btrfs_start_transaction(root, 5); d_instantiate_new() smack_d_instantiate() __vfs_setxattr() btrfs_setxattr_trans() btrfs_start_transaction() start_transaction() WARN_ON() --> a tansaction start has TRANS_EXTWRITERS set in its type h->orig_rsv = h->block_rsv h->block_rsv = NULL btrfs_end_transaction(trans) Besides the warning triggered at start_transaction, we set the handle's block_rsv to NULL which may cause some surprises later on. So fix this by making btrfs_setxattr_trans() not start a transaction when we already have a handle on one, stored in current->journal_info, and use that handle. We are good to use the handle because at btrfs_mkdir() we did reserve space for the xattr and the inode item. Reported-by: Casey Schaufler CC: stable@vger.kernel.org # 5.4+ Acked-by: Casey Schaufler Tested-by: Casey Schaufler Link: https://lore.kernel.org/linux-btrfs/434d856f-bd7b-4889-a6ec-e81aaebfa735@schaufler-ca.com/ Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/xattr.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index e51774201d53..f1a60bcdb3db 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -229,11 +229,33 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name, { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; + const bool start_trans = (current->journal_info == NULL); int ret; - trans = btrfs_start_transaction(root, 2); - if (IS_ERR(trans)) - return PTR_ERR(trans); + if (start_trans) { + /* + * 1 unit for inserting/updating/deleting the xattr + * 1 unit for the inode item update + */ + trans = btrfs_start_transaction(root, 2); + if (IS_ERR(trans)) + return PTR_ERR(trans); + } else { + /* + * This can happen when smack is enabled and a directory is being + * created. It happens through d_instantiate_new(), which calls + * smack_d_instantiate(), which in turn calls __vfs_setxattr() to + * set the transmute xattr (XATTR_NAME_SMACKTRANSMUTE) on the + * inode. We have already reserved space for the xattr and inode + * update at btrfs_mkdir(), so just use the transaction handle. + * We don't join or start a transaction, as that will reset the + * block_rsv of the handle and trigger a warning for the start + * case. + */ + ASSERT(strncmp(name, XATTR_SECURITY_PREFIX, + XATTR_SECURITY_PREFIX_LEN) == 0); + trans = current->journal_info; + } ret = btrfs_setxattr(trans, inode, name, value, size, flags); if (ret) @@ -244,7 +266,8 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name, ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); out: - btrfs_end_transaction(trans); + if (start_trans) + btrfs_end_transaction(trans); return ret; } From c6e5800bdf7041803de7aaee73534b0885dbc018 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 25 Feb 2021 19:23:27 +0100 Subject: [PATCH 16/43] PM: runtime: Update device status before letting suppliers suspend commit 44cc89f764646b2f1f2ea5d1a08b230131707851 upstream. Because the PM-runtime status of the device is not updated in __rpm_callback(), attempts to suspend the suppliers of the given device triggered by rpm_put_suppliers() called by it may fail. Fix this by making __rpm_callback() update the device's status to RPM_SUSPENDED before calling rpm_put_suppliers() if the current status of the device is RPM_SUSPENDING and the callback just invoked by it has returned 0 (success). While at it, modify the code in __rpm_callback() to always check the device's PM-runtime status under its PM lock. Link: https://lore.kernel.org/linux-pm/CAPDyKFqm06KDw_p8WXsM4dijDbho4bb6T4k50UqqvR1_COsp8g@mail.gmail.com/ Fixes: 21d5c57b3726 ("PM / runtime: Use device links") Reported-by: Elaine Zhang Diagnosed-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki Tested-by: Elaine Zhang Reviewed-by: Ulf Hansson Cc: 4.10+ # 4.10+ Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/base/power/runtime.c | 62 +++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 25 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index bfda153b1a41..87682dcb64ec 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -325,22 +325,22 @@ static void rpm_put_suppliers(struct device *dev) static int __rpm_callback(int (*cb)(struct device *), struct device *dev) __releases(&dev->power.lock) __acquires(&dev->power.lock) { - int retval, idx; bool use_links = dev->power.links_count > 0; + bool get = false; + int retval, idx; + bool put; if (dev->power.irq_safe) { spin_unlock(&dev->power.lock); + } else if (!use_links) { + spin_unlock_irq(&dev->power.lock); } else { + get = dev->power.runtime_status == RPM_RESUMING; + spin_unlock_irq(&dev->power.lock); - /* - * Resume suppliers if necessary. - * - * The device's runtime PM status cannot change until this - * routine returns, so it is safe to read the status outside of - * the lock. - */ - if (use_links && dev->power.runtime_status == RPM_RESUMING) { + /* Resume suppliers if necessary. */ + if (get) { idx = device_links_read_lock(); retval = rpm_get_suppliers(dev); @@ -355,24 +355,36 @@ static int __rpm_callback(int (*cb)(struct device *), struct device *dev) if (dev->power.irq_safe) { spin_lock(&dev->power.lock); - } else { - /* - * If the device is suspending and the callback has returned - * success, drop the usage counters of the suppliers that have - * been reference counted on its resume. - * - * Do that if resume fails too. - */ - if (use_links - && ((dev->power.runtime_status == RPM_SUSPENDING && !retval) - || (dev->power.runtime_status == RPM_RESUMING && retval))) { - idx = device_links_read_lock(); + return retval; + } - fail: - rpm_put_suppliers(dev); + spin_lock_irq(&dev->power.lock); - device_links_read_unlock(idx); - } + if (!use_links) + return retval; + + /* + * If the device is suspending and the callback has returned success, + * drop the usage counters of the suppliers that have been reference + * counted on its resume. + * + * Do that if the resume fails too. + */ + put = dev->power.runtime_status == RPM_SUSPENDING && !retval; + if (put) + __update_runtime_status(dev, RPM_SUSPENDED); + else + put = get && retval; + + if (put) { + spin_unlock_irq(&dev->power.lock); + + idx = device_links_read_lock(); + +fail: + rpm_put_suppliers(dev); + + device_links_read_unlock(idx); spin_lock_irq(&dev->power.lock); } From 3301afbfef697077a647381c26cdaf9d90458965 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 3 Mar 2021 18:03:52 -0500 Subject: [PATCH 17/43] ring-buffer: Force before_stamp and write_stamp to be different on discard commit 6f6be606e763f2da9fc21de00538c97fe4ca1492 upstream. Part of the logic of the new time stamp code depends on the before_stamp and the write_stamp to be different if the write_stamp does not match the last event on the buffer, as it will be used to calculate the delta of the next event written on the buffer. The discard logic depends on this, as the next event to come in needs to inject a full timestamp as it can not rely on the last event timestamp in the buffer because it is unknown due to events after it being discarded. But by changing the write_stamp back to the time before it, it forces the next event to use a full time stamp, instead of relying on it. The issue came when a full time stamp was used for the event, and rb_time_delta() returns zero in that case. The update to the write_stamp (which subtracts delta) made it not change. Then when the event is removed from the buffer, because the before_stamp and write_stamp still match, the next event written would calculate its delta from the write_stamp, but that would be wrong as the write_stamp is of the time of the event that was discarded. In the case that the delta change being made to write_stamp is zero, set the before_stamp to zero as well, and this will force the next event to inject a full timestamp and not use the current write_stamp. Cc: stable@vger.kernel.org Fixes: a389d86f7fd09 ("ring-buffer: Have nested events still record running time stamp") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/ring_buffer.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index ddeb865706ba..b12e5f4721ca 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2836,6 +2836,17 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, write_stamp, write_stamp - delta)) return 0; + /* + * It's possible that the event time delta is zero + * (has the same time stamp as the previous event) + * in which case write_stamp and before_stamp could + * be the same. In such a case, force before_stamp + * to be different than write_stamp. It doesn't + * matter what it is, as long as its different. + */ + if (!delta) + rb_time_set(&cpu_buffer->before_stamp, 0); + /* * If an event were to come in now, it would see that the * write_stamp and the before_stamp are different, and assume From a2501d87663b84c288188d63da4632018b627829 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 28 Feb 2021 16:07:30 -0700 Subject: [PATCH 18/43] io_uring: ignore double poll add on the same waitqueue head commit 1c3b3e6527e57156bf4082f11c2151957560fe6a upstream. syzbot reports a deadlock, attempting to lock the same spinlock twice: ============================================ WARNING: possible recursive locking detected 5.11.0-syzkaller #0 Not tainted -------------------------------------------- swapper/1/0 is trying to acquire lock: ffff88801b2b1130 (&runtime->sleep){..-.}-{2:2}, at: spin_lock include/linux/spinlock.h:354 [inline] ffff88801b2b1130 (&runtime->sleep){..-.}-{2:2}, at: io_poll_double_wake+0x25f/0x6a0 fs/io_uring.c:4960 but task is already holding lock: ffff88801b2b3130 (&runtime->sleep){..-.}-{2:2}, at: __wake_up_common_lock+0xb4/0x130 kernel/sched/wait.c:137 other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&runtime->sleep); lock(&runtime->sleep); *** DEADLOCK *** May be due to missing lock nesting notation 2 locks held by swapper/1/0: #0: ffff888147474908 (&group->lock){..-.}-{2:2}, at: _snd_pcm_stream_lock_irqsave+0x9f/0xd0 sound/core/pcm_native.c:170 #1: ffff88801b2b3130 (&runtime->sleep){..-.}-{2:2}, at: __wake_up_common_lock+0xb4/0x130 kernel/sched/wait.c:137 stack backtrace: CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.11.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0xfa/0x151 lib/dump_stack.c:120 print_deadlock_bug kernel/locking/lockdep.c:2829 [inline] check_deadlock kernel/locking/lockdep.c:2872 [inline] validate_chain kernel/locking/lockdep.c:3661 [inline] __lock_acquire.cold+0x14c/0x3b4 kernel/locking/lockdep.c:4900 lock_acquire kernel/locking/lockdep.c:5510 [inline] lock_acquire+0x1ab/0x730 kernel/locking/lockdep.c:5475 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:151 spin_lock include/linux/spinlock.h:354 [inline] io_poll_double_wake+0x25f/0x6a0 fs/io_uring.c:4960 __wake_up_common+0x147/0x650 kernel/sched/wait.c:108 __wake_up_common_lock+0xd0/0x130 kernel/sched/wait.c:138 snd_pcm_update_state+0x46a/0x540 sound/core/pcm_lib.c:203 snd_pcm_update_hw_ptr0+0xa75/0x1a50 sound/core/pcm_lib.c:464 snd_pcm_period_elapsed+0x160/0x250 sound/core/pcm_lib.c:1805 dummy_hrtimer_callback+0x94/0x1b0 sound/drivers/dummy.c:378 __run_hrtimer kernel/time/hrtimer.c:1519 [inline] __hrtimer_run_queues+0x609/0xe40 kernel/time/hrtimer.c:1583 hrtimer_run_softirq+0x17b/0x360 kernel/time/hrtimer.c:1600 __do_softirq+0x29b/0x9f6 kernel/softirq.c:345 invoke_softirq kernel/softirq.c:221 [inline] __irq_exit_rcu kernel/softirq.c:422 [inline] irq_exit_rcu+0x134/0x200 kernel/softirq.c:434 sysvec_apic_timer_interrupt+0x93/0xc0 arch/x86/kernel/apic/apic.c:1100 asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:632 RIP: 0010:native_save_fl arch/x86/include/asm/irqflags.h:29 [inline] RIP: 0010:arch_local_save_flags arch/x86/include/asm/irqflags.h:70 [inline] RIP: 0010:arch_irqs_disabled arch/x86/include/asm/irqflags.h:137 [inline] RIP: 0010:acpi_safe_halt drivers/acpi/processor_idle.c:111 [inline] RIP: 0010:acpi_idle_do_entry+0x1c9/0x250 drivers/acpi/processor_idle.c:516 Code: dd 38 6e f8 84 db 75 ac e8 54 32 6e f8 e8 0f 1c 74 f8 e9 0c 00 00 00 e8 45 32 6e f8 0f 00 2d 4e 4a c5 00 e8 39 32 6e f8 fb f4 <9c> 5b 81 e3 00 02 00 00 fa 31 ff 48 89 de e8 14 3a 6e f8 48 85 db RSP: 0018:ffffc90000d47d18 EFLAGS: 00000293 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff8880115c3780 RSI: ffffffff89052537 RDI: 0000000000000000 RBP: ffff888141127064 R08: 0000000000000001 R09: 0000000000000001 R10: ffffffff81794168 R11: 0000000000000000 R12: 0000000000000001 R13: ffff888141127000 R14: ffff888141127064 R15: ffff888143331804 acpi_idle_enter+0x361/0x500 drivers/acpi/processor_idle.c:647 cpuidle_enter_state+0x1b1/0xc80 drivers/cpuidle/cpuidle.c:237 cpuidle_enter+0x4a/0xa0 drivers/cpuidle/cpuidle.c:351 call_cpuidle kernel/sched/idle.c:158 [inline] cpuidle_idle_call kernel/sched/idle.c:239 [inline] do_idle+0x3e1/0x590 kernel/sched/idle.c:300 cpu_startup_entry+0x14/0x20 kernel/sched/idle.c:397 start_secondary+0x274/0x350 arch/x86/kernel/smpboot.c:272 secondary_startup_64_no_verify+0xb0/0xbb which is due to the driver doing poll_wait() twice on the same wait_queue_head. That is perfectly valid, but from checking the rest of the kernel tree, it's the only driver that does this. We can handle this just fine, we just need to ignore the second addition as we'll get woken just fine on the first one. Cc: stable@vger.kernel.org # 5.8+ Fixes: 18bceab101ad ("io_uring: allow POLL_ADD with double poll_wait() users") Reported-by: syzbot+28abd693db9e92c160d8@syzkaller.appspotmail.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/io_uring.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index d0172cc4f642..691c99869143 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5083,6 +5083,9 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, pt->error = -EINVAL; return; } + /* double add on the same waitqueue head, ignore */ + if (poll->head == head) + return; poll = kmalloc(sizeof(*poll), GFP_ATOMIC); if (!poll) { pt->error = -ENOMEM; From 7bda53f46387bc2b40a80befb1a5648dd9840e9c Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 23 Feb 2021 21:21:20 +0100 Subject: [PATCH 19/43] dm bufio: subtract the number of initial sectors in dm_bufio_get_device_size commit a14e5ec66a7a66e57b24e2469f9212a78460207e upstream. dm_bufio_get_device_size returns the device size in blocks. Before returning the value, we must subtract the nubmer of starting sectors. The number of starting sectors may not be divisible by block size. Note that currently, no target is using dm_bufio_set_sector_offset and dm_bufio_get_device_size simultaneously, so this change has no effect. However, an upcoming dm-verity-fec fix needs this change. Signed-off-by: Mikulas Patocka Reviewed-by: Milan Broz Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-bufio.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index fce4cbf9529d..50f3e673729c 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1526,6 +1526,10 @@ EXPORT_SYMBOL_GPL(dm_bufio_get_block_size); sector_t dm_bufio_get_device_size(struct dm_bufio_client *c) { sector_t s = i_size_read(c->bdev->bd_inode) >> SECTOR_SHIFT; + if (s >= c->start) + s -= c->start; + else + s = 0; if (likely(c->sectors_per_block_bits >= 0)) s >>= c->sectors_per_block_bits; else From ce1cca17381f9395c2b27d24fcfe553efa0bf466 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Tue, 23 Feb 2021 21:21:21 +0100 Subject: [PATCH 20/43] dm verity: fix FEC for RS roots unaligned to block size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit df7b59ba9245c4a3115ebaa905e3e5719a3810da upstream. Optional Forward Error Correction (FEC) code in dm-verity uses Reed-Solomon code and should support roots from 2 to 24. The error correction parity bytes (of roots lengths per RS block) are stored on a separate device in sequence without any padding. Currently, to access FEC device, the dm-verity-fec code uses dm-bufio client with block size set to verity data block (usually 4096 or 512 bytes). Because this block size is not divisible by some (most!) of the roots supported lengths, data repair cannot work for partially stored parity bytes. This fix changes FEC device dm-bufio block size to "roots << SECTOR_SHIFT" where we can be sure that the full parity data is always available. (There cannot be partial FEC blocks because parity must cover whole sectors.) Because the optional FEC starting offset could be unaligned to this new block size, we have to use dm_bufio_set_sector_offset() to configure it. The problem is easily reproduced using veritysetup, e.g. for roots=13: # create verity device with RS FEC dd if=/dev/urandom of=data.img bs=4096 count=8 status=none veritysetup format data.img hash.img --fec-device=fec.img --fec-roots=13 | awk '/^Root hash/{ print $3 }' >roothash # create an erasure that should be always repairable with this roots setting dd if=/dev/zero of=data.img conv=notrunc bs=1 count=8 seek=4088 status=none # try to read it through dm-verity veritysetup open data.img test hash.img --fec-device=fec.img --fec-roots=13 $(cat roothash) dd if=/dev/mapper/test of=/dev/null bs=4096 status=noxfer # wait for possible recursive recovery in kernel udevadm settle veritysetup close test With this fix, errors are properly repaired. device-mapper: verity-fec: 7:1: FEC 0: corrected 8 errors ... Without it, FEC code usually ends on unrecoverable failure in RS decoder: device-mapper: verity-fec: 7:1: FEC 0: failed to correct: -74 ... This problem is present in all kernels since the FEC code's introduction (kernel 4.5). It is thought that this problem is not visible in Android ecosystem because it always uses a default RS roots=2. Depends-on: a14e5ec66a7a ("dm bufio: subtract the number of initial sectors in dm_bufio_get_device_size") Signed-off-by: Milan Broz Tested-by: Jérôme Carretero Reviewed-by: Sami Tolvanen Cc: stable@vger.kernel.org # 4.5+ Signed-off-by: Mike Snitzer Signed-off-by: Greg Kroah-Hartman --- drivers/md/dm-verity-fec.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index fb41b4f23c48..66f4c6398f67 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -61,19 +61,18 @@ static int fec_decode_rs8(struct dm_verity *v, struct dm_verity_fec_io *fio, static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index, unsigned *offset, struct dm_buffer **buf) { - u64 position, block; + u64 position, block, rem; u8 *res; position = (index + rsb) * v->fec->roots; - block = position >> v->data_dev_block_bits; - *offset = (unsigned)(position - (block << v->data_dev_block_bits)); + block = div64_u64_rem(position, v->fec->roots << SECTOR_SHIFT, &rem); + *offset = (unsigned)rem; - res = dm_bufio_read(v->fec->bufio, v->fec->start + block, buf); + res = dm_bufio_read(v->fec->bufio, block, buf); if (IS_ERR(res)) { DMERR("%s: FEC %llu: parity read failed (block %llu): %ld", v->data_dev->name, (unsigned long long)rsb, - (unsigned long long)(v->fec->start + block), - PTR_ERR(res)); + (unsigned long long)block, PTR_ERR(res)); *buf = NULL; } @@ -155,7 +154,7 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio, /* read the next block when we run out of parity bytes */ offset += v->fec->roots; - if (offset >= 1 << v->data_dev_block_bits) { + if (offset >= v->fec->roots << SECTOR_SHIFT) { dm_bufio_release(buf); par = fec_read_parity(v, rsb, block_offset, &offset, &buf); @@ -674,7 +673,7 @@ int verity_fec_ctr(struct dm_verity *v) { struct dm_verity_fec *f = v->fec; struct dm_target *ti = v->ti; - u64 hash_blocks; + u64 hash_blocks, fec_blocks; int ret; if (!verity_fec_is_enabled(v)) { @@ -744,15 +743,17 @@ int verity_fec_ctr(struct dm_verity *v) } f->bufio = dm_bufio_client_create(f->dev->bdev, - 1 << v->data_dev_block_bits, + f->roots << SECTOR_SHIFT, 1, 0, NULL, NULL); if (IS_ERR(f->bufio)) { ti->error = "Cannot initialize FEC bufio client"; return PTR_ERR(f->bufio); } - if (dm_bufio_get_device_size(f->bufio) < - ((f->start + f->rounds * f->roots) >> v->data_dev_block_bits)) { + dm_bufio_set_sector_offset(f->bufio, f->start << (v->data_dev_block_bits - SECTOR_SHIFT)); + + fec_blocks = div64_u64(f->rounds * f->roots, v->fec->roots << SECTOR_SHIFT); + if (dm_bufio_get_device_size(f->bufio) < fec_blocks) { ti->error = "FEC device is too small"; return -E2BIG; } From c2fee74ea801ad847589fac0d9ca3ba905579d95 Mon Sep 17 00:00:00 2001 From: "Asher.Song" Date: Wed, 24 Feb 2021 18:41:34 +0800 Subject: [PATCH 21/43] drm/amdgpu:disable VCN for Navi12 SKU commit 0c61ac8134ffc851681ce5d4bd60d97c3d5aed27 upstream. Navi12 0x7360/C7 SKU has no video support, so remove it. Reviewed-by: Guchun Chen Signed-off-by: Asher.Song Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/nv.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 8eeba8096493..0af4774ddd61 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -459,7 +459,8 @@ static bool nv_is_headless_sku(struct pci_dev *pdev) { if ((pdev->device == 0x731E && (pdev->revision == 0xC6 || pdev->revision == 0xC7)) || - (pdev->device == 0x7340 && pdev->revision == 0xC9)) + (pdev->device == 0x7340 && pdev->revision == 0xC9) || + (pdev->device == 0x7360 && pdev->revision == 0xC7)) return true; return false; } @@ -524,7 +525,8 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); - amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); + if (!nv_is_headless_sku(adev->pdev)) + amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); if (!amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block); break; From 3868a277e6fcd36eeb175ee43773f24388417fa1 Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Tue, 2 Mar 2021 15:54:00 +0800 Subject: [PATCH 22/43] drm/amdgpu: fix parameter error of RREG32_PCIE() in amdgpu_regs_pcie commit 1aa46901ee51c1c5779b3b239ea0374a50c6d9ff upstream. the register offset isn't needed division by 4 to pass RREG32_PCIE() Signed-off-by: Kevin Wang Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 2d125b8b15ee..00a190929b55 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -355,7 +355,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, while (size) { uint32_t value; - value = RREG32_PCIE(*pos >> 2); + value = RREG32_PCIE(*pos); r = put_user(value, (uint32_t *)buf); if (r) { pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); @@ -422,7 +422,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user return r; } - WREG32_PCIE(*pos >> 2, value); + WREG32_PCIE(*pos, value); result += 4; buf += 4; From 85433307d8a18e9a20ec4d12f0f5c9f20a8f4e17 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 13 Jan 2021 10:11:35 +0100 Subject: [PATCH 23/43] crypto - shash: reduce minimum alignment of shash_desc structure commit 660d2062190db131d2feaf19914e90f868fe285c upstream. Unlike many other structure types defined in the crypto API, the 'shash_desc' structure is permitted to live on the stack, which implies its contents may not be accessed by DMA masters. (This is due to the fact that the stack may be located in the vmalloc area, which requires a different virtual-to-physical translation than the one implemented by the DMA subsystem) Our definition of CRYPTO_MINALIGN_ATTR is based on ARCH_KMALLOC_MINALIGN, which may take DMA constraints into account on architectures that support non-cache coherent DMA such as ARM and arm64. In this case, the value is chosen to reflect the largest cacheline size in the system, in order to ensure that explicit cache maintenance as required by non-coherent DMA masters does not affect adjacent, unrelated slab allocations. On arm64, this value is currently set at 128 bytes. This means that applying CRYPTO_MINALIGN_ATTR to struct shash_desc is both unnecessary (as it is never used for DMA), and undesirable, given that it wastes stack space (on arm64, performing the alignment costs 112 bytes in the worst case, and the hole between the 'tfm' and '__ctx' members takes up another 120 bytes, resulting in an increased stack footprint of up to 232 bytes.) So instead, let's switch to the minimum SLAB alignment, which does not take DMA constraints into account. Note that this is a no-op for x86. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- include/crypto/hash.h | 8 ++++---- include/linux/crypto.h | 9 ++++++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/include/crypto/hash.h b/include/crypto/hash.h index af2ff31ff619..13f8a6a54ca8 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -149,7 +149,7 @@ struct ahash_alg { struct shash_desc { struct crypto_shash *tfm; - void *__ctx[] CRYPTO_MINALIGN_ATTR; + void *__ctx[] __aligned(ARCH_SLAB_MINALIGN); }; #define HASH_MAX_DIGESTSIZE 64 @@ -162,9 +162,9 @@ struct shash_desc { #define HASH_MAX_STATESIZE 512 -#define SHASH_DESC_ON_STACK(shash, ctx) \ - char __##shash##_desc[sizeof(struct shash_desc) + \ - HASH_MAX_DESCSIZE] CRYPTO_MINALIGN_ATTR; \ +#define SHASH_DESC_ON_STACK(shash, ctx) \ + char __##shash##_desc[sizeof(struct shash_desc) + HASH_MAX_DESCSIZE] \ + __aligned(__alignof__(struct shash_desc)); \ struct shash_desc *shash = (struct shash_desc *)__##shash##_desc /** diff --git a/include/linux/crypto.h b/include/linux/crypto.h index ef90e07c9635..e3abd1f8646a 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -151,9 +151,12 @@ * The macro CRYPTO_MINALIGN_ATTR (along with the void * type in the actual * declaration) is used to ensure that the crypto_tfm context structure is * aligned correctly for the given architecture so that there are no alignment - * faults for C data types. In particular, this is required on platforms such - * as arm where pointers are 32-bit aligned but there are data types such as - * u64 which require 64-bit alignment. + * faults for C data types. On architectures that support non-cache coherent + * DMA, such as ARM or arm64, it also takes into account the minimal alignment + * that is required to ensure that the context struct member does not share any + * cachelines with the rest of the struct. This is needed to ensure that cache + * maintenance for non-coherent DMA (cache invalidation in particular) does not + * affect data that may be accessed by the CPU concurrently. */ #define CRYPTO_MINALIGN ARCH_KMALLOC_MINALIGN From 407b173adfacea0dfd2474ff349d81abc6538173 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Wed, 3 Mar 2021 15:33:13 +0800 Subject: [PATCH 24/43] arm64: mm: Move reserve_crashkernel() into mem_init() commit 0a30c53573b07d5561457e41fb0ab046cd857da5 upstream crashkernel might reserve memory located in ZONE_DMA. We plan to delay ZONE_DMA's initialization after unflattening the devicetree and ACPI's boot table initialization, so move it later in the boot process. Specifically into bootmem_init() since request_standard_resources() depends on it. Signed-off-by: Nicolas Saenz Julienne Tested-by: Jeremy Linton Link: https://lore.kernel.org/r/20201119175400.9995-2-nsaenzjulienne@suse.de Signed-off-by: Catalin Marinas Cc: Signed-off-by: Jing Xiangfeng Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/init.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 00576a960f11..686653e33250 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -386,8 +386,6 @@ void __init arm64_memblock_init(void) else arm64_dma32_phys_limit = PHYS_MASK + 1; - reserve_crashkernel(); - reserve_elfcorehdr(); high_memory = __va(memblock_end_of_DRAM() - 1) + 1; @@ -427,6 +425,12 @@ void __init bootmem_init(void) sparse_init(); zone_sizes_init(min, max); + /* + * request_standard_resources() depends on crashkernel's memory being + * reserved, so do it here. + */ + reserve_crashkernel(); + memblock_dump_all(); } From 3fbe62ffbb548cb1c857cffe936ef0b2771df578 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Wed, 3 Mar 2021 15:33:14 +0800 Subject: [PATCH 25/43] arm64: mm: Move zone_dma_bits initialization into zone_sizes_init() commit 9804f8c69b04a39d0ba41d19e6bdc6aa91c19725 upstream zone_dma_bits's initialization happens earlier that it's actually needed, in arm64_memblock_init(). So move it into the more suitable zone_sizes_init(). Signed-off-by: Nicolas Saenz Julienne Tested-by: Jeremy Linton Link: https://lore.kernel.org/r/20201119175400.9995-3-nsaenzjulienne@suse.de Signed-off-by: Catalin Marinas Cc: Signed-off-by: Jing Xiangfeng Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/init.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 686653e33250..7da912bf4222 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -190,6 +190,8 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; #ifdef CONFIG_ZONE_DMA + zone_dma_bits = ARM64_ZONE_DMA_BITS; + arm64_dma_phys_limit = max_zone_phys(zone_dma_bits); max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit); #endif #ifdef CONFIG_ZONE_DMA32 @@ -376,11 +378,6 @@ void __init arm64_memblock_init(void) early_init_fdt_scan_reserved_mem(); - if (IS_ENABLED(CONFIG_ZONE_DMA)) { - zone_dma_bits = ARM64_ZONE_DMA_BITS; - arm64_dma_phys_limit = max_zone_phys(ARM64_ZONE_DMA_BITS); - } - if (IS_ENABLED(CONFIG_ZONE_DMA32)) arm64_dma32_phys_limit = max_zone_phys(32); else From 18bf6e998d08b943172aba8969b5f9b2122d17ae Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Wed, 3 Mar 2021 15:33:15 +0800 Subject: [PATCH 26/43] of/address: Introduce of_dma_get_max_cpu_address() commit 964db79d6c186cc2ecc6ae46f98eed7e0ea8cf71 upstream Introduce of_dma_get_max_cpu_address(), which provides the highest CPU physical address addressable by all DMA masters in the system. It's specially useful for setting memory zones sizes at early boot time. Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20201119175400.9995-4-nsaenzjulienne@suse.de Signed-off-by: Catalin Marinas Cc: Signed-off-by: Jing Xiangfeng Signed-off-by: Greg Kroah-Hartman --- drivers/of/address.c | 42 ++++++++++++++++++++++++++++++++++++++++++ include/linux/of.h | 7 +++++++ 2 files changed, 49 insertions(+) diff --git a/drivers/of/address.c b/drivers/of/address.c index 1c3257a2d4e3..73ddf2540f3f 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -1024,6 +1024,48 @@ out: } #endif /* CONFIG_HAS_DMA */ +/** + * of_dma_get_max_cpu_address - Gets highest CPU address suitable for DMA + * @np: The node to start searching from or NULL to start from the root + * + * Gets the highest CPU physical address that is addressable by all DMA masters + * in the sub-tree pointed by np, or the whole tree if NULL is passed. If no + * DMA constrained device is found, it returns PHYS_ADDR_MAX. + */ +phys_addr_t __init of_dma_get_max_cpu_address(struct device_node *np) +{ + phys_addr_t max_cpu_addr = PHYS_ADDR_MAX; + struct of_range_parser parser; + phys_addr_t subtree_max_addr; + struct device_node *child; + struct of_range range; + const __be32 *ranges; + u64 cpu_end = 0; + int len; + + if (!np) + np = of_root; + + ranges = of_get_property(np, "dma-ranges", &len); + if (ranges && len) { + of_dma_range_parser_init(&parser, np); + for_each_of_range(&parser, &range) + if (range.cpu_addr + range.size > cpu_end) + cpu_end = range.cpu_addr + range.size - 1; + + if (max_cpu_addr > cpu_end) + max_cpu_addr = cpu_end; + } + + for_each_available_child_of_node(np, child) { + subtree_max_addr = of_dma_get_max_cpu_address(child); + if (max_cpu_addr > subtree_max_addr) + max_cpu_addr = subtree_max_addr; + } + + return max_cpu_addr; +} + /** * of_dma_is_coherent - Check if device is coherent * @np: device node diff --git a/include/linux/of.h b/include/linux/of.h index af655d264f10..0f4e81e6fb23 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -558,6 +558,8 @@ int of_map_id(struct device_node *np, u32 id, const char *map_name, const char *map_mask_name, struct device_node **target, u32 *id_out); +phys_addr_t of_dma_get_max_cpu_address(struct device_node *np); + #else /* CONFIG_OF */ static inline void of_core_init(void) @@ -995,6 +997,11 @@ static inline int of_map_id(struct device_node *np, u32 id, return -EINVAL; } +static inline phys_addr_t of_dma_get_max_cpu_address(struct device_node *np) +{ + return PHYS_ADDR_MAX; +} + #define of_match_ptr(_ptr) NULL #define of_match_node(_matches, _node) NULL #endif /* CONFIG_OF */ From a9861e7fa4f81f613d20c9ab4285fbb43a7d2603 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Wed, 3 Mar 2021 15:33:16 +0800 Subject: [PATCH 27/43] of: unittest: Add test for of_dma_get_max_cpu_address() commit 07d13a1d6120d453c3c1f020578693d072deded5 upstream Introduce a test for of_dma_get_max_cup_address(), it uses the same DT data as the rest of dma-ranges unit tests. Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20201119175400.9995-5-nsaenzjulienne@suse.de Signed-off-by: Catalin Marinas Cc: Signed-off-by: Jing Xiangfeng Signed-off-by: Greg Kroah-Hartman --- drivers/of/unittest.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 06cc988faf78..98cc0163301b 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -869,6 +869,23 @@ static void __init of_unittest_changeset(void) #endif } +static void __init of_unittest_dma_get_max_cpu_address(void) +{ + struct device_node *np; + phys_addr_t cpu_addr; + + np = of_find_node_by_path("/testcase-data/address-tests"); + if (!np) { + pr_err("missing testcase data\n"); + return; + } + + cpu_addr = of_dma_get_max_cpu_address(np); + unittest(cpu_addr == 0x4fffffff, + "of_dma_get_max_cpu_address: wrong CPU addr %pad (expecting %x)\n", + &cpu_addr, 0x4fffffff); +} + static void __init of_unittest_dma_ranges_one(const char *path, u64 expect_dma_addr, u64 expect_paddr) { @@ -3266,6 +3283,7 @@ static int __init of_unittest(void) of_unittest_changeset(); of_unittest_parse_interrupts(); of_unittest_parse_interrupts_extended(); + of_unittest_dma_get_max_cpu_address(); of_unittest_parse_dma_ranges(); of_unittest_pci_dma_ranges(); of_unittest_match_node(); From 35ec3d09ff6a49ee90e1bfd09166596f017eb5bb Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Wed, 3 Mar 2021 15:33:17 +0800 Subject: [PATCH 28/43] arm64: mm: Set ZONE_DMA size based on devicetree's dma-ranges commit 8424ecdde7df99d5426e1a1fd9f0fb36f4183032 upstream We recently introduced a 1 GB sized ZONE_DMA to cater for platforms incorporating masters that can address less than 32 bits of DMA, in particular the Raspberry Pi 4, which has 4 or 8 GB of DRAM, but has peripherals that can only address up to 1 GB (and its PCIe host bridge can only access the bottom 3 GB) The DMA layer also needs to be able to allocate memory that is guaranteed to meet those DMA constraints, for bounce buffering as well as allocating the backing for consistent mappings. This is why the 1 GB ZONE_DMA was introduced recently. Unfortunately, it turns out the having a 1 GB ZONE_DMA as well as a ZONE_DMA32 causes problems with kdump, and potentially in other places where allocations cannot cross zone boundaries. Therefore, we should avoid having two separate DMA zones when possible. So, with the help of of_dma_get_max_cpu_address() get the topmost physical address accessible to all DMA masters in system and use that information to fine-tune ZONE_DMA's size. In the absence of addressing limited masters ZONE_DMA will span the whole 32-bit address space, otherwise, in the case of the Raspberry Pi 4 it'll only span the 30-bit address space, and have ZONE_DMA32 cover the rest of the 32-bit address space. Signed-off-by: Nicolas Saenz Julienne Link: https://lore.kernel.org/r/20201119175400.9995-6-nsaenzjulienne@suse.de Signed-off-by: Catalin Marinas Cc: Signed-off-by: Jing Xiangfeng Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/init.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 7da912bf4222..05a1c2773629 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -42,8 +42,6 @@ #include #include -#define ARM64_ZONE_DMA_BITS 30 - /* * We need to be able to catch inadvertent references to memstart_addr * that occur (potentially in generic code) before arm64_memblock_init() @@ -188,9 +186,11 @@ static phys_addr_t __init max_zone_phys(unsigned int zone_bits) static void __init zone_sizes_init(unsigned long min, unsigned long max) { unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; + unsigned int __maybe_unused dt_zone_dma_bits; #ifdef CONFIG_ZONE_DMA - zone_dma_bits = ARM64_ZONE_DMA_BITS; + dt_zone_dma_bits = fls64(of_dma_get_max_cpu_address(NULL)); + zone_dma_bits = min(32U, dt_zone_dma_bits); arm64_dma_phys_limit = max_zone_phys(zone_dma_bits); max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit); #endif From 8eaef922e9381af7b923c4e3c57a632a87101acb Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 3 Mar 2021 15:33:18 +0800 Subject: [PATCH 29/43] arm64: mm: Set ZONE_DMA size based on early IORT scan commit 2b8652936f0ca9ca2e6c984ae76c7bfcda1b3f22 upstream We recently introduced a 1 GB sized ZONE_DMA to cater for platforms incorporating masters that can address less than 32 bits of DMA, in particular the Raspberry Pi 4, which has 4 or 8 GB of DRAM, but has peripherals that can only address up to 1 GB (and its PCIe host bridge can only access the bottom 3 GB) Instructing the DMA layer about these limitations is straight-forward, even though we had to fix some issues regarding memory limits set in the IORT for named components, and regarding the handling of ACPI _DMA methods. However, the DMA layer also needs to be able to allocate memory that is guaranteed to meet those DMA constraints, for bounce buffering as well as allocating the backing for consistent mappings. This is why the 1 GB ZONE_DMA was introduced recently. Unfortunately, it turns out the having a 1 GB ZONE_DMA as well as a ZONE_DMA32 causes problems with kdump, and potentially in other places where allocations cannot cross zone boundaries. Therefore, we should avoid having two separate DMA zones when possible. So let's do an early scan of the IORT, and only create the ZONE_DMA if we encounter any devices that need it. This puts the burden on the firmware to describe such limitations in the IORT, which may be redundant (and less precise) if _DMA methods are also being provided. However, it should be noted that this situation is highly unusual for arm64 ACPI machines. Also, the DMA subsystem still gives precedence to the _DMA method if implemented, and so we will not lose the ability to perform streaming DMA outside the ZONE_DMA if the _DMA method permits it. [nsaenz: unified implementation with DT's counterpart] Signed-off-by: Ard Biesheuvel Signed-off-by: Nicolas Saenz Julienne Tested-by: Jeremy Linton Acked-by: Lorenzo Pieralisi Acked-by: Hanjun Guo Cc: Jeremy Linton Cc: Lorenzo Pieralisi Cc: Nicolas Saenz Julienne Cc: Rob Herring Cc: Christoph Hellwig Cc: Robin Murphy Cc: Hanjun Guo Cc: Sudeep Holla Cc: Anshuman Khandual Link: https://lore.kernel.org/r/20201119175400.9995-7-nsaenzjulienne@suse.de Signed-off-by: Catalin Marinas Cc: Signed-off-by: Jing Xiangfeng Signed-off-by: Greg Kroah-Hartman --- arch/arm64/mm/init.c | 5 +++- drivers/acpi/arm64/iort.c | 55 +++++++++++++++++++++++++++++++++++++++ include/linux/acpi_iort.h | 4 +++ 3 files changed, 63 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 05a1c2773629..b913844ab740 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -186,11 +187,13 @@ static phys_addr_t __init max_zone_phys(unsigned int zone_bits) static void __init zone_sizes_init(unsigned long min, unsigned long max) { unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; + unsigned int __maybe_unused acpi_zone_dma_bits; unsigned int __maybe_unused dt_zone_dma_bits; #ifdef CONFIG_ZONE_DMA + acpi_zone_dma_bits = fls64(acpi_iort_dma_get_max_cpu_address()); dt_zone_dma_bits = fls64(of_dma_get_max_cpu_address(NULL)); - zone_dma_bits = min(32U, dt_zone_dma_bits); + zone_dma_bits = min3(32U, dt_zone_dma_bits, acpi_zone_dma_bits); arm64_dma_phys_limit = max_zone_phys(zone_dma_bits); max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit); #endif diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 94f34109695c..2494138a6905 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1730,3 +1730,58 @@ void __init acpi_iort_init(void) iort_init_platform_devices(); } + +#ifdef CONFIG_ZONE_DMA +/* + * Extract the highest CPU physical address accessible to all DMA masters in + * the system. PHYS_ADDR_MAX is returned when no constrained device is found. + */ +phys_addr_t __init acpi_iort_dma_get_max_cpu_address(void) +{ + phys_addr_t limit = PHYS_ADDR_MAX; + struct acpi_iort_node *node, *end; + struct acpi_table_iort *iort; + acpi_status status; + int i; + + if (acpi_disabled) + return limit; + + status = acpi_get_table(ACPI_SIG_IORT, 0, + (struct acpi_table_header **)&iort); + if (ACPI_FAILURE(status)) + return limit; + + node = ACPI_ADD_PTR(struct acpi_iort_node, iort, iort->node_offset); + end = ACPI_ADD_PTR(struct acpi_iort_node, iort, iort->header.length); + + for (i = 0; i < iort->node_count; i++) { + if (node >= end) + break; + + switch (node->type) { + struct acpi_iort_named_component *ncomp; + struct acpi_iort_root_complex *rc; + phys_addr_t local_limit; + + case ACPI_IORT_NODE_NAMED_COMPONENT: + ncomp = (struct acpi_iort_named_component *)node->node_data; + local_limit = DMA_BIT_MASK(ncomp->memory_address_limit); + limit = min_not_zero(limit, local_limit); + break; + + case ACPI_IORT_NODE_PCI_ROOT_COMPLEX: + if (node->revision < 1) + break; + + rc = (struct acpi_iort_root_complex *)node->node_data; + local_limit = DMA_BIT_MASK(rc->memory_address_limit); + limit = min_not_zero(limit, local_limit); + break; + } + node = ACPI_ADD_PTR(struct acpi_iort_node, node, node->length); + } + acpi_put_table(&iort->header); + return limit; +} +#endif diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 20a32120bb88..1a12baa58e40 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -38,6 +38,7 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *size); const struct iommu_ops *iort_iommu_configure_id(struct device *dev, const u32 *id_in); int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head); +phys_addr_t acpi_iort_dma_get_max_cpu_address(void); #else static inline void acpi_iort_init(void) { } static inline u32 iort_msi_map_id(struct device *dev, u32 id) @@ -55,6 +56,9 @@ static inline const struct iommu_ops *iort_iommu_configure_id( static inline int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head) { return 0; } + +static inline phys_addr_t acpi_iort_dma_get_max_cpu_address(void) +{ return PHYS_ADDR_MAX; } #endif #endif /* __ACPI_IORT_H__ */ From 4d7ed9a49b0ce079af2255b0c49b0d4b5a5ceff0 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Wed, 3 Mar 2021 15:33:19 +0800 Subject: [PATCH 30/43] mm: Remove examples from enum zone_type comment commit 04435217f96869ac3a8f055ff68c5237a60bcd7e upstream We can't really list every setup in common code. On top of that they are unlikely to stay true for long as things change in the arch trees independently of this comment. Suggested-by: Christoph Hellwig Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20201119175400.9995-8-nsaenzjulienne@suse.de Signed-off-by: Catalin Marinas Cc: Signed-off-by: Jing Xiangfeng Signed-off-by: Greg Kroah-Hartman --- include/linux/mmzone.h | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index fb3bf696c05e..9d0c454d23cd 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -354,26 +354,6 @@ enum zone_type { * DMA mask is assumed when ZONE_DMA32 is defined. Some 64-bit * platforms may need both zones as they support peripherals with * different DMA addressing limitations. - * - * Some examples: - * - * - i386 and x86_64 have a fixed 16M ZONE_DMA and ZONE_DMA32 for the - * rest of the lower 4G. - * - * - arm only uses ZONE_DMA, the size, up to 4G, may vary depending on - * the specific device. - * - * - arm64 has a fixed 1G ZONE_DMA and ZONE_DMA32 for the rest of the - * lower 4G. - * - * - powerpc only uses ZONE_DMA, the size, up to 2G, may vary - * depending on the specific device. - * - * - s390 uses ZONE_DMA fixed to the lower 2G. - * - * - ia64 and riscv only use ZONE_DMA32. - * - * - parisc uses neither. */ #ifdef CONFIG_ZONE_DMA ZONE_DMA, From 253b7a11ce26d30f375fb8b7d51e988bc2dc9cd0 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 27 Feb 2021 00:15:27 +0000 Subject: [PATCH 31/43] ALSA: ctxfi: cthw20k2: fix mask on conf to allow 4 bits [ Upstream commit 26a9630c72ebac7c564db305a6aee54a8edde70e ] Currently the mask operation on variable conf is just 3 bits so the switch statement case value of 8 is unreachable dead code. The function daio_mgr_dao_init can be passed a 4 bit value, function dao_rsc_init calls it with conf set to: conf = (desc->msr & 0x7) | (desc->passthru << 3); so clearly when desc->passthru is set to 1 then conf can be at least 8. Fix this by changing the mask to 0xf. Fixes: 8cc72361481f ("ALSA: SB X-Fi driver merge") Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20210227001527.1077484-1-colin.king@canonical.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/ctxfi/cthw20k2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/ctxfi/cthw20k2.c b/sound/pci/ctxfi/cthw20k2.c index fc1bc18caee9..85d1fc76f59e 100644 --- a/sound/pci/ctxfi/cthw20k2.c +++ b/sound/pci/ctxfi/cthw20k2.c @@ -991,7 +991,7 @@ static int daio_mgr_dao_init(void *blk, unsigned int idx, unsigned int conf) if (idx < 4) { /* S/PDIF output */ - switch ((conf & 0x7)) { + switch ((conf & 0xf)) { case 1: set_field(&ctl->txctl[idx], ATXCTL_NUC, 0); break; From 8850a814d42d2fb2b6414469fd2e3effcf9bafe9 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 1 Mar 2021 10:18:44 +0200 Subject: [PATCH 32/43] RDMA/cm: Fix IRQ restore in ib_send_cm_sidr_rep [ Upstream commit 221384df6123747d2a75517dd06cc01752f81518 ] ib_send_cm_sidr_rep() { spin_lock_irqsave() cm_send_sidr_rep_locked() { ... spin_lock_irq() .... spin_unlock_irq() <--- this will enable interrupts } spin_unlock_irqrestore() } spin_unlock_irqrestore() expects interrupts to be disabled but the internal spin_unlock_irq() will always enable hard interrupts. Fix this by replacing the internal spin_{lock,unlock}_irq() with irqsave/restore variants. It fixes the following kernel trace: raw_local_irq_restore() called with IRQs enabled WARNING: CPU: 2 PID: 20001 at kernel/locking/irqflag-debug.c:10 warn_bogus_irq_restore+0x1d/0x20 Call Trace: _raw_spin_unlock_irqrestore+0x4e/0x50 ib_send_cm_sidr_rep+0x3a/0x50 [ib_cm] cma_send_sidr_rep+0xa1/0x160 [rdma_cm] rdma_accept+0x25e/0x350 [rdma_cm] ucma_accept+0x132/0x1cc [rdma_ucm] ucma_write+0xbf/0x140 [rdma_ucm] vfs_write+0xc1/0x340 ksys_write+0xb3/0xe0 do_syscall_64+0x2d/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: 87c4c774cbef ("RDMA/cm: Protect access to remote_sidr_table") Link: https://lore.kernel.org/r/20210301081844.445823-1-leon@kernel.org Signed-off-by: Saeed Mahameed Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/core/cm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 8e578f73a074..bbba0cd42c89 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3650,6 +3650,7 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv, struct ib_cm_sidr_rep_param *param) { struct ib_mad_send_buf *msg; + unsigned long flags; int ret; lockdep_assert_held(&cm_id_priv->lock); @@ -3675,12 +3676,12 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv, return ret; } cm_id_priv->id.state = IB_CM_IDLE; - spin_lock_irq(&cm.lock); + spin_lock_irqsave(&cm.lock, flags); if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) { rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); RB_CLEAR_NODE(&cm_id_priv->sidr_id_node); } - spin_unlock_irq(&cm.lock); + spin_unlock_irqrestore(&cm.lock, flags); return 0; } From f767104af4965608b03f51942d4661d9c0332335 Mon Sep 17 00:00:00 2001 From: Julian Braha Date: Fri, 19 Feb 2021 18:32:26 -0500 Subject: [PATCH 33/43] RDMA/rxe: Fix missing kconfig dependency on CRYPTO [ Upstream commit 475f23b8c66d2892ad6acbf90ed757cafab13de7 ] When RDMA_RXE is enabled and CRYPTO is disabled, Kbuild gives the following warning: WARNING: unmet direct dependencies detected for CRYPTO_CRC32 Depends on [n]: CRYPTO [=n] Selected by [y]: - RDMA_RXE [=y] && (INFINIBAND_USER_ACCESS [=y] || !INFINIBAND_USER_ACCESS [=y]) && INET [=y] && PCI [=y] && INFINIBAND [=y] && INFINIBAND_VIRT_DMA [=y] This is because RDMA_RXE selects CRYPTO_CRC32, without depending on or selecting CRYPTO, despite that config option being subordinate to CRYPTO. Fixes: cee2688e3cd6 ("IB/rxe: Offload CRC calculation when possible") Signed-off-by: Julian Braha Link: https://lore.kernel.org/r/21525878.NYvzQUHefP@ubuntu-mate-laptop Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/sw/rxe/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig index 452149066792..06b8dc5093f7 100644 --- a/drivers/infiniband/sw/rxe/Kconfig +++ b/drivers/infiniband/sw/rxe/Kconfig @@ -4,6 +4,7 @@ config RDMA_RXE depends on INET && PCI && INFINIBAND depends on INFINIBAND_VIRT_DMA select NET_UDP_TUNNEL + select CRYPTO select CRYPTO_CRC32 help This driver implements the InfiniBand RDMA transport over From 7ef9d23bc3121be3e77ed7fbcf9d3c9ecbf7005e Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 22 Feb 2021 20:23:43 +0800 Subject: [PATCH 34/43] IB/mlx5: Add missing error code [ Upstream commit 3a9b3d4536e0c25bd3906a28c1f584177e49dd0f ] Set err to -ENOMEM if kzalloc fails instead of 0. Fixes: 759738537142 ("IB/mlx5: Enable subscription for device events over DEVX") Link: https://lore.kernel.org/r/20210222122343.19720-1-yuehaibing@huawei.com Signed-off-by: YueHaibing Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx5/devx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 26564e7d3457..efb9ec99b68b 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1973,8 +1973,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)( num_alloc_xa_entries++; event_sub = kzalloc(sizeof(*event_sub), GFP_KERNEL); - if (!event_sub) + if (!event_sub) { + err = -ENOMEM; goto err; + } list_add_tail(&event_sub->event_list, &sub_list); uverbs_uobject_get(&ev_file->uobj); From a14c6ea662110daa866450b5a2df867185982803 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 1 Mar 2021 18:01:46 -0600 Subject: [PATCH 35/43] ALSA: hda: intel-nhlt: verify config type [ Upstream commit a864e8f159b13babf552aff14a5fbe11abc017e4 ] Multiple bug reports report issues with the SOF and SST drivers when dealing with single microphone cases. We currently read the DMIC array information unconditionally but we don't check that the configuration type is actually a mic array. When the DMIC link does not rely on a mic array configuration, the recommendation is to check the format information to infer the maximum number of channels, and map this to the number of microphones. This leaves a potential for a mismatch between actual microphones available in hardware and what the ACPI table contains, but we have no other source of information. Note that single microphone configurations can alternatively be handled with a 'mic array' configuration along with a 'vendor-defined' geometry. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201251 BugLink: https://github.com/thesofproject/linux/issues/2725 Fixes: 7a33ea70e1868 ('ALSA: hda: intel-nhlt: handle NHLT VENDOR_DEFINED DMIC geometry') Signed-off-by: Pierre-Louis Bossart Reviewed-by: Guennadi Liakhovetski Reviewed-by: Rander Wang Reviewed-by: Kai Vehmanen Link: https://lore.kernel.org/r/20210302000146.1177770-1-pierre-louis.bossart@linux.intel.com Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- include/sound/intel-nhlt.h | 5 ++++ sound/hda/intel-nhlt.c | 54 +++++++++++++++++++++++++++++++------- 2 files changed, 50 insertions(+), 9 deletions(-) diff --git a/include/sound/intel-nhlt.h b/include/sound/intel-nhlt.h index 743c2f442280..d0574805865f 100644 --- a/include/sound/intel-nhlt.h +++ b/include/sound/intel-nhlt.h @@ -112,6 +112,11 @@ struct nhlt_vendor_dmic_array_config { /* TODO add vendor mic config */ } __packed; +enum { + NHLT_CONFIG_TYPE_GENERIC = 0, + NHLT_CONFIG_TYPE_MIC_ARRAY = 1 +}; + enum { NHLT_MIC_ARRAY_2CH_SMALL = 0xa, NHLT_MIC_ARRAY_2CH_BIG = 0xb, diff --git a/sound/hda/intel-nhlt.c b/sound/hda/intel-nhlt.c index 059aaf04f536..d053beccfaec 100644 --- a/sound/hda/intel-nhlt.c +++ b/sound/hda/intel-nhlt.c @@ -31,18 +31,44 @@ int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt) struct nhlt_endpoint *epnt; struct nhlt_dmic_array_config *cfg; struct nhlt_vendor_dmic_array_config *cfg_vendor; + struct nhlt_fmt *fmt_configs; unsigned int dmic_geo = 0; - u8 j; + u16 max_ch = 0; + u8 i, j; if (!nhlt) return 0; - epnt = (struct nhlt_endpoint *)nhlt->desc; + for (j = 0, epnt = nhlt->desc; j < nhlt->endpoint_count; j++, + epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length)) { - for (j = 0; j < nhlt->endpoint_count; j++) { - if (epnt->linktype == NHLT_LINK_DMIC) { - cfg = (struct nhlt_dmic_array_config *) - (epnt->config.caps); + if (epnt->linktype != NHLT_LINK_DMIC) + continue; + + cfg = (struct nhlt_dmic_array_config *)(epnt->config.caps); + fmt_configs = (struct nhlt_fmt *)(epnt->config.caps + epnt->config.size); + + /* find max number of channels based on format_configuration */ + if (fmt_configs->fmt_count) { + dev_dbg(dev, "%s: found %d format definitions\n", + __func__, fmt_configs->fmt_count); + + for (i = 0; i < fmt_configs->fmt_count; i++) { + struct wav_fmt_ext *fmt_ext; + + fmt_ext = &fmt_configs->fmt_config[i].fmt_ext; + + if (fmt_ext->fmt.channels > max_ch) + max_ch = fmt_ext->fmt.channels; + } + dev_dbg(dev, "%s: max channels found %d\n", __func__, max_ch); + } else { + dev_dbg(dev, "%s: No format information found\n", __func__); + } + + if (cfg->device_config.config_type != NHLT_CONFIG_TYPE_MIC_ARRAY) { + dmic_geo = max_ch; + } else { switch (cfg->array_type) { case NHLT_MIC_ARRAY_2CH_SMALL: case NHLT_MIC_ARRAY_2CH_BIG: @@ -59,13 +85,23 @@ int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt) dmic_geo = cfg_vendor->nb_mics; break; default: - dev_warn(dev, "undefined DMIC array_type 0x%0x\n", - cfg->array_type); + dev_warn(dev, "%s: undefined DMIC array_type 0x%0x\n", + __func__, cfg->array_type); + } + + if (dmic_geo > 0) { + dev_dbg(dev, "%s: Array with %d dmics\n", __func__, dmic_geo); + } + if (max_ch > dmic_geo) { + dev_dbg(dev, "%s: max channels %d exceed dmic number %d\n", + __func__, max_ch, dmic_geo); } } - epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length); } + dev_dbg(dev, "%s: dmic number %d max_ch %d\n", + __func__, dmic_geo, max_ch); + return dmic_geo; } EXPORT_SYMBOL_GPL(intel_nhlt_get_dmic_geo); From e88c1b25f2bc9c1c9f5a847d9ce5338560c39445 Mon Sep 17 00:00:00 2001 From: Chen Jun Date: Mon, 22 Feb 2021 13:58:40 +0000 Subject: [PATCH 36/43] ftrace: Have recordmcount use w8 to read relp->r_info in arm64_is_fake_mcount [ Upstream commit 999340d51174ce4141dd723105d4cef872b13ee9 ] On little endian system, Use aarch64_be(gcc v7.3) downloaded from linaro.org to build image with CONFIG_CPU_BIG_ENDIAN = y, CONFIG_FTRACE = y, CONFIG_DYNAMIC_FTRACE = y. gcc will create symbols of _mcount but recordmcount can not create mcount_loc for *.o. aarch64_be-linux-gnu-objdump -r fs/namei.o | grep mcount 00000000000000d0 R_AARCH64_CALL26 _mcount ... 0000000000007190 R_AARCH64_CALL26 _mcount The reason is than funciton arm64_is_fake_mcount can not work correctly. A symbol of _mcount in *.o compiled with big endian compiler likes: 00 00 00 2d 00 00 01 1b w(rp->r_info) will return 0x2d instead of 0x011b. Because w() takes uint32_t as parameter, which truncates rp->r_info. Use w8() instead w() to read relp->r_info Link: https://lkml.kernel.org/r/20210222135840.56250-1-chenjun102@huawei.com Fixes: ea0eada45632 ("recordmcount: only record relocation of type R_AARCH64_CALL26 on arm64.") Acked-by: Will Deacon Signed-off-by: Chen Jun Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Sasha Levin --- scripts/recordmcount.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index b9c2ee7ab43f..cce12e1971d8 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -438,7 +438,7 @@ static int arm_is_fake_mcount(Elf32_Rel const *rp) static int arm64_is_fake_mcount(Elf64_Rel const *rp) { - return ELF64_R_TYPE(w(rp->r_info)) != R_AARCH64_CALL26; + return ELF64_R_TYPE(w8(rp->r_info)) != R_AARCH64_CALL26; } /* 64-bit EM_MIPS has weird ELF64_Rela.r_info. From 5cc0813e7bb991781e6795d6faee5b30722047d5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 3 Mar 2021 13:59:12 +0300 Subject: [PATCH 37/43] rsxx: Return -EFAULT if copy_to_user() fails [ Upstream commit 77516d25f54912a7baedeeac1b1b828b6f285152 ] The copy_to_user() function returns the number of bytes remaining but we want to return -EFAULT to the user if it can't complete the copy. The "st" variable only holds zero on success or negative error codes on failure so the type should be int. Fixes: 36f988e978f8 ("rsxx: Adding in debugfs entries.") Signed-off-by: Dan Carpenter Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/rsxx/core.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index 63f549889f87..5ac1881396af 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -165,15 +165,17 @@ static ssize_t rsxx_cram_read(struct file *fp, char __user *ubuf, { struct rsxx_cardinfo *card = file_inode(fp)->i_private; char *buf; - ssize_t st; + int st; buf = kzalloc(cnt, GFP_KERNEL); if (!buf) return -ENOMEM; st = rsxx_creg_read(card, CREG_ADD_CRAM + (u32)*ppos, cnt, buf, 1); - if (!st) - st = copy_to_user(ubuf, buf, cnt); + if (!st) { + if (copy_to_user(ubuf, buf, cnt)) + st = -EFAULT; + } kfree(buf); if (st) return st; From a7cefacccafa5d099e6ff3c9b31056f863a1f58f Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Sat, 27 Feb 2021 15:39:09 +0800 Subject: [PATCH 38/43] iommu/vt-d: Fix status code for Allocate/Free PASID command [ Upstream commit 444d66a23c1f1e4c4d12aed4812681d0ad835d60 ] As per Intel vt-d spec, Rev 3.0 (section 10.4.45 "Virtual Command Response Register"), the status code of "No PASID available" error in response to the Allocate PASID command is 2, not 1. The same for "Invalid PASID" error in response to the Free PASID command. We will otherwise see confusing kernel log under the command failure from guest side. Fix it. Fixes: 24f27d32ab6b ("iommu/vt-d: Enlightened PASID allocation") Signed-off-by: Zenghui Yu Acked-by: Lu Baolu Link: https://lore.kernel.org/r/20210227073909.432-1-yuzenghui@huawei.com Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/intel/pasid.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index 97dfcffbf495..444c0bec221a 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -30,8 +30,8 @@ #define VCMD_VRSP_IP 0x1 #define VCMD_VRSP_SC(e) (((e) >> 1) & 0x3) #define VCMD_VRSP_SC_SUCCESS 0 -#define VCMD_VRSP_SC_NO_PASID_AVAIL 1 -#define VCMD_VRSP_SC_INVALID_PASID 1 +#define VCMD_VRSP_SC_NO_PASID_AVAIL 2 +#define VCMD_VRSP_SC_INVALID_PASID 2 #define VCMD_VRSP_RESULT_PASID(e) (((e) >> 8) & 0xfffff) #define VCMD_CMD_OPERAND(e) ((e) << 8) /* From b70e6aacbe3207abfc32e923036bd2667f96aee4 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 26 Jan 2021 09:09:51 +0100 Subject: [PATCH 39/43] Revert "arm64: dts: amlogic: add missing ethernet reset ID" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 19f6fe976a61f9afc289b062b7ef67f99b72e7b9 upstream. It has been reported on IRC and in KernelCI boot tests, this change breaks internal PHY support on the Amlogic G12A/SM1 Based boards. We suspect the added signal to reset more than the Ethernet MAC but also the MDIO/(RG)MII mux used to redirect the MAC signals to the internal PHY. This reverts commit f3362f0c18174a1f334a419ab7d567a36bd1b3f3 while we find and acceptable solution to cleanly reset the Ethernet MAC. Reported-by: Corentin Labbe Acked-by: Jérôme Brunet Signed-off-by: Neil Armstrong Signed-off-by: Kevin Hilman Link: https://lore.kernel.org/r/20210126080951.2383740-1-narmstrong@baylibre.com Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/amlogic/meson-axg.dtsi | 2 -- arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi | 2 -- arch/arm64/boot/dts/amlogic/meson-gx.dtsi | 3 --- 3 files changed, 7 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi index 724ee179b316..fae48efae83e 100644 --- a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi @@ -227,8 +227,6 @@ "timing-adjustment"; rx-fifo-depth = <4096>; tx-fifo-depth = <2048>; - resets = <&reset RESET_ETHERNET>; - reset-names = "stmmaceth"; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi index a6127002573b..959b299344e5 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi @@ -224,8 +224,6 @@ "timing-adjustment"; rx-fifo-depth = <4096>; tx-fifo-depth = <2048>; - resets = <&reset RESET_ETHERNET>; - reset-names = "stmmaceth"; status = "disabled"; mdio0: mdio { diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index 726b91d3a905..0edd137151f8 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -13,7 +13,6 @@ #include #include #include -#include #include / { @@ -576,8 +575,6 @@ interrupt-names = "macirq"; rx-fifo-depth = <4096>; tx-fifo-depth = <2048>; - resets = <&reset RESET_ETHERNET>; - reset-names = "stmmaceth"; power-domains = <&pwrc PWRC_GXBB_ETHERNET_MEM_ID>; status = "disabled"; }; From 4645271c32b440d1e0031e979a39dad46f396401 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 1 Dec 2020 12:47:25 +0000 Subject: [PATCH 40/43] of: unittest: Fix build on architectures without CONFIG_OF_ADDRESS commit aed5041ef9a3f594ed9dc0bb5ee7e1bbccfd3366 upstream. of_dma_get_max_cpu_address() is not defined if !CONFIG_OF_ADDRESS, so return early in of_unittest_dma_get_max_cpu_address(). Fixes: 07d13a1d6120 ("of: unittest: Add test for of_dma_get_max_cpu_address()") Reported-by: kernel test robot Signed-off-by: Catalin Marinas Cc: Jing Xiangfeng Signed-off-by: Greg Kroah-Hartman --- drivers/of/unittest.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 98cc0163301b..eb51bc147440 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -874,6 +874,9 @@ static void __init of_unittest_dma_get_max_cpu_address(void) struct device_node *np; phys_addr_t cpu_addr; + if (!IS_ENABLED(CONFIG_OF_ADDRESS)) + return; + np = of_find_node_by_path("/testcase-data/address-tests"); if (!np) { pr_err("missing testcase data\n"); From aa40f5e33ce337a037ec06cc11aae49be64c306c Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 1 Feb 2021 11:53:05 +0900 Subject: [PATCH 41/43] tomoyo: recognize kernel threads correctly commit 9c83465f3245c2faa82ffeb7016f40f02bfaa0ad upstream. Commit db68ce10c4f0a27c ("new helper: uaccess_kernel()") replaced segment_eq(get_fs(), KERNEL_DS) with uaccess_kernel(). But the correct method for tomoyo to check whether current is a kernel thread in order to assume that kernel threads are privileged for socket operations was (current->flags & PF_KTHREAD). Now that uaccess_kernel() became 0 on x86, tomoyo has to fix this problem. Do like commit 942cb357ae7d9249 ("Smack: Handle io_uring kernel thread privileges") does. Signed-off-by: Tetsuo Handa Signed-off-by: Greg Kroah-Hartman --- security/tomoyo/network.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/tomoyo/network.c b/security/tomoyo/network.c index a89ed55d85d4..478f757ff843 100644 --- a/security/tomoyo/network.c +++ b/security/tomoyo/network.c @@ -613,7 +613,7 @@ static int tomoyo_check_unix_address(struct sockaddr *addr, static bool tomoyo_kernel_service(void) { /* Nothing to do if I am a kernel service. */ - return uaccess_kernel(); + return (current->flags & (PF_KTHREAD | PF_IO_WORKER)) == PF_KTHREAD; } /** From b31a9f0c366d7bf2d6d696c99919e603639a9f0a Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 14 Feb 2021 17:38:30 +0100 Subject: [PATCH 42/43] r8169: fix resuming from suspend on RTL8105e if machine runs on battery commit d2a04370817fc7b0172dad2ef2decf907e1a304e upstream. Armin reported that after referenced commit his RTL8105e is dead when resuming from suspend and machine runs on battery. This patch has been confirmed to fix the issue. Fixes: e80bd76fbf56 ("r8169: work around power-saving bug on some chip versions") Reported-by: Armin Wolf Tested-by: Armin Wolf Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index cfcc3ac61318..b7d5eaa70a67 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2244,6 +2244,7 @@ static void rtl_pll_power_down(struct rtl8169_private *tp) switch (tp->mac_version) { case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_26: + case RTL_GIGA_MAC_VER_29 ... RTL_GIGA_MAC_VER_30: case RTL_GIGA_MAC_VER_32 ... RTL_GIGA_MAC_VER_33: case RTL_GIGA_MAC_VER_37: case RTL_GIGA_MAC_VER_39: @@ -2271,6 +2272,7 @@ static void rtl_pll_power_up(struct rtl8169_private *tp) { switch (tp->mac_version) { case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_26: + case RTL_GIGA_MAC_VER_29 ... RTL_GIGA_MAC_VER_30: case RTL_GIGA_MAC_VER_32 ... RTL_GIGA_MAC_VER_33: case RTL_GIGA_MAC_VER_37: case RTL_GIGA_MAC_VER_39: From b672142f76cb904067e3ac6d04002cac3a976f46 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 9 Mar 2021 11:11:15 +0100 Subject: [PATCH 43/43] Linux 5.10.22 Tested-by: Jon Hunter Tested-by: Florian Fainelli Tested-by: Jason Self Tested-by: Pavel Machek (CIP) Tested-by: Guenter Roeck Tested-by: Hulk Robot Tested-by: Linux Kernel Functional Testing Link: https://lore.kernel.org/r/20210308122718.120213856@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 98ae9007e8a5..12a2a7271fcb 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 21 +SUBLEVEL = 22 EXTRAVERSION = NAME = Dare mighty things