From c956b92ee19b978bd8da50e30271a0a4c62bea28 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 1 Apr 2022 19:18:36 +0200 Subject: [PATCH 01/26] ata: pata_mpc52xx: Prepare cleanup of powerpc's asm/prom.h powerpc's asm/prom.h brings some headers that it doesn't need itself. In order to clean it up, first add missing headers in users of asm/prom.h Signed-off-by: Christophe Leroy Reviewed-by: Sergey Shtylyov Signed-off-by: Damien Le Moal --- drivers/ata/pata_mpc52xx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/ata/pata_mpc52xx.c b/drivers/ata/pata_mpc52xx.c index 3250ef317df6..03b6ae37a578 100644 --- a/drivers/ata/pata_mpc52xx.c +++ b/drivers/ata/pata_mpc52xx.c @@ -19,11 +19,12 @@ #include #include #include +#include +#include #include #include #include -#include #include #include From 4f1a22ee7b576a38dc5705837c9b0de0c7b5b064 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 8 Apr 2022 17:04:12 +0800 Subject: [PATCH 02/26] libata: Improve ATA queued command allocation Improve ATA queued command allocation as follows: - For attaining a qc tag for a SAS host we need to allocate a bit in ata_port.sas_tag_allocated bitmap. However we already have a unique tag per device in range [0, ATA_MAX_QUEUE -1] in the scsi cmnd budget token, so just use that instead. - It is a bit pointless to have ata_qc_new_init() in libata-core.c since it pokes scsi internals, so inline it in ata_scsi_qc_new() (in libata-scsi.c). Also update Doc accordingly. - Use standard SCSI helpers set_host_byte() and set_status_byte() in ata_scsi_qc_new(). Christoph Hellwig originally contributed the change to inline ata_qc_new_init(). 
Signed-off-by: John Garry Reviewed-by: Christoph Hellwig Signed-off-by: Damien Le Moal --- Documentation/driver-api/libata.rst | 11 ------- drivers/ata/libata-core.c | 48 +---------------------------- drivers/ata/libata-sata.c | 25 --------------- drivers/ata/libata-scsi.c | 46 ++++++++++++++++++++------- drivers/ata/libata.h | 13 -------- include/linux/libata.h | 1 - 6 files changed, 36 insertions(+), 108 deletions(-) diff --git a/Documentation/driver-api/libata.rst b/Documentation/driver-api/libata.rst index d477e296bda5..311af516a3fd 100644 --- a/Documentation/driver-api/libata.rst +++ b/Documentation/driver-api/libata.rst @@ -424,12 +424,6 @@ How commands are issued ----------------------- Internal commands - First, qc is allocated and initialized using :c:func:`ata_qc_new_init`. - Although :c:func:`ata_qc_new_init` doesn't implement any wait or retry - mechanism when qc is not available, internal commands are currently - issued only during initialization and error recovery, so no other - command is active and allocation is guaranteed to succeed. - Once allocated qc's taskfile is initialized for the command to be executed. qc currently has two mechanisms to notify completion. One is via ``qc->complete_fn()`` callback and the other is completion @@ -447,11 +441,6 @@ SCSI commands translated. No qc is involved in processing a simulated scmd. The result is computed right away and the scmd is completed. - For a translated scmd, :c:func:`ata_qc_new_init` is invoked to allocate a - qc and the scmd is translated into the qc. SCSI midlayer's - completion notification function pointer is stored into - ``qc->scsidone``. - ``qc->complete_fn()`` callback is used for completion notification. ATA commands use :c:func:`ata_scsi_qc_complete` while ATAPI commands use :c:func:`atapi_qc_complete`. 
Both functions end up calling ``qc->scsidone`` diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index ca64837641be..23d1dc5faf73 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4566,42 +4566,6 @@ void swap_buf_le16(u16 *buf, unsigned int buf_words) #endif /* __BIG_ENDIAN */ } -/** - * ata_qc_new_init - Request an available ATA command, and initialize it - * @dev: Device from whom we request an available command structure - * @tag: tag - * - * LOCKING: - * None. - */ - -struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag) -{ - struct ata_port *ap = dev->link->ap; - struct ata_queued_cmd *qc; - - /* no command while frozen */ - if (unlikely(ap->pflags & ATA_PFLAG_FROZEN)) - return NULL; - - /* libsas case */ - if (ap->flags & ATA_FLAG_SAS_HOST) { - tag = ata_sas_allocate_tag(ap); - if (tag < 0) - return NULL; - } - - qc = __ata_qc_from_tag(ap, tag); - qc->tag = qc->hw_tag = tag; - qc->scsicmd = NULL; - qc->ap = ap; - qc->dev = dev; - - ata_qc_reinit(qc); - - return qc; -} - /** * ata_qc_free - free unused ata_queued_cmd * @qc: Command to complete @@ -4614,19 +4578,9 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag) */ void ata_qc_free(struct ata_queued_cmd *qc) { - struct ata_port *ap; - unsigned int tag; - - WARN_ON_ONCE(qc == NULL); /* ata_qc_from_tag _might_ return NULL */ - ap = qc->ap; - qc->flags = 0; - tag = qc->tag; - if (ata_tag_valid(tag)) { + if (ata_tag_valid(qc->tag)) qc->tag = ATA_TAG_POISON; - if (ap->flags & ATA_FLAG_SAS_HOST) - ata_sas_free_tag(tag, ap); - } } void __ata_qc_complete(struct ata_queued_cmd *qc) diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index 044a16daa2d4..7a5fe41aa5ae 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -1268,31 +1268,6 @@ int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap) } EXPORT_SYMBOL_GPL(ata_sas_queuecmd); -int ata_sas_allocate_tag(struct ata_port *ap) -{ - unsigned 
int max_queue = ap->host->n_tags; - unsigned int i, tag; - - for (i = 0, tag = ap->sas_last_tag + 1; i < max_queue; i++, tag++) { - tag = tag < max_queue ? tag : 0; - - /* the last tag is reserved for internal command. */ - if (ata_tag_internal(tag)) - continue; - - if (!test_and_set_bit(tag, &ap->sas_tag_allocated)) { - ap->sas_last_tag = tag; - return tag; - } - } - return -1; -} - -void ata_sas_free_tag(unsigned int tag, struct ata_port *ap) -{ - clear_bit(tag, &ap->sas_tag_allocated); -} - /** * sata_async_notification - SATA async notification handler * @ap: ATA port where async notification is received diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 06c9d90238d9..42cecf95a4e5 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -638,24 +638,48 @@ EXPORT_SYMBOL_GPL(ata_scsi_ioctl); static struct ata_queued_cmd *ata_scsi_qc_new(struct ata_device *dev, struct scsi_cmnd *cmd) { + struct ata_port *ap = dev->link->ap; struct ata_queued_cmd *qc; + int tag; - qc = ata_qc_new_init(dev, scsi_cmd_to_rq(cmd)->tag); - if (qc) { - qc->scsicmd = cmd; - qc->scsidone = scsi_done; + if (unlikely(ap->pflags & ATA_PFLAG_FROZEN)) + goto fail; - qc->sg = scsi_sglist(cmd); - qc->n_elem = scsi_sg_count(cmd); - - if (scsi_cmd_to_rq(cmd)->rq_flags & RQF_QUIET) - qc->flags |= ATA_QCFLAG_QUIET; + if (ap->flags & ATA_FLAG_SAS_HOST) { + /* + * SAS hosts may queue > ATA_MAX_QUEUE commands so use + * unique per-device budget token as a tag. 
+ */ + if (WARN_ON_ONCE(cmd->budget_token >= ATA_MAX_QUEUE)) + goto fail; + tag = cmd->budget_token; } else { - cmd->result = (DID_OK << 16) | SAM_STAT_TASK_SET_FULL; - scsi_done(cmd); + tag = scsi_cmd_to_rq(cmd)->tag; } + qc = __ata_qc_from_tag(ap, tag); + qc->tag = qc->hw_tag = tag; + qc->ap = ap; + qc->dev = dev; + + ata_qc_reinit(qc); + + qc->scsicmd = cmd; + qc->scsidone = scsi_done; + + qc->sg = scsi_sglist(cmd); + qc->n_elem = scsi_sg_count(cmd); + + if (scsi_cmd_to_rq(cmd)->rq_flags & RQF_QUIET) + qc->flags |= ATA_QCFLAG_QUIET; + return qc; + +fail: + set_host_byte(cmd, DID_OK); + set_status_byte(cmd, SAM_STAT_TASK_SET_FULL); + scsi_done(cmd); + return NULL; } static void ata_qc_set_pc_nbytes(struct ata_queued_cmd *qc) diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index c9c2496d91ea..926a7f41303d 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -44,7 +44,6 @@ static inline void ata_force_cbl(struct ata_port *ap) { } #endif extern u64 ata_tf_to_lba(const struct ata_taskfile *tf); extern u64 ata_tf_to_lba48(const struct ata_taskfile *tf); -extern struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag); extern int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev, u64 block, u32 n_block, unsigned int tf_flags, unsigned int tag, int class); @@ -91,18 +90,6 @@ extern unsigned int ata_read_log_page(struct ata_device *dev, u8 log, #define to_ata_port(d) container_of(d, struct ata_port, tdev) -/* libata-sata.c */ -#ifdef CONFIG_SATA_HOST -int ata_sas_allocate_tag(struct ata_port *ap); -void ata_sas_free_tag(unsigned int tag, struct ata_port *ap); -#else -static inline int ata_sas_allocate_tag(struct ata_port *ap) -{ - return -EOPNOTSUPP; -} -static inline void ata_sas_free_tag(unsigned int tag, struct ata_port *ap) { } -#endif - /* libata-acpi.c */ #ifdef CONFIG_ATA_ACPI extern unsigned int ata_acpi_gtf_filter; diff --git a/include/linux/libata.h b/include/linux/libata.h index 9b1d3d8b1252..16107122e587 100644 
--- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -820,7 +820,6 @@ struct ata_port { unsigned int cbl; /* cable type; ATA_CBL_xxx */ struct ata_queued_cmd qcmd[ATA_MAX_QUEUE + 1]; - unsigned long sas_tag_allocated; /* for sas tag allocation only */ u64 qc_active; int nr_active_links; /* #links with active qcs */ unsigned int sas_last_tag; /* track next tag hw expects */ From a28c1ab312712c26a8d004af1f68628d625dafac Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Sat, 9 Apr 2022 22:13:56 +0300 Subject: [PATCH 03/26] ata: libata-core: fix parameter type in ata_xfer_mode2shift() The data transfer mode that corresponds to the 'xfer_mode' parameter for ata_xfer_mode2shift() is an 8-bit *unsigned* value. Using *unsigned long* to declare the parameter leads to a problematic implicit *int* to *unsigned long* cast and was most probably a result of a copy/paste mistake -- use the 'u8' type instead, as in ata_xfer_mode2mask()... Found by Linux Verification Center (linuxtesting.org) with the SVACE static analysis tool. Signed-off-by: Sergey Shtylyov Signed-off-by: Damien Le Moal --- drivers/ata/libata-core.c | 2 +- include/linux/libata.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 23d1dc5faf73..eb8ff7b22616 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -898,7 +898,7 @@ EXPORT_SYMBOL_GPL(ata_xfer_mode2mask); * RETURNS: * Matching xfer_shift, -1 if no match found. 
*/ -int ata_xfer_mode2shift(unsigned long xfer_mode) +int ata_xfer_mode2shift(u8 xfer_mode) { const struct ata_xfer_ent *ent; diff --git a/include/linux/libata.h b/include/linux/libata.h index 16107122e587..732de9014626 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1110,7 +1110,7 @@ extern void ata_unpack_xfermask(unsigned long xfer_mask, unsigned long *udma_mask); extern u8 ata_xfer_mask2mode(unsigned long xfer_mask); extern unsigned long ata_xfer_mode2mask(u8 xfer_mode); -extern int ata_xfer_mode2shift(unsigned long xfer_mode); +extern int ata_xfer_mode2shift(u8 xfer_mode); extern const char *ata_mode_string(unsigned long xfer_mask); extern unsigned long ata_id_xfermask(const u16 *id); extern int ata_std_qc_defer(struct ata_queued_cmd *qc); From dafbbf5c57dd6ae01d20b894bc2200e9d9834c4e Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Tue, 12 Apr 2022 20:26:46 +0300 Subject: [PATCH 04/26] ata: pata_sil680: fix result type of sil680_sel{dev|reg}() sil680_sel{dev|reg}() return a PCI config space address but needlessly use the *unsigned long* type for that, whereas the PCI config space accessors take *int* for the address parameter. Switch these functions to returning *int*, updating the local variables at their call sites. Get rid of the 'base' local variables in these functions, while at it... Found by Linux Verification Center (linuxtesting.org) with the SVACE static analysis tool. Signed-off-by: Sergey Shtylyov Signed-off-by: Damien Le Moal --- drivers/ata/pata_sil680.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/drivers/ata/pata_sil680.c b/drivers/ata/pata_sil680.c index 0da58ce20d82..67ef2e26d7df 100644 --- a/drivers/ata/pata_sil680.c +++ b/drivers/ata/pata_sil680.c @@ -47,11 +47,9 @@ * criticial. 
*/ -static unsigned long sil680_selreg(struct ata_port *ap, int r) +static int sil680_selreg(struct ata_port *ap, int r) { - unsigned long base = 0xA0 + r; - base += (ap->port_no << 4); - return base; + return 0xA0 + (ap->port_no << 4) + r; } /** @@ -65,12 +63,9 @@ static unsigned long sil680_selreg(struct ata_port *ap, int r) * the unit shift. */ -static unsigned long sil680_seldev(struct ata_port *ap, struct ata_device *adev, int r) +static int sil680_seldev(struct ata_port *ap, struct ata_device *adev, int r) { - unsigned long base = 0xA0 + r; - base += (ap->port_no << 4); - base |= adev->devno ? 2 : 0; - return base; + return 0xA0 + (ap->port_no << 4) + r + (adev->devno << 1); } @@ -85,8 +80,9 @@ static unsigned long sil680_seldev(struct ata_port *ap, struct ata_device *adev, static int sil680_cable_detect(struct ata_port *ap) { struct pci_dev *pdev = to_pci_dev(ap->host->dev); - unsigned long addr = sil680_selreg(ap, 0); + int addr = sil680_selreg(ap, 0); u8 ata66; + pci_read_config_byte(pdev, addr, &ata66); if (ata66 & 1) return ATA_CBL_PATA80; @@ -113,9 +109,9 @@ static void sil680_set_piomode(struct ata_port *ap, struct ata_device *adev) 0x328A, 0x2283, 0x1281, 0x10C3, 0x10C1 }; - unsigned long tfaddr = sil680_selreg(ap, 0x02); - unsigned long addr = sil680_seldev(ap, adev, 0x04); - unsigned long addr_mask = 0x80 + 4 * ap->port_no; + int tfaddr = sil680_selreg(ap, 0x02); + int addr = sil680_seldev(ap, adev, 0x04); + int addr_mask = 0x80 + 4 * ap->port_no; struct pci_dev *pdev = to_pci_dev(ap->host->dev); int pio = adev->pio_mode - XFER_PIO_0; int lowest_pio = pio; @@ -165,9 +161,9 @@ static void sil680_set_dmamode(struct ata_port *ap, struct ata_device *adev) static const u16 dma_table[3] = { 0x2208, 0x10C2, 0x10C1 }; struct pci_dev *pdev = to_pci_dev(ap->host->dev); - unsigned long ma = sil680_seldev(ap, adev, 0x08); - unsigned long ua = sil680_seldev(ap, adev, 0x0C); - unsigned long addr_mask = 0x80 + 4 * ap->port_no; + int ma = sil680_seldev(ap, adev, 
0x08); + int ua = sil680_seldev(ap, adev, 0x0C); + int addr_mask = 0x80 + 4 * ap->port_no; int port_shift = adev->devno * 4; u8 scsc, mode; u16 multi, ultra; @@ -219,7 +215,7 @@ static void sil680_sff_exec_command(struct ata_port *ap, static bool sil680_sff_irq_check(struct ata_port *ap) { struct pci_dev *pdev = to_pci_dev(ap->host->dev); - unsigned long addr = sil680_selreg(ap, 1); + int addr = sil680_selreg(ap, 1); u8 val; pci_read_config_byte(pdev, addr, &val); From 35577381b55ffb4d87cdc9c0d0ada0e81a7d3657 Mon Sep 17 00:00:00 2001 From: Sergey Shtylyov Date: Tue, 12 Apr 2022 23:39:52 +0300 Subject: [PATCH 05/26] ata: pata_via: fix sloppy typing in via_do_set_mode() The local variables 'T' and 'UT' are needlessly declared as *unsigned* *long* -- the corresponding parameters of ata_timing_compute() are both declared as *int*. While fixing up those declarations, also make the 'via_clock' and 'T' variables *const* as they are never re-assigned after initialization -- the object code should remain the same as gcc previously used copy propagation anyway... Found by Linux Verification Center (linuxtesting.org) with the SVACE static analysis tool. 
Signed-off-by: Sergey Shtylyov Signed-off-by: Damien Le Moal --- drivers/ata/pata_via.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c index 439ca882f73c..215c02d4056a 100644 --- a/drivers/ata/pata_via.c +++ b/drivers/ata/pata_via.c @@ -248,9 +248,9 @@ static void via_do_set_mode(struct ata_port *ap, struct ata_device *adev, struct pci_dev *pdev = to_pci_dev(ap->host->dev); struct ata_device *peer = ata_dev_pair(adev); struct ata_timing t, p; - static int via_clock = 33333; /* Bus clock in kHZ */ - unsigned long T = 1000000000 / via_clock; - unsigned long UT = T; + const int via_clock = 33333; /* Bus clock in kHz */ + const int T = 1000000000 / via_clock; + int UT = T; int ut; int offset = 3 - (2*ap->port_no) - adev->devno; From 75d8cce128c516fe6cf4b8683e8fe1a59e919902 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sun, 10 Apr 2022 14:49:36 +0200 Subject: [PATCH 06/26] lib/irq_poll: Prevent softirq pending leak in irq_poll_cpu_dead() irq_poll_cpu_dead() pulls the blk_cpu_iopoll backlog from the dead CPU and raises the POLL softirq with __raise_softirq_irqoff() on the CPU it is running on. That just sets the bit in the pending softirq mask. This means the handling of the softirq is delayed until the next interrupt or a local_bh_disable/enable() pair. As a consequence the CPU on which this code runs can reach idle with the POLL softirq pending, which triggers a warning in the NOHZ idle code. Add a local_bh_disable/enable() pair around the interrupts disabled section in irq_poll_cpu_dead(). local_bh_enable will handle the pending softirq. 
[tglx: Massaged changelog and comment] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/87k0bxgl27.ffs@tglx --- lib/irq_poll.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/irq_poll.c b/lib/irq_poll.c index 2f17b488d58e..2d5329a42105 100644 --- a/lib/irq_poll.c +++ b/lib/irq_poll.c @@ -188,14 +188,18 @@ EXPORT_SYMBOL(irq_poll_init); static int irq_poll_cpu_dead(unsigned int cpu) { /* - * If a CPU goes away, splice its entries to the current CPU - * and trigger a run of the softirq + * If a CPU goes away, splice its entries to the current CPU and + * set the POLL softirq bit. The local_bh_disable()/enable() pair + * ensures that it is handled. Otherwise the current CPU could + * reach idle with the POLL softirq pending. */ + local_bh_disable(); local_irq_disable(); list_splice_init(&per_cpu(blk_cpu_iopoll, cpu), this_cpu_ptr(&blk_cpu_iopoll)); __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); local_irq_enable(); + local_bh_enable(); return 0; } From 8feecea4de7ee1bd9712947bb85d495b1d1e438d Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Mon, 11 Apr 2022 14:43:05 +0200 Subject: [PATCH 07/26] dt-bindings: ata: renesas,rcar-sata: Add r8a774e1 support Document SATA support for the RZ/G2H SoC, no driver change required. 
Signed-off-by: Lad Prabhakar Reviewed-by: Marian-Cristian Rotariu Acked-by: Rob Herring Signed-off-by: Geert Uytterhoeven Acked-by: Damien Le Moal Signed-off-by: Damien Le Moal --- Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml b/Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml index c060c7914cae..c4e4a9eab658 100644 --- a/Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml +++ b/Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml @@ -26,6 +26,7 @@ properties: - items: - enum: - renesas,sata-r8a774b1 # RZ/G2N + - renesas,sata-r8a774e1 # RZ/G2H - renesas,sata-r8a7795 # R-Car H3 - renesas,sata-r8a77965 # R-Car M3-N - const: renesas,rcar-gen3-sata # generic R-Car Gen3 or RZ/G2 From b954ebba296bb2eb2e38322f17aaa6426934bd7e Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 12 Apr 2022 20:52:35 +0900 Subject: [PATCH 08/26] zonefs: Clear inode information flags on inode creation Ensure that the i_flags field of struct zonefs_inode_info is cleared to 0 when initializing a zone file inode, avoiding seeing the flag ZONEFS_ZONE_OPEN being incorrectly set. 
Fixes: b5c00e975779 ("zonefs: open/close zone on file open/close") Cc: Signed-off-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hans Holmberg --- fs/zonefs/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 3614c7834007..75d8dabe0807 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -1142,6 +1142,7 @@ static struct inode *zonefs_alloc_inode(struct super_block *sb) inode_init_once(&zi->i_vnode); mutex_init(&zi->i_truncate_mutex); zi->i_wr_refcnt = 0; + zi->i_flags = 0; return &zi->i_vnode; } From 19139539207934aef6335bdef09c9e4bd70d1808 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 12 Apr 2022 17:41:37 +0900 Subject: [PATCH 09/26] zonefs: Fix management of open zones The mount option "explicit_open" manages the device open zone resources to ensure that if an application opens a sequential file for writing, the file zone can always be written by explicitly opening the zone and accounting for that state with the s_open_zones counter. However, if some zones are already open when mounting, the device open zone resource usage status will be larger than the initial s_open_zones value of 0. Ensure that this inconsistency does not happen by closing any sequential zone that is open when mounting. Furthermore, with ZNS drives, closing an explicitly open zone that has not been written will change the zone state to "closed", that is, the zone will remain in an active state. Since this can then cause failures of explicit open operations on other zones if the drive active zone resources are exceeded, we need to make sure that the zone is not active anymore by resetting it instead of closing it. To address this, zonefs_zone_mgmt() is modified to change a REQ_OP_ZONE_CLOSE request into a REQ_OP_ZONE_RESET for sequential zones that have not been written. 
Fixes: b5c00e975779 ("zonefs: open/close zone on file open/close") Cc: Signed-off-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Hans Holmberg --- fs/zonefs/super.c | 45 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 75d8dabe0807..e20e7c841489 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -35,6 +35,17 @@ static inline int zonefs_zone_mgmt(struct inode *inode, lockdep_assert_held(&zi->i_truncate_mutex); + /* + * With ZNS drives, closing an explicitly open zone that has not been + * written will change the zone state to "closed", that is, the zone + * will remain active. Since this can then cause failure of explicit + * open operation on other zones if the drive active zone resources + * are exceeded, make sure that the zone does not remain active by + * resetting it. + */ + if (op == REQ_OP_ZONE_CLOSE && !zi->i_wpoffset) + op = REQ_OP_ZONE_RESET; + trace_zonefs_zone_mgmt(inode, op); ret = blkdev_zone_mgmt(inode->i_sb->s_bdev, op, zi->i_zsector, zi->i_zone_size >> SECTOR_SHIFT, GFP_NOFS); @@ -1294,12 +1305,13 @@ static void zonefs_init_dir_inode(struct inode *parent, struct inode *inode, inc_nlink(parent); } -static void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, - enum zonefs_ztype type) +static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, + enum zonefs_ztype type) { struct super_block *sb = inode->i_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); struct zonefs_inode_info *zi = ZONEFS_I(inode); + int ret = 0; inode->i_ino = zone->start >> sbi->s_zone_sectors_shift; inode->i_mode = S_IFREG | sbi->s_perm; @@ -1324,6 +1336,22 @@ static void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, sb->s_maxbytes = max(zi->i_max_size, sb->s_maxbytes); sbi->s_blocks += zi->i_max_size >> sb->s_blocksize_bits; sbi->s_used_blocks += zi->i_wpoffset >> sb->s_blocksize_bits; + + /* + * 
For sequential zones, make sure that any open zone is closed first + * to ensure that the initial number of open zones is 0, in sync with + * the open zone accounting done when the mount option + * ZONEFS_MNTOPT_EXPLICIT_OPEN is used. + */ + if (type == ZONEFS_ZTYPE_SEQ && + (zone->cond == BLK_ZONE_COND_IMP_OPEN || + zone->cond == BLK_ZONE_COND_EXP_OPEN)) { + mutex_lock(&zi->i_truncate_mutex); + ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE); + mutex_unlock(&zi->i_truncate_mutex); + } + + return ret; } static struct dentry *zonefs_create_inode(struct dentry *parent, @@ -1333,6 +1361,7 @@ static struct dentry *zonefs_create_inode(struct dentry *parent, struct inode *dir = d_inode(parent); struct dentry *dentry; struct inode *inode; + int ret; dentry = d_alloc_name(parent, name); if (!dentry) @@ -1343,10 +1372,16 @@ static struct dentry *zonefs_create_inode(struct dentry *parent, goto dput; inode->i_ctime = inode->i_mtime = inode->i_atime = dir->i_ctime; - if (zone) - zonefs_init_file_inode(inode, zone, type); - else + if (zone) { + ret = zonefs_init_file_inode(inode, zone, type); + if (ret) { + iput(inode); + goto dput; + } + } else { zonefs_init_dir_inode(dir, inode, type); + } + d_add(dentry, inode); dir->i_size++; From 2b95a23c4f50c42fe85f0d345612075d0f2c3118 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 12 Apr 2022 17:00:13 +0900 Subject: [PATCH 10/26] zonefs: Rename super block information fields The s_open_zones field of struct zonefs_sb_info is used to count the number of files that are open for writing and may not necessarily correspond to the number of open zones on the device. For instance, an application may open for writing a sequential zone file, fully write it and keep the file open. In such a case, the zone of the file is not open anymore (it is in the full state). Avoid confusion about this counter's meaning by renaming it to s_wro_seq_files. To keep things consistent, the field s_max_open_zones is renamed to s_max_wro_seq_files. 
Signed-off-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Hans Holmberg --- fs/zonefs/super.c | 17 ++++++++++------- fs/zonefs/zonefs.h | 4 ++-- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index e20e7c841489..dafacde65659 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -1035,8 +1035,10 @@ static int zonefs_open_zone(struct inode *inode) mutex_lock(&zi->i_truncate_mutex); if (!zi->i_wr_refcnt) { - if (atomic_inc_return(&sbi->s_open_zones) > sbi->s_max_open_zones) { - atomic_dec(&sbi->s_open_zones); + unsigned int wro = atomic_inc_return(&sbi->s_wro_seq_files); + + if (wro > sbi->s_max_wro_seq_files) { + atomic_dec(&sbi->s_wro_seq_files); ret = -EBUSY; goto unlock; } @@ -1044,7 +1046,7 @@ static int zonefs_open_zone(struct inode *inode) if (i_size_read(inode) < zi->i_max_size) { ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN); if (ret) { - atomic_dec(&sbi->s_open_zones); + atomic_dec(&sbi->s_wro_seq_files); goto unlock; } zi->i_flags |= ZONEFS_ZONE_OPEN; @@ -1108,7 +1110,7 @@ static void zonefs_close_zone(struct inode *inode) } zi->i_flags &= ~ZONEFS_ZONE_OPEN; dec: - atomic_dec(&sbi->s_open_zones); + atomic_dec(&sbi->s_wro_seq_files); } mutex_unlock(&zi->i_truncate_mutex); } @@ -1688,9 +1690,10 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent) sbi->s_gid = GLOBAL_ROOT_GID; sbi->s_perm = 0640; sbi->s_mount_opts = ZONEFS_MNTOPT_ERRORS_RO; - sbi->s_max_open_zones = bdev_max_open_zones(sb->s_bdev); - atomic_set(&sbi->s_open_zones, 0); - if (!sbi->s_max_open_zones && + + atomic_set(&sbi->s_wro_seq_files, 0); + sbi->s_max_wro_seq_files = bdev_max_open_zones(sb->s_bdev); + if (!sbi->s_max_wro_seq_files && sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) { zonefs_info(sb, "No open zones limit. 
Ignoring explicit_open mount option\n"); sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN; diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h index 7b147907c328..67fd00ab173f 100644 --- a/fs/zonefs/zonefs.h +++ b/fs/zonefs/zonefs.h @@ -182,8 +182,8 @@ struct zonefs_sb_info { loff_t s_blocks; loff_t s_used_blocks; - unsigned int s_max_open_zones; - atomic_t s_open_zones; + unsigned int s_max_wro_seq_files; + atomic_t s_wro_seq_files; }; static inline struct zonefs_sb_info *ZONEFS_SB(struct super_block *sb) From 7d6dfbe03bd3bef51ead25d129dabebd8bae1ec4 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 12 Apr 2022 22:38:07 +0900 Subject: [PATCH 11/26] zonefs: Always do seq file write open accounting The explicit_open mount option forces an explicit open of the zone of sequential files that are open for writing to ensure that the open file can be written without the device failing write operations due to the open zone resources limit being exceeded. To implement this, zonefs accounts for all write open seq files when this mount option is used. This accounting however can be easily performed even when the explicit_open mount option is not used, thus allowing applications to control zone resources on their own, without relying on open() system call failures from zonefs. To implement this, the helper zonefs_file_use_exp_open() is removed and replaced with the helper zonefs_seq_file_need_wro() which tests if a file is a sequential file being opened with write access. zonefs_open_zone() and zonefs_close_zone() are renamed respectively to zonefs_seq_file_write_open() and zonefs_seq_file_write_close() and modified to update the s_wro_seq_files counter regardless of the explicit_open mount option use. If the explicit_open mount option is used, zonefs_seq_file_write_open() executes an explicit zone open operation for a sequential file open for writing for the first time, as before. 
Signed-off-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Hans Holmberg --- fs/zonefs/super.c | 80 +++++++++++++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 34 deletions(-) diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index dafacde65659..02dbdec32b2f 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -1009,13 +1009,13 @@ inode_unlock: return ret; } -static inline bool zonefs_file_use_exp_open(struct inode *inode, struct file *file) +/* + * Write open accounting is done only for sequential files. + */ +static inline bool zonefs_seq_file_need_wro(struct inode *inode, + struct file *file) { struct zonefs_inode_info *zi = ZONEFS_I(inode); - struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb); - - if (!(sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN)) - return false; if (zi->i_ztype != ZONEFS_ZTYPE_SEQ) return false; @@ -1026,30 +1026,33 @@ static inline bool zonefs_file_use_exp_open(struct inode *inode, struct file *fi return true; } -static int zonefs_open_zone(struct inode *inode) +static int zonefs_seq_file_write_open(struct inode *inode) { struct zonefs_inode_info *zi = ZONEFS_I(inode); - struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb); int ret = 0; mutex_lock(&zi->i_truncate_mutex); if (!zi->i_wr_refcnt) { + struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb); unsigned int wro = atomic_inc_return(&sbi->s_wro_seq_files); - if (wro > sbi->s_max_wro_seq_files) { - atomic_dec(&sbi->s_wro_seq_files); - ret = -EBUSY; - goto unlock; - } + if (sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) { - if (i_size_read(inode) < zi->i_max_size) { - ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN); - if (ret) { + if (wro > sbi->s_max_wro_seq_files) { atomic_dec(&sbi->s_wro_seq_files); + ret = -EBUSY; goto unlock; } - zi->i_flags |= ZONEFS_ZONE_OPEN; + + if (i_size_read(inode) < zi->i_max_size) { + ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN); + if (ret) { + atomic_dec(&sbi->s_wro_seq_files); + goto unlock; + } + 
zi->i_flags |= ZONEFS_ZONE_OPEN; + } } } @@ -1069,30 +1072,31 @@ static int zonefs_file_open(struct inode *inode, struct file *file) if (ret) return ret; - if (zonefs_file_use_exp_open(inode, file)) - return zonefs_open_zone(inode); + if (zonefs_seq_file_need_wro(inode, file)) + return zonefs_seq_file_write_open(inode); return 0; } -static void zonefs_close_zone(struct inode *inode) +static void zonefs_seq_file_write_close(struct inode *inode) { struct zonefs_inode_info *zi = ZONEFS_I(inode); + struct super_block *sb = inode->i_sb; + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); int ret = 0; mutex_lock(&zi->i_truncate_mutex); + zi->i_wr_refcnt--; - if (!zi->i_wr_refcnt) { - struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb); - struct super_block *sb = inode->i_sb; - - /* - * If the file zone is full, it is not open anymore and we only - * need to decrement the open count. - */ - if (!(zi->i_flags & ZONEFS_ZONE_OPEN)) - goto dec; + if (zi->i_wr_refcnt) + goto unlock; + /* + * The file zone may not be open anymore (e.g. the file was truncated to + * its maximum size or it was fully written). For this case, we only + * need to decrement the write open count. 
+ */ + if (zi->i_flags & ZONEFS_ZONE_OPEN) { ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE); if (ret) { __zonefs_io_error(inode, false); @@ -1104,14 +1108,22 @@ static void zonefs_close_zone(struct inode *inode) */ if (zi->i_flags & ZONEFS_ZONE_OPEN && !(sb->s_flags & SB_RDONLY)) { - zonefs_warn(sb, "closing zone failed, remounting filesystem read-only\n"); + zonefs_warn(sb, + "closing zone at %llu failed %d\n", + zi->i_zsector, ret); + zonefs_warn(sb, + "remounting filesystem read-only\n"); sb->s_flags |= SB_RDONLY; } + goto unlock; } + zi->i_flags &= ~ZONEFS_ZONE_OPEN; -dec: - atomic_dec(&sbi->s_wro_seq_files); } + + atomic_dec(&sbi->s_wro_seq_files); + +unlock: mutex_unlock(&zi->i_truncate_mutex); } @@ -1123,8 +1135,8 @@ static int zonefs_file_release(struct inode *inode, struct file *file) * the zone has gone offline or read-only). Make sure we don't fail the * close(2) for user-space. */ - if (zonefs_file_use_exp_open(inode, file)) - zonefs_close_zone(inode); + if (zonefs_seq_file_need_wro(inode, file)) + zonefs_seq_file_write_close(inode); return 0; } From 9277a6d4fbd4aaa668b19b819015f87f0da53a38 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 12 Apr 2022 16:25:34 +0900 Subject: [PATCH 12/26] zonefs: Export open zone resource information through sysfs To allow applications to easily check the current usage status of the open zone resources of the mounted device, export through sysfs the counter of write open sequential files s_wro_seq_files field of struct zonefs_sb_info. The attribute is named nr_wro_seq_files and is read only. The maximum number of write open sequential files (zones) indicated by the s_max_wro_seq_files field of struct zonefs_sb_info is also exported as the read only attribute max_wro_seq_files. 
Signed-off-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Hans Holmberg --- fs/zonefs/Makefile | 2 +- fs/zonefs/super.c | 24 +++++++-- fs/zonefs/sysfs.c | 125 +++++++++++++++++++++++++++++++++++++++++++++ fs/zonefs/zonefs.h | 10 ++++ 4 files changed, 156 insertions(+), 5 deletions(-) create mode 100644 fs/zonefs/sysfs.c diff --git a/fs/zonefs/Makefile b/fs/zonefs/Makefile index 33c1a4f1132e..9fe54f5319f2 100644 --- a/fs/zonefs/Makefile +++ b/fs/zonefs/Makefile @@ -3,4 +3,4 @@ ccflags-y += -I$(src) obj-$(CONFIG_ZONEFS_FS) += zonefs.o -zonefs-y := super.o +zonefs-y := super.o sysfs.o diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 02dbdec32b2f..aa359f27102e 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -1725,6 +1725,10 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent) if (ret) goto cleanup; + ret = zonefs_sysfs_register(sb); + if (ret) + goto cleanup; + zonefs_info(sb, "Mounting %u zones", blkdev_nr_zones(sb->s_bdev->bd_disk)); @@ -1770,6 +1774,8 @@ static void zonefs_kill_super(struct super_block *sb) if (sb->s_root) d_genocide(sb->s_root); + + zonefs_sysfs_unregister(sb); kill_block_super(sb); kfree(sbi); } @@ -1817,16 +1823,26 @@ static int __init zonefs_init(void) return ret; ret = register_filesystem(&zonefs_type); - if (ret) { - zonefs_destroy_inodecache(); - return ret; - } + if (ret) + goto destroy_inodecache; + + ret = zonefs_sysfs_init(); + if (ret) + goto unregister_fs; return 0; + +unregister_fs: + unregister_filesystem(&zonefs_type); +destroy_inodecache: + zonefs_destroy_inodecache(); + + return ret; } static void __exit zonefs_exit(void) { + zonefs_sysfs_exit(); zonefs_destroy_inodecache(); unregister_filesystem(&zonefs_type); } diff --git a/fs/zonefs/sysfs.c b/fs/zonefs/sysfs.c new file mode 100644 index 000000000000..eaeaf983ed87 --- /dev/null +++ b/fs/zonefs/sysfs.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Simple file system for zoned block devices exposing 
zones as files. + * + * Copyright (C) 2022 Western Digital Corporation or its affiliates. + */ +#include +#include +#include + +#include "zonefs.h" + +struct zonefs_sysfs_attr { + struct attribute attr; + ssize_t (*show)(struct zonefs_sb_info *sbi, char *buf); +}; + +static inline struct zonefs_sysfs_attr *to_attr(struct attribute *attr) +{ + return container_of(attr, struct zonefs_sysfs_attr, attr); +} + +#define ZONEFS_SYSFS_ATTR_RO(name) \ +static struct zonefs_sysfs_attr zonefs_sysfs_attr_##name = __ATTR_RO(name) + +#define ATTR_LIST(name) &zonefs_sysfs_attr_##name.attr + +static ssize_t zonefs_sysfs_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct zonefs_sb_info *sbi = + container_of(kobj, struct zonefs_sb_info, s_kobj); + struct zonefs_sysfs_attr *zonefs_attr = + container_of(attr, struct zonefs_sysfs_attr, attr); + + if (!zonefs_attr->show) + return 0; + + return zonefs_attr->show(sbi, buf); +} + +static ssize_t max_wro_seq_files_show(struct zonefs_sb_info *sbi, char *buf) +{ + return sysfs_emit(buf, "%u\n", sbi->s_max_wro_seq_files); +} +ZONEFS_SYSFS_ATTR_RO(max_wro_seq_files); + +static ssize_t nr_wro_seq_files_show(struct zonefs_sb_info *sbi, char *buf) +{ + return sysfs_emit(buf, "%d\n", atomic_read(&sbi->s_wro_seq_files)); +} +ZONEFS_SYSFS_ATTR_RO(nr_wro_seq_files); + +static struct attribute *zonefs_sysfs_attrs[] = { + ATTR_LIST(max_wro_seq_files), + ATTR_LIST(nr_wro_seq_files), + NULL, +}; +ATTRIBUTE_GROUPS(zonefs_sysfs); + +static void zonefs_sysfs_sb_release(struct kobject *kobj) +{ + struct zonefs_sb_info *sbi = + container_of(kobj, struct zonefs_sb_info, s_kobj); + + complete(&sbi->s_kobj_unregister); +} + +static const struct sysfs_ops zonefs_sysfs_attr_ops = { + .show = zonefs_sysfs_attr_show, +}; + +static struct kobj_type zonefs_sb_ktype = { + .default_groups = zonefs_sysfs_groups, + .sysfs_ops = &zonefs_sysfs_attr_ops, + .release = zonefs_sysfs_sb_release, +}; + +static struct kobject *zonefs_sysfs_root; + +int 
zonefs_sysfs_register(struct super_block *sb) +{ + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + int ret; + + init_completion(&sbi->s_kobj_unregister); + ret = kobject_init_and_add(&sbi->s_kobj, &zonefs_sb_ktype, + zonefs_sysfs_root, "%s", sb->s_id); + if (ret) { + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); + return ret; + } + + sbi->s_sysfs_registered = true; + + return 0; +} + +void zonefs_sysfs_unregister(struct super_block *sb) +{ + struct zonefs_sb_info *sbi = ZONEFS_SB(sb); + + if (!sbi || !sbi->s_sysfs_registered) + return; + + kobject_del(&sbi->s_kobj); + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); +} + +int __init zonefs_sysfs_init(void) +{ + zonefs_sysfs_root = kobject_create_and_add("zonefs", fs_kobj); + if (!zonefs_sysfs_root) + return -ENOMEM; + + return 0; +} + +void zonefs_sysfs_exit(void) +{ + kobject_put(zonefs_sysfs_root); + zonefs_sysfs_root = NULL; +} diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h index 67fd00ab173f..77d2d153c59d 100644 --- a/fs/zonefs/zonefs.h +++ b/fs/zonefs/zonefs.h @@ -12,6 +12,7 @@ #include #include #include +#include /* * Maximum length of file names: this only needs to be large enough to fit @@ -184,6 +185,10 @@ struct zonefs_sb_info { unsigned int s_max_wro_seq_files; atomic_t s_wro_seq_files; + + bool s_sysfs_registered; + struct kobject s_kobj; + struct completion s_kobj_unregister; }; static inline struct zonefs_sb_info *ZONEFS_SB(struct super_block *sb) @@ -198,4 +203,9 @@ static inline struct zonefs_sb_info *ZONEFS_SB(struct super_block *sb) #define zonefs_warn(sb, format, args...) 
\ pr_warn("zonefs (%s) WARNING: " format, sb->s_id, ## args) +int zonefs_sysfs_register(struct super_block *sb); +void zonefs_sysfs_unregister(struct super_block *sb); +int zonefs_sysfs_init(void); +void zonefs_sysfs_exit(void); + #endif From 87c9ce3ffec9060cf7556ed4d3c9e582c8baf575 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 12 Apr 2022 18:54:39 +0900 Subject: [PATCH 13/26] zonefs: Add active seq file accounting Modify struct zonefs_sb_info to add the s_active_seq_files atomic to count the number of seq files representing a zone that is partially written or explicitly open, that is, to count sequential files with a zone that is in an active state on the device. The helper function zonefs_account_active() is introduced to update this counter whenever a file is written or truncated. This helper is also used in the zonefs_seq_file_write_open() and zonefs_seq_file_write_close() functions when the explicit_open mount option is used. The s_active_seq_files counter is exported through sysfs using the read-only attribute nr_active_seq_files. The device maximum number of active zones is also exported through sysfs with the read-only attribute max_active_seq_files. Signed-off-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Hans Holmberg --- fs/zonefs/super.c | 71 ++++++++++++++++++++++++++++++++++++++++++---- fs/zonefs/sysfs.c | 14 +++++++++ fs/zonefs/zonefs.h | 4 +++ 3 files changed, 83 insertions(+), 6 deletions(-) diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index aa359f27102e..e65da43f1453 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -27,6 +27,39 @@ #define CREATE_TRACE_POINTS #include "trace.h" +/* + * Manage the active zone count. Called with zi->i_truncate_mutex held. 
+ */ +static void zonefs_account_active(struct inode *inode) +{ + struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + + lockdep_assert_held(&zi->i_truncate_mutex); + + if (zi->i_ztype != ZONEFS_ZTYPE_SEQ) + return; + + /* + * If the zone is active, that is, if it is explicitly open or + * partially written, check if it was already accounted as active. + */ + if ((zi->i_flags & ZONEFS_ZONE_OPEN) || + (zi->i_wpoffset > 0 && zi->i_wpoffset < zi->i_max_size)) { + if (!(zi->i_flags & ZONEFS_ZONE_ACTIVE)) { + zi->i_flags |= ZONEFS_ZONE_ACTIVE; + atomic_inc(&sbi->s_active_seq_files); + } + return; + } + + /* The zone is not active. If it was, update the active count */ + if (zi->i_flags & ZONEFS_ZONE_ACTIVE) { + zi->i_flags &= ~ZONEFS_ZONE_ACTIVE; + atomic_dec(&sbi->s_active_seq_files); + } +} + static inline int zonefs_zone_mgmt(struct inode *inode, enum req_opf op) { @@ -68,8 +101,13 @@ static inline void zonefs_i_size_write(struct inode *inode, loff_t isize) * A full zone is no longer open/active and does not need * explicit closing. 
*/ - if (isize >= zi->i_max_size) - zi->i_flags &= ~ZONEFS_ZONE_OPEN; + if (isize >= zi->i_max_size) { + struct zonefs_sb_info *sbi = ZONEFS_SB(inode->i_sb); + + if (zi->i_flags & ZONEFS_ZONE_ACTIVE) + atomic_dec(&sbi->s_active_seq_files); + zi->i_flags &= ~(ZONEFS_ZONE_OPEN | ZONEFS_ZONE_ACTIVE); + } } static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, @@ -397,6 +435,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, zonefs_update_stats(inode, data_size); zonefs_i_size_write(inode, data_size); zi->i_wpoffset = data_size; + zonefs_account_active(inode); return 0; } @@ -508,6 +547,7 @@ static int zonefs_file_truncate(struct inode *inode, loff_t isize) zonefs_update_stats(inode, isize); truncate_setsize(inode, isize); zi->i_wpoffset = isize; + zonefs_account_active(inode); unlock: mutex_unlock(&zi->i_truncate_mutex); @@ -866,8 +906,15 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) (ret > 0 || ret == -EIOCBQUEUED)) { if (ret > 0) count = ret; + + /* + * Update the zone write pointer offset assuming the write + * operation succeeded. If it did not, the error recovery path + * will correct it. Also do active seq file accounting. 
+ */ mutex_lock(&zi->i_truncate_mutex); zi->i_wpoffset += count; + zonefs_account_active(inode); mutex_unlock(&zi->i_truncate_mutex); } @@ -1052,6 +1099,7 @@ static int zonefs_seq_file_write_open(struct inode *inode) goto unlock; } zi->i_flags |= ZONEFS_ZONE_OPEN; + zonefs_account_active(inode); } } } @@ -1119,6 +1167,7 @@ static void zonefs_seq_file_write_close(struct inode *inode) } zi->i_flags &= ~ZONEFS_ZONE_OPEN; + zonefs_account_active(inode); } atomic_dec(&sbi->s_wro_seq_files); @@ -1325,7 +1374,7 @@ static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, struct super_block *sb = inode->i_sb; struct zonefs_sb_info *sbi = ZONEFS_SB(sb); struct zonefs_inode_info *zi = ZONEFS_I(inode); - int ret = 0; + int ret; inode->i_ino = zone->start >> sbi->s_zone_sectors_shift; inode->i_mode = S_IFREG | sbi->s_perm; @@ -1351,6 +1400,8 @@ static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, sbi->s_blocks += zi->i_max_size >> sb->s_blocksize_bits; sbi->s_used_blocks += zi->i_wpoffset >> sb->s_blocksize_bits; + mutex_lock(&zi->i_truncate_mutex); + /* * For sequential zones, make sure that any open zone is closed first * to ensure that the initial number of open zones is 0, in sync with @@ -1360,12 +1411,17 @@ static int zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, if (type == ZONEFS_ZTYPE_SEQ && (zone->cond == BLK_ZONE_COND_IMP_OPEN || zone->cond == BLK_ZONE_COND_EXP_OPEN)) { - mutex_lock(&zi->i_truncate_mutex); ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_CLOSE); - mutex_unlock(&zi->i_truncate_mutex); + if (ret) + goto unlock; } - return ret; + zonefs_account_active(inode); + +unlock: + mutex_unlock(&zi->i_truncate_mutex); + + return 0; } static struct dentry *zonefs_create_inode(struct dentry *parent, @@ -1711,6 +1767,9 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent) sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN; } + atomic_set(&sbi->s_active_seq_files, 0); + 
sbi->s_max_active_seq_files = bdev_max_active_zones(sb->s_bdev); + ret = zonefs_read_super(sb); if (ret) return ret; diff --git a/fs/zonefs/sysfs.c b/fs/zonefs/sysfs.c index eaeaf983ed87..9cb6755ce39a 100644 --- a/fs/zonefs/sysfs.c +++ b/fs/zonefs/sysfs.c @@ -51,9 +51,23 @@ static ssize_t nr_wro_seq_files_show(struct zonefs_sb_info *sbi, char *buf) } ZONEFS_SYSFS_ATTR_RO(nr_wro_seq_files); +static ssize_t max_active_seq_files_show(struct zonefs_sb_info *sbi, char *buf) +{ + return sysfs_emit(buf, "%u\n", sbi->s_max_active_seq_files); +} +ZONEFS_SYSFS_ATTR_RO(max_active_seq_files); + +static ssize_t nr_active_seq_files_show(struct zonefs_sb_info *sbi, char *buf) +{ + return sysfs_emit(buf, "%d\n", atomic_read(&sbi->s_active_seq_files)); +} +ZONEFS_SYSFS_ATTR_RO(nr_active_seq_files); + static struct attribute *zonefs_sysfs_attrs[] = { ATTR_LIST(max_wro_seq_files), ATTR_LIST(nr_wro_seq_files), + ATTR_LIST(max_active_seq_files), + ATTR_LIST(nr_active_seq_files), NULL, }; ATTRIBUTE_GROUPS(zonefs_sysfs); diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h index 77d2d153c59d..4b3de66c3233 100644 --- a/fs/zonefs/zonefs.h +++ b/fs/zonefs/zonefs.h @@ -40,6 +40,7 @@ static inline enum zonefs_ztype zonefs_zone_type(struct blk_zone *zone) } #define ZONEFS_ZONE_OPEN (1 << 0) +#define ZONEFS_ZONE_ACTIVE (1 << 1) /* * In-memory inode data. @@ -186,6 +187,9 @@ struct zonefs_sb_info { unsigned int s_max_wro_seq_files; atomic_t s_wro_seq_files; + unsigned int s_max_active_seq_files; + atomic_t s_active_seq_files; + bool s_sysfs_registered; struct kobject s_kobj; struct completion s_kobj_unregister; From ae4303886652248ed2568c9cb2ab0da485bfd7a7 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 18 Apr 2022 09:06:17 +0900 Subject: [PATCH 14/26] documentation: zonefs: Cleanup the mount options section Use subsections to separate the descriptions of the "error=" and "explicit-open" mount sections. 
Signed-off-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Hans Holmberg --- Documentation/filesystems/zonefs.rst | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/zonefs.rst b/Documentation/filesystems/zonefs.rst index 6b213fe9a33e..72d4baba0b6a 100644 --- a/Documentation/filesystems/zonefs.rst +++ b/Documentation/filesystems/zonefs.rst @@ -306,8 +306,15 @@ Further notes: Mount options ------------- -zonefs define the "errors=<behavior>" mount option to allow the user to specify -zonefs behavior in response to I/O errors, inode size inconsistencies or zone +zonefs defines several mount options: +* errors=<behavior> +* explicit-open + +"errors=<behavior>" option +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The "errors=<behavior>" mount option allows the user to specify zonefs +behavior in response to I/O errors, inode size inconsistencies or zone condition changes. The defined behaviors are as follow: * remount-ro (default) @@ -326,6 +333,9 @@ discover the amount of data that has been written to the zone. In the case of a read-only zone discovered at run-time, as indicated in the previous section. The size of the zone file is left unchanged from its last updated value. +"explicit-open" option +~~~~~~~~~~~~~~~~~~~~~~ + A zoned block device (e.g. an NVMe Zoned Namespace device) may have limits on the number of zones that can be active, that is, zones that are in the implicit open, explicit open or closed conditions. This potential limitation From 5716fb0d403e4edaca5de76b548081f0da1c5c6a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 20 Apr 2022 20:27:26 -0700 Subject: [PATCH 15/26] ahci: Add a generic 'controller2' RAID id Intel server platforms that support 'RAID', i.e. have platform firmware support for software-RAID metadata + features that the kernel also understands, maintain the same device-ids for RAID from generation to generation. 
This is in contrast to client platforms that have tended to roll new device-ids every platform generation. However, even though server platforms keep the ids there are still unique device-ids per controller instance. To date there have only been 2 controllers on these platforms, but platforms code named Emmitsburg add a third controller. Add the device-id for this third controller and collect it with the other generic server RAID ids. As mentioned here [1], the pain of continuing to add new and different device-ids for RAID mode to this file [2] has been heard. Ideally this device-id would not matter and the class code would remain PCI_CLASS_STORAGE_SATA_AHCI regardless of the RAID mode, but other operating systems depend on the class code *not* being AHCI when the device is in RAID mode. That said, going forward there is little reason for new server RAID ids to be added as they can simply reuse one of the existing ids even for a new controller. Server software RAID features continue to be supported on Linux. Client software RAID features continue to be not supported and the recommendation there remains to set the device to AHCI mode in platform firmware. 
Link: https://lore.kernel.org/all/8e61fb0104422e8d70701e2ddc7b1ca53f009797.camel@intel.com [1] Link: https://lore.kernel.org/all/20201119165022.GA3582@infradead.org/ [2] Cc: Damien Le Moal Cc: Christoph Hellwig Cc: Mika Westerberg Signed-off-by: Dan Williams Reviewed-by: Christoph Hellwig Signed-off-by: Damien Le Moal --- drivers/ata/ahci.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 397dfd27c90d..c1eca72b4575 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -324,7 +324,6 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x1d02), board_ahci }, /* PBG AHCI */ { PCI_VDEVICE(INTEL, 0x1d04), board_ahci }, /* PBG RAID */ { PCI_VDEVICE(INTEL, 0x1d06), board_ahci }, /* PBG RAID */ - { PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* PBG/Lewisburg RAID*/ { PCI_VDEVICE(INTEL, 0x2323), board_ahci }, /* DH89xxCC AHCI */ { PCI_VDEVICE(INTEL, 0x1e02), board_ahci }, /* Panther Point AHCI */ { PCI_VDEVICE(INTEL, 0x1e03), board_ahci_low_power }, /* Panther M AHCI */ @@ -367,7 +366,9 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x1f3e), board_ahci_avn }, /* Avoton RAID */ { PCI_VDEVICE(INTEL, 0x1f3f), board_ahci_avn }, /* Avoton RAID */ { PCI_VDEVICE(INTEL, 0x2823), board_ahci }, /* Wellsburg/Lewisburg AHCI*/ - { PCI_VDEVICE(INTEL, 0x2827), board_ahci }, /* Wellsburg/Lewisburg RAID*/ + { PCI_VDEVICE(INTEL, 0x2826), board_ahci }, /* *burg SATA0 'RAID' */ + { PCI_VDEVICE(INTEL, 0x2827), board_ahci }, /* *burg SATA1 'RAID' */ + { PCI_VDEVICE(INTEL, 0x282f), board_ahci }, /* *burg SATA2 'RAID' */ { PCI_VDEVICE(INTEL, 0x43d4), board_ahci }, /* Rocket Lake PCH-H RAID */ { PCI_VDEVICE(INTEL, 0x43d5), board_ahci }, /* Rocket Lake PCH-H RAID */ { PCI_VDEVICE(INTEL, 0x43d6), board_ahci }, /* Rocket Lake PCH-H RAID */ From e0af10ac4dcce334ef8cc36c8e146c0b932b4f81 Mon Sep 17 00:00:00 2001 From: Diego Viola Date: Thu, 21 Apr 2022 01:54:23 -0300 Subject: 
[PATCH 16/26] ata: libata-core: replace "its" with "it is" and "isn't" with "is not". The former fixes the typo while the latter just uses the same formal language. Signed-off-by: Diego Viola Signed-off-by: Damien Le Moal --- drivers/ata/libata-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index eb8ff7b22616..95bd028aea4f 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -1398,7 +1398,7 @@ unsigned long ata_id_xfermask(const u16 *id) /* But wait.. there's more. Design your standards by * committee and you too can get a free iordy field to - * process. However its the speeds not the modes that + * process. However it is the speeds not the modes that * are supported... Note drivers using the timing API * will get this right anyway */ @@ -5559,7 +5559,7 @@ static void ata_finalize_port_ops(struct ata_port_operations *ops) * Start and then freeze ports of @host. Started status is * recorded in host->flags, so this function can be called * multiple times. Ports are guaranteed to get started only - * once. If host->ops isn't initialized yet, its set to the + * once. If host->ops is not initialized yet, it is set to the * first non-dummy port ops. * * LOCKING: From 0cb63670d5052767391e3e1df51ebbffef8b6420 Mon Sep 17 00:00:00 2001 From: Lv Ruyi Date: Thu, 21 Apr 2022 08:44:12 +0000 Subject: [PATCH 17/26] ata: Make use of the helper function devm_platform_ioremap_resource() Use the devm_platform_ioremap_resource() helper instead of calling platform_get_resource() and devm_ioremap_resource() separately. Make the code simpler without functional changes. 
Reported-by: Zeal Robot Signed-off-by: Lv Ruyi Signed-off-by: Damien Le Moal --- drivers/ata/sata_gemini.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/ata/sata_gemini.c b/drivers/ata/sata_gemini.c index 00e1c7941d0e..b729e9919bb0 100644 --- a/drivers/ata/sata_gemini.c +++ b/drivers/ata/sata_gemini.c @@ -318,7 +318,6 @@ static int gemini_sata_probe(struct platform_device *pdev) struct device_node *np = dev->of_node; struct sata_gemini *sg; struct regmap *map; - struct resource *res; enum gemini_muxmode muxmode; u32 gmode; u32 gmask; @@ -329,11 +328,7 @@ static int gemini_sata_probe(struct platform_device *pdev) return -ENOMEM; sg->dev = dev; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) - return -ENODEV; - - sg->base = devm_ioremap_resource(dev, res); + sg->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(sg->base)) return PTR_ERR(sg->base); From 31a644b3c2ae6d0c47e84614ded3ce9bef1adb7a Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 18 Apr 2022 09:32:57 +0900 Subject: [PATCH 18/26] documentation: zonefs: Document sysfs attributes Document the max_wro_seq_files, nr_wro_seq_files, max_active_seq_files and nr_active_seq_files sysfs attributes. Signed-off-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Hans Holmberg Reviewed-by: Sergey Shtylyov --- Documentation/filesystems/zonefs.rst | 38 ++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/Documentation/filesystems/zonefs.rst b/Documentation/filesystems/zonefs.rst index 72d4baba0b6a..394b9f15dce0 100644 --- a/Documentation/filesystems/zonefs.rst +++ b/Documentation/filesystems/zonefs.rst @@ -351,6 +351,44 @@ guaranteed that write requests can be processed. Conversely, the to the device on the last close() of a zone file if the zone is not full nor empty. +Runtime sysfs attributes +------------------------ + +zonefs defines several sysfs attributes for mounted devices. 
All attributes +are user readable and can be found in the directory /sys/fs/zonefs/<dev>/, +where <dev> is the name of the mounted zoned block device. + +The attributes defined are as follows. + +* **max_wro_seq_files**: This attribute reports the maximum number of + sequential zone files that can be open for writing. This number corresponds + to the maximum number of explicitly or implicitly open zones that the device + supports. A value of 0 means that the device has no limit and that any zone + (any file) can be open for writing and written at any time, regardless of the + state of other zones. When the *explicit-open* mount option is used, zonefs + will fail any open() system call requesting to open a sequential zone file for + writing when the number of sequential zone files already open for writing has + reached the *max_wro_seq_files* limit. +* **nr_wro_seq_files**: This attribute reports the current number of sequential + zone files open for writing. When the "explicit-open" mount option is used, + this number can never exceed *max_wro_seq_files*. If the *explicit-open* + mount option is not used, the reported number can be greater than + *max_wro_seq_files*. In such a case, it is the responsibility of the + application to not write simultaneously more than *max_wro_seq_files* + sequential zone files. Failure to do so can result in write errors. +* **max_active_seq_files**: This attribute reports the maximum number of + sequential zone files that are in an active state, that is, sequential zone + files that are partially written (not empty nor full) or that have a zone that + is explicitly open (which happens only if the *explicit-open* mount option is + used). This number is always equal to the maximum number of active zones that + the device supports. A value of 0 means that the mounted device has no limit + on the number of sequential zone files that can be active. 
+* **nr_active_seq_files**: This attribute reports the current number of + sequential zone files that are active. If *max_active_seq_files* is not 0, + then the value of *nr_active_seq_files* can never exceed the value of + *max_active_seq_files*, regardless of the use of the *explicit-open* mount + option. + Zonefs User Space Tools ======================= From ec194bdbc5f9268db15832ccc6699be4728101c9 Mon Sep 17 00:00:00 2001 From: Minghao Chi Date: Thu, 5 May 2022 02:21:33 +0000 Subject: [PATCH 19/26] ata: simplify the return expression of brcm_ahci_remove Simplify the return expression. Reported-by: Zeal Robot Signed-off-by: Minghao Chi Signed-off-by: Damien Le Moal --- drivers/ata/ahci_brcm.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/ata/ahci_brcm.c b/drivers/ata/ahci_brcm.c index ab8552b1ff2a..f61795c546cf 100644 --- a/drivers/ata/ahci_brcm.c +++ b/drivers/ata/ahci_brcm.c @@ -549,15 +549,10 @@ static int brcm_ahci_remove(struct platform_device *pdev) struct ata_host *host = dev_get_drvdata(&pdev->dev); struct ahci_host_priv *hpriv = host->private_data; struct brcm_ahci_priv *priv = hpriv->plat_data; - int ret; brcm_sata_phys_disable(priv); - ret = ata_platform_remove_one(pdev); - if (ret) - return ret; - - return 0; + return ata_platform_remove_one(pdev); } static void brcm_ahci_shutdown(struct platform_device *pdev) From ef1429c0da2656e97b77066a5032ec238b4ead03 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 18 Mar 2022 19:24:51 +0900 Subject: [PATCH 20/26] ata: libata-core: cleanup ata_device_blacklist Remove the unneeded comma after the last field of the array entries. 
Signed-off-by: Damien Le Moal Reviewed-by: Sergey Shtylyov Reviewed-by: Hannes Reinecke --- drivers/ata/libata-core.c | 96 +++++++++++++++++++-------------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 95bd028aea4f..b75c1a3e92d3 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -3898,7 +3898,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* Devices where NCQ should be avoided */ /* NCQ is slow */ { "WDC WD740ADFD-00", NULL, ATA_HORKAGE_NONCQ }, - { "WDC WD740ADFD-00NLR1", NULL, ATA_HORKAGE_NONCQ, }, + { "WDC WD740ADFD-00NLR1", NULL, ATA_HORKAGE_NONCQ }, /* http://thread.gmane.org/gmane.linux.ide/14907 */ { "FUJITSU MHT2060BH", NULL, ATA_HORKAGE_NONCQ }, /* NCQ is broken */ @@ -3924,23 +3924,23 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* drives which fail FPDMA_AA activation (some may freeze afterwards) the ST disks also have LPM issues */ { "ST1000LM024 HN-M101MBB", NULL, ATA_HORKAGE_BROKEN_FPDMA_AA | - ATA_HORKAGE_NOLPM, }, + ATA_HORKAGE_NOLPM }, { "VB0250EAVER", "HPG7", ATA_HORKAGE_BROKEN_FPDMA_AA }, /* Blacklist entries taken from Silicon Image 3124/3132 Windows driver .inf file - also several Linux problem reports */ - { "HTS541060G9SA00", "MB3OC60D", ATA_HORKAGE_NONCQ, }, - { "HTS541080G9SA00", "MB4OC60D", ATA_HORKAGE_NONCQ, }, - { "HTS541010G9SA00", "MBZOC60D", ATA_HORKAGE_NONCQ, }, + { "HTS541060G9SA00", "MB3OC60D", ATA_HORKAGE_NONCQ }, + { "HTS541080G9SA00", "MB4OC60D", ATA_HORKAGE_NONCQ }, + { "HTS541010G9SA00", "MBZOC60D", ATA_HORKAGE_NONCQ }, /* https://bugzilla.kernel.org/show_bug.cgi?id=15573 */ - { "C300-CTFDDAC128MAG", "0001", ATA_HORKAGE_NONCQ, }, + { "C300-CTFDDAC128MAG", "0001", ATA_HORKAGE_NONCQ }, /* Sandisk SD7/8/9s lock up hard on large trims */ - { "SanDisk SD[789]*", NULL, ATA_HORKAGE_MAX_TRIM_128M, }, + { "SanDisk SD[789]*", NULL, ATA_HORKAGE_MAX_TRIM_128M }, /* devices which 
puke on READ_NATIVE_MAX */ - { "HDS724040KLSA80", "KFAOA20N", ATA_HORKAGE_BROKEN_HPA, }, + { "HDS724040KLSA80", "KFAOA20N", ATA_HORKAGE_BROKEN_HPA }, { "WDC WD3200JD-00KLB0", "WD-WCAMR1130137", ATA_HORKAGE_BROKEN_HPA }, { "WDC WD2500JD-00HBB0", "WD-WMAL71490727", ATA_HORKAGE_BROKEN_HPA }, { "MAXTOR 6L080L4", "A93.0500", ATA_HORKAGE_BROKEN_HPA }, @@ -3949,22 +3949,22 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "OCZ-VERTEX", "1.30", ATA_HORKAGE_BROKEN_HPA }, /* Devices which report 1 sector over size HPA */ - { "ST340823A", NULL, ATA_HORKAGE_HPA_SIZE, }, - { "ST320413A", NULL, ATA_HORKAGE_HPA_SIZE, }, - { "ST310211A", NULL, ATA_HORKAGE_HPA_SIZE, }, + { "ST340823A", NULL, ATA_HORKAGE_HPA_SIZE }, + { "ST320413A", NULL, ATA_HORKAGE_HPA_SIZE }, + { "ST310211A", NULL, ATA_HORKAGE_HPA_SIZE }, /* Devices which get the IVB wrong */ - { "QUANTUM FIREBALLlct10 05", "A03.0900", ATA_HORKAGE_IVB, }, + { "QUANTUM FIREBALLlct10 05", "A03.0900", ATA_HORKAGE_IVB }, /* Maybe we should just blacklist TSSTcorp... */ - { "TSSTcorp CDDVDW SH-S202[HJN]", "SB0[01]", ATA_HORKAGE_IVB, }, + { "TSSTcorp CDDVDW SH-S202[HJN]", "SB0[01]", ATA_HORKAGE_IVB }, /* Devices that do not need bridging limits applied */ - { "MTRON MSP-SATA*", NULL, ATA_HORKAGE_BRIDGE_OK, }, - { "BUFFALO HD-QSU2/R5", NULL, ATA_HORKAGE_BRIDGE_OK, }, + { "MTRON MSP-SATA*", NULL, ATA_HORKAGE_BRIDGE_OK }, + { "BUFFALO HD-QSU2/R5", NULL, ATA_HORKAGE_BRIDGE_OK }, /* Devices which aren't very happy with higher link speeds */ - { "WD My Book", NULL, ATA_HORKAGE_1_5_GBPS, }, - { "Seagate FreeAgent GoFlex", NULL, ATA_HORKAGE_1_5_GBPS, }, + { "WD My Book", NULL, ATA_HORKAGE_1_5_GBPS }, + { "Seagate FreeAgent GoFlex", NULL, ATA_HORKAGE_1_5_GBPS }, /* * Devices which choke on SETXFER. 
Applies only if both the @@ -3982,57 +3982,57 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* 512GB MX100 with MU01 firmware has both queued TRIM and LPM issues */ { "Crucial_CT512MX100*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM | - ATA_HORKAGE_NOLPM, }, + ATA_HORKAGE_NOLPM }, /* 512GB MX100 with newer firmware has only LPM issues */ { "Crucial_CT512MX100*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM | - ATA_HORKAGE_NOLPM, }, + ATA_HORKAGE_NOLPM }, /* 480GB+ M500 SSDs have both queued TRIM and LPM issues */ { "Crucial_CT480M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM | - ATA_HORKAGE_NOLPM, }, + ATA_HORKAGE_NOLPM }, { "Crucial_CT960M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM | - ATA_HORKAGE_NOLPM, }, + ATA_HORKAGE_NOLPM }, /* These specific Samsung models/firmware-revs do not handle LPM well */ - { "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM, }, - { "SAMSUNG SSD PM830 mSATA *", "CXM13D1Q", ATA_HORKAGE_NOLPM, }, - { "SAMSUNG MZ7TD256HAFV-000L9", NULL, ATA_HORKAGE_NOLPM, }, - { "SAMSUNG MZ7TE512HMHP-000L1", "EXT06L0Q", ATA_HORKAGE_NOLPM, }, + { "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM }, + { "SAMSUNG SSD PM830 mSATA *", "CXM13D1Q", ATA_HORKAGE_NOLPM }, + { "SAMSUNG MZ7TD256HAFV-000L9", NULL, ATA_HORKAGE_NOLPM }, + { "SAMSUNG MZ7TE512HMHP-000L1", "EXT06L0Q", ATA_HORKAGE_NOLPM }, /* devices that don't properly handle queued TRIM commands */ { "Micron_M500IT_*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | - ATA_HORKAGE_ZERO_AFTER_TRIM, }, + ATA_HORKAGE_ZERO_AFTER_TRIM }, { "Micron_M500_*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | - ATA_HORKAGE_ZERO_AFTER_TRIM, }, + ATA_HORKAGE_ZERO_AFTER_TRIM }, { "Crucial_CT*M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | - ATA_HORKAGE_ZERO_AFTER_TRIM, }, + ATA_HORKAGE_ZERO_AFTER_TRIM }, { "Micron_M5[15]0_*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | - ATA_HORKAGE_ZERO_AFTER_TRIM, }, + ATA_HORKAGE_ZERO_AFTER_TRIM }, { "Crucial_CT*M550*", "MU01", 
ATA_HORKAGE_NO_NCQ_TRIM | - ATA_HORKAGE_ZERO_AFTER_TRIM, }, + ATA_HORKAGE_ZERO_AFTER_TRIM }, { "Crucial_CT*MX100*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM | - ATA_HORKAGE_ZERO_AFTER_TRIM, }, + ATA_HORKAGE_ZERO_AFTER_TRIM }, { "Samsung SSD 840 EVO*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_NO_DMA_LOG | - ATA_HORKAGE_ZERO_AFTER_TRIM, }, + ATA_HORKAGE_ZERO_AFTER_TRIM }, { "Samsung SSD 840*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | - ATA_HORKAGE_ZERO_AFTER_TRIM, }, + ATA_HORKAGE_ZERO_AFTER_TRIM }, { "Samsung SSD 850*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | - ATA_HORKAGE_ZERO_AFTER_TRIM, }, + ATA_HORKAGE_ZERO_AFTER_TRIM }, { "Samsung SSD 860*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM | - ATA_HORKAGE_NO_NCQ_ON_ATI, }, + ATA_HORKAGE_NO_NCQ_ON_ATI }, { "Samsung SSD 870*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM | - ATA_HORKAGE_NO_NCQ_ON_ATI, }, + ATA_HORKAGE_NO_NCQ_ON_ATI }, { "FCCT*M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | - ATA_HORKAGE_ZERO_AFTER_TRIM, }, + ATA_HORKAGE_ZERO_AFTER_TRIM }, /* devices that don't properly handle TRIM commands */ - { "SuperSSpeed S238*", NULL, ATA_HORKAGE_NOTRIM, }, - { "M88V29*", NULL, ATA_HORKAGE_NOTRIM, }, + { "SuperSSpeed S238*", NULL, ATA_HORKAGE_NOTRIM }, + { "M88V29*", NULL, ATA_HORKAGE_NOTRIM }, /* * As defined, the DRAT (Deterministic Read After Trim) and RZAT @@ -4050,16 +4050,16 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { * The intel 510 drive has buggy DRAT/RZAT. Explicitly exclude * that model before whitelisting all other intel SSDs. 
*/ - { "INTEL*SSDSC2MH*", NULL, 0, }, + { "INTEL*SSDSC2MH*", NULL, 0 }, - { "Micron*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, - { "Crucial*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, - { "INTEL*SSD*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, - { "SSD*INTEL*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, - { "Samsung*SSD*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, - { "SAMSUNG*SSD*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, - { "SAMSUNG*MZ7KM*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, - { "ST[1248][0248]0[FH]*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM, }, + { "Micron*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM }, + { "Crucial*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM }, + { "INTEL*SSD*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM }, + { "SSD*INTEL*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM }, + { "Samsung*SSD*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM }, + { "SAMSUNG*SSD*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM }, + { "SAMSUNG*MZ7KM*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM }, + { "ST[1248][0248]0[FH]*", NULL, ATA_HORKAGE_ZERO_AFTER_TRIM }, /* * Some WD SATA-I drives spin up and down erratically when the link From 168af4afd1fce013f677b224180a19e33bd7d40f Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 18 Mar 2022 13:09:22 +0900 Subject: [PATCH 21/26] ata: libata-core: Refactor force_tbl definition Introduce the macro definitions force_cbl(), force_spd_limit(), force_xfer(), force_lflag(), force_horkage_on() and force_horkage_onoff() to define with a more compact syntax the struct ata_force_param entries in the force_tbl array defined in the function ata_parse_force_one(). To reduce the indentation of the array declaration, force_tbl definition is also moved out of the ata_parse_force_one() function. The entries are also reordered to group them by type of the quirk that is applied. Finally, fix a comment in ata_parse_force_param() incorrectly referencing force_tbl instead of ata_force_tbl.
Signed-off-by: Damien Le Moal Reviewed-by: Sergey Shtylyov Reviewed-by: Hannes Reinecke --- drivers/ata/libata-core.c | 139 ++++++++++++++++++++++---------------- 1 file changed, 81 insertions(+), 58 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index b75c1a3e92d3..0e9d24d1bc8e 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -6100,67 +6100,90 @@ int ata_platform_remove_one(struct platform_device *pdev) EXPORT_SYMBOL_GPL(ata_platform_remove_one); #ifdef CONFIG_ATA_FORCE + +#define force_cbl(name, flag) \ + { #name, .cbl = (flag) } + +#define force_spd_limit(spd, val) \ + { #spd, .spd_limit = (val) } + +#define force_xfer(mode, shift) \ + { #mode, .xfer_mask = (1UL << (shift)) } + +#define force_lflag(name, flags) \ + { #name, .lflags = (flags) } + +#define force_horkage_on(name, flag) \ + { #name, .horkage_on = (flag) } + +#define force_horkage_onoff(name, flag) \ + { "no" #name, .horkage_on = (flag) }, \ + { #name, .horkage_off = (flag) } + +static const struct ata_force_param force_tbl[] __initconst = { + force_cbl(40c, ATA_CBL_PATA40), + force_cbl(80c, ATA_CBL_PATA80), + force_cbl(short40c, ATA_CBL_PATA40_SHORT), + force_cbl(unk, ATA_CBL_PATA_UNK), + force_cbl(ign, ATA_CBL_PATA_IGN), + force_cbl(sata, ATA_CBL_SATA), + + force_spd_limit(1.5Gbps, 1), + force_spd_limit(3.0Gbps, 2), + + force_xfer(pio0, ATA_SHIFT_PIO + 0), + force_xfer(pio1, ATA_SHIFT_PIO + 1), + force_xfer(pio2, ATA_SHIFT_PIO + 2), + force_xfer(pio3, ATA_SHIFT_PIO + 3), + force_xfer(pio4, ATA_SHIFT_PIO + 4), + force_xfer(pio5, ATA_SHIFT_PIO + 5), + force_xfer(pio6, ATA_SHIFT_PIO + 6), + force_xfer(mwdma0, ATA_SHIFT_MWDMA + 0), + force_xfer(mwdma1, ATA_SHIFT_MWDMA + 1), + force_xfer(mwdma2, ATA_SHIFT_MWDMA + 2), + force_xfer(mwdma3, ATA_SHIFT_MWDMA + 3), + force_xfer(mwdma4, ATA_SHIFT_MWDMA + 4), + force_xfer(udma0, ATA_SHIFT_UDMA + 0), + force_xfer(udma16, ATA_SHIFT_UDMA + 0), + force_xfer(udma/16, ATA_SHIFT_UDMA + 0), + force_xfer(udma1, 
ATA_SHIFT_UDMA + 1), + force_xfer(udma25, ATA_SHIFT_UDMA + 1), + force_xfer(udma/25, ATA_SHIFT_UDMA + 1), + force_xfer(udma2, ATA_SHIFT_UDMA + 2), + force_xfer(udma33, ATA_SHIFT_UDMA + 2), + force_xfer(udma/33, ATA_SHIFT_UDMA + 2), + force_xfer(udma3, ATA_SHIFT_UDMA + 3), + force_xfer(udma44, ATA_SHIFT_UDMA + 3), + force_xfer(udma/44, ATA_SHIFT_UDMA + 3), + force_xfer(udma4, ATA_SHIFT_UDMA + 4), + force_xfer(udma66, ATA_SHIFT_UDMA + 4), + force_xfer(udma/66, ATA_SHIFT_UDMA + 4), + force_xfer(udma5, ATA_SHIFT_UDMA + 5), + force_xfer(udma100, ATA_SHIFT_UDMA + 5), + force_xfer(udma/100, ATA_SHIFT_UDMA + 5), + force_xfer(udma6, ATA_SHIFT_UDMA + 6), + force_xfer(udma133, ATA_SHIFT_UDMA + 6), + force_xfer(udma/133, ATA_SHIFT_UDMA + 6), + force_xfer(udma7, ATA_SHIFT_UDMA + 7), + + force_lflag(nohrst, ATA_LFLAG_NO_HRST), + force_lflag(nosrst, ATA_LFLAG_NO_SRST), + force_lflag(norst, ATA_LFLAG_NO_HRST | ATA_LFLAG_NO_SRST), + force_lflag(rstonce, ATA_LFLAG_RST_ONCE), + + force_horkage_onoff(ncq, ATA_HORKAGE_NONCQ), + force_horkage_onoff(ncqtrim, ATA_HORKAGE_NO_NCQ_TRIM), + force_horkage_onoff(ncqati, ATA_HORKAGE_NO_NCQ_ON_ATI), + + force_horkage_on(dump_id, ATA_HORKAGE_DUMP_ID), + force_horkage_on(atapi_dmadir, ATA_HORKAGE_ATAPI_DMADIR), + force_horkage_on(disable, ATA_HORKAGE_DISABLE) +}; + static int __init ata_parse_force_one(char **cur, struct ata_force_ent *force_ent, const char **reason) { - static const struct ata_force_param force_tbl[] __initconst = { - { "40c", .cbl = ATA_CBL_PATA40 }, - { "80c", .cbl = ATA_CBL_PATA80 }, - { "short40c", .cbl = ATA_CBL_PATA40_SHORT }, - { "unk", .cbl = ATA_CBL_PATA_UNK }, - { "ign", .cbl = ATA_CBL_PATA_IGN }, - { "sata", .cbl = ATA_CBL_SATA }, - { "1.5Gbps", .spd_limit = 1 }, - { "3.0Gbps", .spd_limit = 2 }, - { "noncq", .horkage_on = ATA_HORKAGE_NONCQ }, - { "ncq", .horkage_off = ATA_HORKAGE_NONCQ }, - { "noncqtrim", .horkage_on = ATA_HORKAGE_NO_NCQ_TRIM }, - { "ncqtrim", .horkage_off = ATA_HORKAGE_NO_NCQ_TRIM }, - { "noncqati", 
.horkage_on = ATA_HORKAGE_NO_NCQ_ON_ATI }, - { "ncqati", .horkage_off = ATA_HORKAGE_NO_NCQ_ON_ATI }, - { "dump_id", .horkage_on = ATA_HORKAGE_DUMP_ID }, - { "pio0", .xfer_mask = 1 << (ATA_SHIFT_PIO + 0) }, - { "pio1", .xfer_mask = 1 << (ATA_SHIFT_PIO + 1) }, - { "pio2", .xfer_mask = 1 << (ATA_SHIFT_PIO + 2) }, - { "pio3", .xfer_mask = 1 << (ATA_SHIFT_PIO + 3) }, - { "pio4", .xfer_mask = 1 << (ATA_SHIFT_PIO + 4) }, - { "pio5", .xfer_mask = 1 << (ATA_SHIFT_PIO + 5) }, - { "pio6", .xfer_mask = 1 << (ATA_SHIFT_PIO + 6) }, - { "mwdma0", .xfer_mask = 1 << (ATA_SHIFT_MWDMA + 0) }, - { "mwdma1", .xfer_mask = 1 << (ATA_SHIFT_MWDMA + 1) }, - { "mwdma2", .xfer_mask = 1 << (ATA_SHIFT_MWDMA + 2) }, - { "mwdma3", .xfer_mask = 1 << (ATA_SHIFT_MWDMA + 3) }, - { "mwdma4", .xfer_mask = 1 << (ATA_SHIFT_MWDMA + 4) }, - { "udma0", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 0) }, - { "udma16", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 0) }, - { "udma/16", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 0) }, - { "udma1", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 1) }, - { "udma25", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 1) }, - { "udma/25", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 1) }, - { "udma2", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 2) }, - { "udma33", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 2) }, - { "udma/33", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 2) }, - { "udma3", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 3) }, - { "udma44", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 3) }, - { "udma/44", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 3) }, - { "udma4", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 4) }, - { "udma66", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 4) }, - { "udma/66", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 4) }, - { "udma5", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 5) }, - { "udma100", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 5) }, - { "udma/100", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 5) }, - { "udma6", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 6) }, - { "udma133", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 6) }, - { "udma/133", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 6) }, - { 
"udma7", .xfer_mask = 1 << (ATA_SHIFT_UDMA + 7) }, - { "nohrst", .lflags = ATA_LFLAG_NO_HRST }, - { "nosrst", .lflags = ATA_LFLAG_NO_SRST }, - { "norst", .lflags = ATA_LFLAG_NO_HRST | ATA_LFLAG_NO_SRST }, - { "rstonce", .lflags = ATA_LFLAG_RST_ONCE }, - { "atapi_dmadir", .horkage_on = ATA_HORKAGE_ATAPI_DMADIR }, - { "disable", .horkage_on = ATA_HORKAGE_DISABLE }, - }; char *start = *cur, *p = *cur; char *id, *val, *endp; const struct ata_force_param *match_fp = NULL; @@ -6242,7 +6265,7 @@ static void __init ata_parse_force_param(void) int last_port = -1, last_device = -1; char *p, *cur, *next; - /* calculate maximum number of params and allocate force_tbl */ + /* Calculate maximum number of params and allocate ata_force_tbl */ for (p = ata_force_param_buf; *p; p++) if (*p == ',') size++; From 3af9ca4d341d2b8756fa9056ca0715915480e251 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 7 Apr 2022 15:05:59 +0900 Subject: [PATCH 22/26] ata: libata-core: Improve link flags forced settings Similarly to the horkage flags, introduce the force_lflag_onoff() macro to define struct ata_force_param entries of the force_tbl array that allow turning on or off a link flag using the libata.force boot parameter. To be consistent with naming, the macro force_lflag() is renamed to force_lflag_on(). Using force_lflag_onoff(), define a new force_tbl entry for the ATA_LFLAG_NO_DEBOUNCE_DELAY link flag, thus allowing testing if an adapter requires a link debounce delay or not. 
Signed-off-by: Damien Le Moal Reviewed-by: Sergey Shtylyov Reviewed-by: Hannes Reinecke --- drivers/ata/libata-core.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 0e9d24d1bc8e..78591ba91851 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -96,7 +96,8 @@ struct ata_force_param { unsigned long xfer_mask; unsigned int horkage_on; unsigned int horkage_off; - u16 lflags; + u16 lflags_on; + u16 lflags_off; }; struct ata_force_ent { @@ -386,11 +387,17 @@ static void ata_force_link_limits(struct ata_link *link) } /* let lflags stack */ - if (fe->param.lflags) { - link->flags |= fe->param.lflags; + if (fe->param.lflags_on) { + link->flags |= fe->param.lflags_on; ata_link_notice(link, "FORCE: link flag 0x%x forced -> 0x%x\n", - fe->param.lflags, link->flags); + fe->param.lflags_on, link->flags); + } + if (fe->param.lflags_off) { + link->flags &= ~fe->param.lflags_off; + ata_link_notice(link, + "FORCE: link flag 0x%x cleared -> 0x%x\n", + fe->param.lflags_off, link->flags); } } } @@ -6110,8 +6117,12 @@ EXPORT_SYMBOL_GPL(ata_platform_remove_one); #define force_xfer(mode, shift) \ { #mode, .xfer_mask = (1UL << (shift)) } -#define force_lflag(name, flags) \ - { #name, .lflags = (flags) } +#define force_lflag_on(name, flags) \ + { #name, .lflags_on = (flags) } + +#define force_lflag_onoff(name, flags) \ + { "no" #name, .lflags_on = (flags) }, \ + { #name, .lflags_off = (flags) } #define force_horkage_on(name, flag) \ { #name, .horkage_on = (flag) } @@ -6166,10 +6177,11 @@ static const struct ata_force_param force_tbl[] __initconst = { force_xfer(udma/133, ATA_SHIFT_UDMA + 6), force_xfer(udma7, ATA_SHIFT_UDMA + 7), - force_lflag(nohrst, ATA_LFLAG_NO_HRST), - force_lflag(nosrst, ATA_LFLAG_NO_SRST), - force_lflag(norst, ATA_LFLAG_NO_HRST | ATA_LFLAG_NO_SRST), - force_lflag(rstonce, ATA_LFLAG_RST_ONCE), + force_lflag_on(nohrst, 
ATA_LFLAG_NO_HRST), + force_lflag_on(nosrst, ATA_LFLAG_NO_SRST), + force_lflag_on(norst, ATA_LFLAG_NO_HRST | ATA_LFLAG_NO_SRST), + force_lflag_on(rstonce, ATA_LFLAG_RST_ONCE), + force_lflag_onoff(dbdelay, ATA_LFLAG_NO_DEBOUNCE_DELAY), force_horkage_onoff(ncq, ATA_HORKAGE_NONCQ), force_horkage_onoff(ncqtrim, ATA_HORKAGE_NO_NCQ_TRIM), From 2c33bbdac28c0cddd5a3d5e5e1bf05d440e4fe7b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 18 Mar 2022 14:03:06 +0900 Subject: [PATCH 23/26] ata: libata-core: Allow forcing most horkage flags To facilitate debugging of drive issues in the field without kernel changes (e.g. temporary test patches), add an entry for most horkage flags in the force_tbl array to allow controlling these horkage settings with the libata.force kernel boot parameter. Signed-off-by: Damien Le Moal Reviewed-by: Sergey Shtylyov Reviewed-by: Hannes Reinecke --- drivers/ata/libata-core.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 78591ba91851..40e816419f48 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -6187,9 +6187,27 @@ static const struct ata_force_param force_tbl[] __initconst = { force_horkage_onoff(ncqtrim, ATA_HORKAGE_NO_NCQ_TRIM), force_horkage_onoff(ncqati, ATA_HORKAGE_NO_NCQ_ON_ATI), - force_horkage_on(dump_id, ATA_HORKAGE_DUMP_ID), + force_horkage_onoff(trim, ATA_HORKAGE_NOTRIM), + force_horkage_on(trim_zero, ATA_HORKAGE_ZERO_AFTER_TRIM), + force_horkage_on(max_trim_128m, ATA_HORKAGE_MAX_TRIM_128M), + + force_horkage_onoff(dma, ATA_HORKAGE_NODMA), force_horkage_on(atapi_dmadir, ATA_HORKAGE_ATAPI_DMADIR), - force_horkage_on(disable, ATA_HORKAGE_DISABLE) + force_horkage_on(atapi_mod16_dma, ATA_HORKAGE_ATAPI_MOD16_DMA), + + force_horkage_onoff(dmalog, ATA_HORKAGE_NO_DMA_LOG), + force_horkage_onoff(iddevlog, ATA_HORKAGE_NO_ID_DEV_LOG), + force_horkage_onoff(logdir, ATA_HORKAGE_NO_LOG_DIR), + + 
force_horkage_on(max_sec_128, ATA_HORKAGE_MAX_SEC_128), + force_horkage_on(max_sec_1024, ATA_HORKAGE_MAX_SEC_1024), + force_horkage_on(max_sec_lba48, ATA_HORKAGE_MAX_SEC_LBA48), + + force_horkage_onoff(lpm, ATA_HORKAGE_NOLPM), + force_horkage_onoff(setxfer, ATA_HORKAGE_NOSETXFER), + force_horkage_on(dump_id, ATA_HORKAGE_DUMP_ID), + + force_horkage_on(disable, ATA_HORKAGE_DISABLE), }; static int __init ata_parse_force_one(char **cur, From fa82cabb888316fc419affb4b0cd91d671c5011b Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 18 Mar 2022 19:26:54 +0900 Subject: [PATCH 24/26] doc: admin-guide: Update libata kernel parameters Clean up the text describing the libata.force boot parameter and update the list of the values to include all supported horkage and link flags that can be forced. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke --- .../admin-guide/kernel-parameters.txt | 71 ++++++++++++++----- 1 file changed, 55 insertions(+), 16 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 3f1cc5e317ed..00fb37cab649 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2622,14 +2622,14 @@ when set. Format: - libata.force= [LIBATA] Force configurations. The format is comma- - separated list of "[ID:]VAL" where ID is - PORT[.DEVICE]. PORT and DEVICE are decimal numbers - matching port, link or device. Basically, it matches - the ATA ID string printed on console by libata. If - the whole ID part is omitted, the last PORT and DEVICE - values are used. If ID hasn't been specified yet, the - configuration applies to all ports, links and devices. + libata.force= [LIBATA] Force configurations. The format is a comma- + separated list of "[ID:]VAL" where ID is PORT[.DEVICE]. + PORT and DEVICE are decimal numbers matching port, link + or device. Basically, it matches the ATA ID string + printed on console by libata.
If the whole ID part is + omitted, the last PORT and DEVICE values are used. If + ID hasn't been specified yet, the configuration applies + to all ports, links and devices. If only DEVICE is omitted, the parameter applies to the port and all links and devices behind it. DEVICE @@ -2639,7 +2639,7 @@ host link and device attached to it. The VAL specifies the configuration to force. As long - as there's no ambiguity shortcut notation is allowed. + as there is no ambiguity, shortcut notation is allowed. For example, both 1.5 and 1.5G would work for 1.5Gbps. The following configurations can be forced. @@ -2652,19 +2652,58 @@ udma[/][16,25,33,44,66,100,133] notation is also allowed. + * nohrst, nosrst, norst: suppress hard, soft and both + resets. + + * rstonce: only attempt one reset during hot-unplug + link recovery. + + * [no]dbdelay: Enable or disable the extra 200ms delay + before debouncing a link PHY and device presence + detection. + * [no]ncq: Turn on or off NCQ. - * [no]ncqtrim: Turn off queued DSM TRIM. + * [no]ncqtrim: Enable or disable queued DSM TRIM. - * nohrst, nosrst, norst: suppress hard, soft - and both resets. + * [no]ncqati: Enable or disable NCQ trim on ATI chipset. - * rstonce: only attempt one reset during - hot-unplug link recovery + * [no]trim: Enable or disable (unqueued) TRIM. - * dump_id: dump IDENTIFY data. + * trim_zero: Indicate that TRIM command zeroes data. - * atapi_dmadir: Enable ATAPI DMADIR bridge support + * max_trim_128m: Set 128M maximum trim size limit. + + * [no]dma: Turn on or off DMA transfers. + + * atapi_dmadir: Enable ATAPI DMADIR bridge support. + + * atapi_mod16_dma: Enable the use of ATAPI DMA for + commands that are not a multiple of 16 bytes. + + * [no]dmalog: Enable or disable the use of the + READ LOG DMA EXT command to access logs. + + * [no]iddevlog: Enable or disable access to the + identify device data log. + + * [no]logdir: Enable or disable access to the general + purpose log directory. 
+ + * max_sec_128: Set transfer size limit to 128 sectors. + + * max_sec_1024: Set transfer size limit to + 1024 sectors. + + * max_sec_lba48: Set transfer size limit to + 65535 sectors. + + * [no]lpm: Enable or disable link power management. + + * [no]setxfer: Indicate if transfer speed mode setting + should be skipped. + + * dump_id: Dump IDENTIFY data. * disable: Disable this device. From 9e4a51ad8eee1d263666fd31ced39bd8e3770822 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 10 May 2022 10:53:41 +0200 Subject: [PATCH 25/26] debugobjects: Convert to SPDX license identifier Signed-off-by: Thomas Gleixner Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/87v8udpy3u.ffs@tglx --- lib/debugobjects.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 6946f8e204e3..337d797a7141 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -1,11 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Generic infrastructure for lifetime debugging of objects. * - * Started by Thomas Gleixner - * * Copyright (C) 2008, Thomas Gleixner - * - * For licencing details see kernel-base/COPYING */ #define pr_fmt(fmt) "ODEBUG: " fmt From 71abb4df29815d17f3dff98dce553fc1732d9738 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Fri, 13 May 2022 15:55:54 +0800 Subject: [PATCH 26/26] ata: pata_ftide010: Remove unneeded ERROR check before clk_disable_unprepare ERROR check is already in clk_disable() and clk_unprepare() by using IS_ERR_OR_NULL. Remove unneeded ERROR check for ftide->pclk here.
Signed-off-by: Wan Jiabing Reviewed-by: Sergey Shtylyov Reviewed-by: Linus Walleij Signed-off-by: Damien Le Moal --- drivers/ata/pata_ftide010.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/ata/pata_ftide010.c b/drivers/ata/pata_ftide010.c index 2e35505b683c..0117df0fe3c5 100644 --- a/drivers/ata/pata_ftide010.c +++ b/drivers/ata/pata_ftide010.c @@ -536,8 +536,8 @@ static int pata_ftide010_probe(struct platform_device *pdev) return 0; err_dis_clk: - if (!IS_ERR(ftide->pclk)) - clk_disable_unprepare(ftide->pclk); + clk_disable_unprepare(ftide->pclk); + return ret; } @@ -547,8 +547,7 @@ static int pata_ftide010_remove(struct platform_device *pdev) struct ftide010 *ftide = host->private_data; ata_host_detach(ftide->host); - if (!IS_ERR(ftide->pclk)) - clk_disable_unprepare(ftide->pclk); + clk_disable_unprepare(ftide->pclk); return 0; }