diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl index 0b6a2e6e8fbb..7c2b846521f3 100644 --- a/Documentation/ABI/testing/sysfs-bus-cxl +++ b/Documentation/ABI/testing/sysfs-bus-cxl @@ -1,3 +1,12 @@ +What: /sys/bus/cxl/flush +Date: Januarry, 2022 +KernelVersion: v5.18 +Contact: linux-cxl@vger.kernel.org +Description: + (WO) If userspace manually unbinds a port the kernel schedules + all descendant memdevs for unbind. Writing '1' to this attribute + flushes that work. + What: /sys/bus/cxl/devices/memX/firmware_version Date: December, 2020 KernelVersion: v5.12 @@ -25,6 +34,24 @@ Description: identically named field in the Identify Memory Device Output Payload in the CXL-2.0 specification. +What: /sys/bus/cxl/devices/memX/serial +Date: January, 2022 +KernelVersion: v5.18 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) 64-bit serial number per the PCIe Device Serial Number + capability. Mandatory for CXL devices, see CXL 2.0 8.1.12.2 + Memory Device PCIe Capabilities and Extended Capabilities. + +What: /sys/bus/cxl/devices/memX/numa_node +Date: January, 2022 +KernelVersion: v5.18 +Contact: linux-cxl@vger.kernel.org +Description: + (RO) If NUMA is enabled and the platform has affinitized the + host PCI device for this memory device, emit the CPU node + affinity for this device. + What: /sys/bus/cxl/devices/*/devtype Date: June, 2021 KernelVersion: v5.14 @@ -34,6 +61,15 @@ Description: the same value communicated in the DEVTYPE environment variable for uevents for devices on the "cxl" bus. +What: /sys/bus/cxl/devices/*/modalias +Date: December, 2021 +KernelVersion: v5.18 +Contact: linux-cxl@vger.kernel.org +Description: + CXL device objects export the modalias attribute which mirrors + the same value communicated in the MODALIAS environment variable + for uevents for devices on the "cxl" bus. + What: /sys/bus/cxl/devices/portX/uport Date: June, 2021 KernelVersion: v5.14 diff --git a/Documentation/driver-api/cxl/memory-devices.rst b/Documentation/driver-api/cxl/memory-devices.rst index 3b8f41395f6b..db476bb170b6 100644 --- a/Documentation/driver-api/cxl/memory-devices.rst +++ b/Documentation/driver-api/cxl/memory-devices.rst @@ -14,6 +14,303 @@ that optionally define a device's contribution to an interleaved address range across multiple devices underneath a host-bridge or interleaved across host-bridges. +CXL Bus: Theory of Operation +============================ +Similar to how a RAID driver takes disk objects and assembles them into a new +logical device, the CXL subsystem is tasked to take PCIe and ACPI objects and +assemble them into a CXL.mem decode topology. The need for runtime configuration +of the CXL.mem topology is also similar to RAID in that different environments +with the same hardware configuration may decide to assemble the topology in +contrasting ways. One may choose performance (RAID0) striping memory across +multiple Host Bridges and endpoints while another may opt for fault tolerance +and disable any striping in the CXL.mem topology. + +Platform firmware enumerates a menu of interleave options at the "CXL root port" +(Linux term for the top of the CXL decode topology). From there, PCIe topology +dictates which endpoints can participate in which Host Bridge decode regimes. +Each PCIe Switch in the path between the root and an endpoint introduces a point +at which the interleave can be split. For example platform firmware may say at a +given range only decodes to 1 one Host Bridge, but that Host Bridge may in turn +interleave cycles across multiple Root Ports. An intervening Switch between a +port and an endpoint may interleave cycles across multiple Downstream Switch +Ports, etc. + +Here is a sample listing of a CXL topology defined by 'cxl_test'. The 'cxl_test' +module generates an emulated CXL topology of 2 Host Bridges each with 2 Root +Ports. Each of those Root Ports are connected to 2-way switches with endpoints +connected to those downstream ports for a total of 8 endpoints:: + + # cxl list -BEMPu -b cxl_test + { + "bus":"root3", + "provider":"cxl_test", + "ports:root3":[ + { + "port":"port5", + "host":"cxl_host_bridge.1", + "ports:port5":[ + { + "port":"port8", + "host":"cxl_switch_uport.1", + "endpoints:port8":[ + { + "endpoint":"endpoint9", + "host":"mem2", + "memdev":{ + "memdev":"mem2", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x1", + "numa_node":1, + "host":"cxl_mem.1" + } + }, + { + "endpoint":"endpoint15", + "host":"mem6", + "memdev":{ + "memdev":"mem6", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x5", + "numa_node":1, + "host":"cxl_mem.5" + } + } + ] + }, + { + "port":"port12", + "host":"cxl_switch_uport.3", + "endpoints:port12":[ + { + "endpoint":"endpoint17", + "host":"mem8", + "memdev":{ + "memdev":"mem8", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x7", + "numa_node":1, + "host":"cxl_mem.7" + } + }, + { + "endpoint":"endpoint13", + "host":"mem4", + "memdev":{ + "memdev":"mem4", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x3", + "numa_node":1, + "host":"cxl_mem.3" + } + } + ] + } + ] + }, + { + "port":"port4", + "host":"cxl_host_bridge.0", + "ports:port4":[ + { + "port":"port6", + "host":"cxl_switch_uport.0", + "endpoints:port6":[ + { + "endpoint":"endpoint7", + "host":"mem1", + "memdev":{ + "memdev":"mem1", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0", + "numa_node":0, + "host":"cxl_mem.0" + } + }, + { + "endpoint":"endpoint14", + "host":"mem5", + "memdev":{ + "memdev":"mem5", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x4", + "numa_node":0, + "host":"cxl_mem.4" + } + } + ] + }, + { + "port":"port10", + "host":"cxl_switch_uport.2", + "endpoints:port10":[ + { + "endpoint":"endpoint16", + "host":"mem7", + "memdev":{ + "memdev":"mem7", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x6", + "numa_node":0, + "host":"cxl_mem.6" + } + }, + { + "endpoint":"endpoint11", + "host":"mem3", + "memdev":{ + "memdev":"mem3", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x2", + "numa_node":0, + "host":"cxl_mem.2" + } + } + ] + } + ] + } + ] + } + +In that listing each "root", "port", and "endpoint" object correspond a kernel +'struct cxl_port' object. A 'cxl_port' is a device that can decode CXL.mem to +its descendants. So "root" claims non-PCIe enumerable platform decode ranges and +decodes them to "ports", "ports" decode to "endpoints", and "endpoints" +represent the decode from SPA (System Physical Address) to DPA (Device Physical +Address). + +Continuing the RAID analogy, disks have both topology metadata and on device +metadata that determine RAID set assembly. CXL Port topology and CXL Port link +status is metadata for CXL.mem set assembly. The CXL Port topology is enumerated +by the arrival of a CXL.mem device. I.e. unless and until the PCIe core attaches +the cxl_pci driver to a CXL Memory Expander there is no role for CXL Port +objects. Conversely for hot-unplug / removal scenarios, there is no need for +the Linux PCI core to tear down switch-level CXL resources because the endpoint +->remove() event cleans up the port data that was established to support that +Memory Expander. + +The port metadata and potential decode schemes that a give memory device may +participate can be determined via a command like:: + + # cxl list -BDMu -d root -m mem3 + { + "bus":"root3", + "provider":"cxl_test", + "decoders:root3":[ + { + "decoder":"decoder3.1", + "resource":"0x8030000000", + "size":"512.00 MiB (536.87 MB)", + "volatile_capable":true, + "nr_targets":2 + }, + { + "decoder":"decoder3.3", + "resource":"0x8060000000", + "size":"512.00 MiB (536.87 MB)", + "pmem_capable":true, + "nr_targets":2 + }, + { + "decoder":"decoder3.0", + "resource":"0x8020000000", + "size":"256.00 MiB (268.44 MB)", + "volatile_capable":true, + "nr_targets":1 + }, + { + "decoder":"decoder3.2", + "resource":"0x8050000000", + "size":"256.00 MiB (268.44 MB)", + "pmem_capable":true, + "nr_targets":1 + } + ], + "memdevs:root3":[ + { + "memdev":"mem3", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x2", + "numa_node":0, + "host":"cxl_mem.2" + } + ] + } + +...which queries the CXL topology to ask "given CXL Memory Expander with a kernel +device name of 'mem3' which platform level decode ranges may this device +participate". A given expander can participate in multiple CXL.mem interleave +sets simultaneously depending on how many decoder resource it has. In this +example mem3 can participate in one or more of a PMEM interleave that spans to +Host Bridges, a PMEM interleave that targets a single Host Bridge, a Volatile +memory interleave that spans 2 Host Bridges, and a Volatile memory interleave +that only targets a single Host Bridge. + +Conversely the memory devices that can participate in a given platform level +decode scheme can be determined via a command like the following:: + + # cxl list -MDu -d 3.2 + [ + { + "memdevs":[ + { + "memdev":"mem1", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0", + "numa_node":0, + "host":"cxl_mem.0" + }, + { + "memdev":"mem5", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x4", + "numa_node":0, + "host":"cxl_mem.4" + }, + { + "memdev":"mem7", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x6", + "numa_node":0, + "host":"cxl_mem.6" + }, + { + "memdev":"mem3", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x2", + "numa_node":0, + "host":"cxl_mem.2" + } + ] + }, + { + "root decoders":[ + { + "decoder":"decoder3.2", + "resource":"0x8050000000", + "size":"256.00 MiB (268.44 MB)", + "pmem_capable":true, + "nr_targets":1 + } + ] + } + ] + +...where the naming scheme for decoders is "decoder.". + Driver Infrastructure ===================== @@ -28,6 +325,14 @@ CXL Memory Device .. kernel-doc:: drivers/cxl/pci.c :internal: +.. kernel-doc:: drivers/cxl/mem.c + :doc: cxl mem + +CXL Port +-------- +.. kernel-doc:: drivers/cxl/port.c + :doc: cxl port + CXL Core -------- .. kernel-doc:: drivers/cxl/cxl.h @@ -36,10 +341,16 @@ CXL Core .. kernel-doc:: drivers/cxl/cxl.h :internal: -.. kernel-doc:: drivers/cxl/core/bus.c +.. kernel-doc:: drivers/cxl/core/port.c :doc: cxl core -.. kernel-doc:: drivers/cxl/core/bus.c +.. kernel-doc:: drivers/cxl/core/port.c + :identifiers: + +.. kernel-doc:: drivers/cxl/core/pci.c + :doc: cxl core pci + +.. kernel-doc:: drivers/cxl/core/pci.c :identifiers: .. kernel-doc:: drivers/cxl/core/pmem.c diff --git a/MAINTAINERS b/MAINTAINERS index 42643062949b..227708eef4e7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4456,6 +4456,7 @@ F: drivers/power/supply/cw2015_battery.c CEPH COMMON CODE (LIBCEPH) M: Ilya Dryomov M: Jeff Layton +M: Xiubo Li L: ceph-devel@vger.kernel.org S: Supported W: http://ceph.com/ @@ -4466,6 +4467,7 @@ F: net/ceph/ CEPH DISTRIBUTED FILE SYSTEM CLIENT (CEPH) M: Jeff Layton +M: Xiubo Li M: Ilya Dryomov L: ceph-devel@vger.kernel.org S: Supported diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 67c91378f2dd..b88ab956bb7c 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -13,25 +13,26 @@ menuconfig CXL_BUS if CXL_BUS -config CXL_MEM - tristate "CXL.mem: Memory Devices" +config CXL_PCI + tristate "PCI manageability" default CXL_BUS help - The CXL.mem protocol allows a device to act as a provider of - "System RAM" and/or "Persistent Memory" that is fully coherent - as if the memory was attached to the typical CPU memory - controller. + The CXL specification defines a "CXL memory device" sub-class in the + PCI "memory controller" base class of devices. Device's identified by + this class code provide support for volatile and / or persistent + memory to be mapped into the system address map (Host-managed Device + Memory (HDM)). - Say 'y/m' to enable a driver that will attach to CXL.mem devices for - configuration and management primarily via the mailbox interface. See - Chapter 2.3 Type 3 CXL Device in the CXL 2.0 specification for more - details. + Say 'y/m' to enable a driver that will attach to CXL memory expander + devices enumerated by the memory device class code for configuration + and management primarily via the mailbox interface. See Chapter 2.3 + Type 3 CXL Device in the CXL 2.0 specification for more details. If unsure say 'm'. config CXL_MEM_RAW_COMMANDS bool "RAW Command Interface for Memory Devices" - depends on CXL_MEM + depends on CXL_PCI help Enable CXL RAW command interface. @@ -76,4 +77,25 @@ config CXL_PMEM provisioning the persistent memory capacity of CXL memory expanders. If unsure say 'm'. + +config CXL_MEM + tristate "CXL: Memory Expansion" + depends on CXL_PCI + default CXL_BUS + help + The CXL.mem protocol allows a device to act as a provider of "System + RAM" and/or "Persistent Memory" that is fully coherent as if the + memory were attached to the typical CPU memory controller. This is + known as HDM "Host-managed Device Memory". + + Say 'y/m' to enable a driver that will attach to CXL.mem devices for + memory expansion and control of HDM. See Chapter 9.13 in the CXL 2.0 + specification for a detailed description of HDM. + + If unsure say 'm'. + +config CXL_PORT + default CXL_BUS + tristate + endif diff --git a/drivers/cxl/Makefile b/drivers/cxl/Makefile index d1aaabc940f3..ce267ef11d93 100644 --- a/drivers/cxl/Makefile +++ b/drivers/cxl/Makefile @@ -1,9 +1,13 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_CXL_BUS) += core/ -obj-$(CONFIG_CXL_MEM) += cxl_pci.o +obj-$(CONFIG_CXL_PCI) += cxl_pci.o +obj-$(CONFIG_CXL_MEM) += cxl_mem.o obj-$(CONFIG_CXL_ACPI) += cxl_acpi.o obj-$(CONFIG_CXL_PMEM) += cxl_pmem.o +obj-$(CONFIG_CXL_PORT) += cxl_port.o +cxl_mem-y := mem.o cxl_pci-y := pci.o cxl_acpi-y := acpi.o cxl_pmem-y := pmem.o +cxl_port-y := port.o diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 3163167ecc3a..d15a6aec0331 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -6,6 +6,7 @@ #include #include #include +#include "cxlpci.h" #include "cxl.h" /* Encode defined in CXL 2.0 8.2.5.12.7 HDM Decoder Control Register */ @@ -14,7 +15,7 @@ static unsigned long cfmws_to_decoder_flags(int restrictions) { - unsigned long flags = 0; + unsigned long flags = CXL_DECODER_F_ENABLE; if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_TYPE2) flags |= CXL_DECODER_F_TYPE2; @@ -101,16 +102,14 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, for (i = 0; i < CFMWS_INTERLEAVE_WAYS(cfmws); i++) target_map[i] = cfmws->interleave_targets[i]; - cxld = cxl_decoder_alloc(root_port, CFMWS_INTERLEAVE_WAYS(cfmws)); + cxld = cxl_root_decoder_alloc(root_port, CFMWS_INTERLEAVE_WAYS(cfmws)); if (IS_ERR(cxld)) return 0; cxld->flags = cfmws_to_decoder_flags(cfmws->restrictions); cxld->target_type = CXL_DECODER_EXPANDER; - cxld->range = (struct range){ - .start = cfmws->base_hpa, - .end = cfmws->base_hpa + cfmws->window_size - 1, - }; + cxld->platform_res = (struct resource)DEFINE_RES_MEM(cfmws->base_hpa, + cfmws->window_size); cxld->interleave_ways = CFMWS_INTERLEAVE_WAYS(cfmws); cxld->interleave_granularity = CFMWS_INTERLEAVE_GRANULARITY(cfmws); @@ -120,67 +119,17 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, else rc = cxl_decoder_autoremove(dev, cxld); if (rc) { - dev_err(dev, "Failed to add decoder for %#llx-%#llx\n", - cfmws->base_hpa, - cfmws->base_hpa + cfmws->window_size - 1); + dev_err(dev, "Failed to add decoder for %pr\n", + &cxld->platform_res); return 0; } - dev_dbg(dev, "add: %s node: %d range %#llx-%#llx\n", - dev_name(&cxld->dev), phys_to_target_node(cxld->range.start), - cfmws->base_hpa, cfmws->base_hpa + cfmws->window_size - 1); + dev_dbg(dev, "add: %s node: %d range %pr\n", dev_name(&cxld->dev), + phys_to_target_node(cxld->platform_res.start), + &cxld->platform_res); return 0; } -__mock int match_add_root_ports(struct pci_dev *pdev, void *data) -{ - struct cxl_walk_context *ctx = data; - struct pci_bus *root_bus = ctx->root; - struct cxl_port *port = ctx->port; - int type = pci_pcie_type(pdev); - struct device *dev = ctx->dev; - u32 lnkcap, port_num; - int rc; - - if (pdev->bus != root_bus) - return 0; - if (!pci_is_pcie(pdev)) - return 0; - if (type != PCI_EXP_TYPE_ROOT_PORT) - return 0; - if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP, - &lnkcap) != PCIBIOS_SUCCESSFUL) - return 0; - - /* TODO walk DVSEC to find component register base */ - port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap); - rc = cxl_add_dport(port, &pdev->dev, port_num, CXL_RESOURCE_NONE); - if (rc) { - ctx->error = rc; - return rc; - } - ctx->count++; - - dev_dbg(dev, "add dport%d: %s\n", port_num, dev_name(&pdev->dev)); - - return 0; -} - -static struct cxl_dport *find_dport_by_dev(struct cxl_port *port, struct device *dev) -{ - struct cxl_dport *dport; - - device_lock(&port->dev); - list_for_each_entry(dport, &port->dports, list) - if (dport->dport == dev) { - device_unlock(&port->dev); - return dport; - } - - device_unlock(&port->dev); - return NULL; -} - __mock struct acpi_device *to_cxl_host_bridge(struct device *host, struct device *dev) { @@ -204,83 +153,35 @@ static int add_host_bridge_uport(struct device *match, void *arg) struct device *host = root_port->dev.parent; struct acpi_device *bridge = to_cxl_host_bridge(host, match); struct acpi_pci_root *pci_root; - struct cxl_walk_context ctx; - int single_port_map[1], rc; - struct cxl_decoder *cxld; struct cxl_dport *dport; struct cxl_port *port; + int rc; if (!bridge) return 0; - dport = find_dport_by_dev(root_port, match); + dport = cxl_find_dport_by_dev(root_port, match); if (!dport) { dev_dbg(host, "host bridge expected and not found\n"); return 0; } + /* + * Note that this lookup already succeeded in + * to_cxl_host_bridge(), so no need to check for failure here + */ + pci_root = acpi_pci_find_root(bridge->handle); + rc = devm_cxl_register_pci_bus(host, match, pci_root->bus); + if (rc) + return rc; + port = devm_cxl_add_port(host, match, dport->component_reg_phys, root_port); if (IS_ERR(port)) return PTR_ERR(port); dev_dbg(host, "%s: add: %s\n", dev_name(match), dev_name(&port->dev)); - /* - * Note that this lookup already succeeded in - * to_cxl_host_bridge(), so no need to check for failure here - */ - pci_root = acpi_pci_find_root(bridge->handle); - ctx = (struct cxl_walk_context){ - .dev = host, - .root = pci_root->bus, - .port = port, - }; - pci_walk_bus(pci_root->bus, match_add_root_ports, &ctx); - - if (ctx.count == 0) - return -ENODEV; - if (ctx.error) - return ctx.error; - if (ctx.count > 1) - return 0; - - /* TODO: Scan CHBCR for HDM Decoder resources */ - - /* - * Per the CXL specification (8.2.5.12 CXL HDM Decoder Capability - * Structure) single ported host-bridges need not publish a decoder - * capability when a passthrough decode can be assumed, i.e. all - * transactions that the uport sees are claimed and passed to the single - * dport. Disable the range until the first CXL region is enumerated / - * activated. - */ - cxld = cxl_decoder_alloc(port, 1); - if (IS_ERR(cxld)) - return PTR_ERR(cxld); - - cxld->interleave_ways = 1; - cxld->interleave_granularity = PAGE_SIZE; - cxld->target_type = CXL_DECODER_EXPANDER; - cxld->range = (struct range) { - .start = 0, - .end = -1, - }; - - device_lock(&port->dev); - dport = list_first_entry(&port->dports, typeof(*dport), list); - device_unlock(&port->dev); - - single_port_map[0] = dport->port_id; - - rc = cxl_decoder_add(cxld, single_port_map); - if (rc) - put_device(&cxld->dev); - else - rc = cxl_decoder_autoremove(host, cxld); - - if (rc == 0) - dev_dbg(host, "add: %s\n", dev_name(&cxld->dev)); - return rc; + return 0; } struct cxl_chbs_context { @@ -309,9 +210,9 @@ static int cxl_get_chbcr(union acpi_subtable_headers *header, void *arg, static int add_host_bridge_dport(struct device *match, void *arg) { - int rc; acpi_status status; unsigned long long uid; + struct cxl_dport *dport; struct cxl_chbs_context ctx; struct cxl_port *root_port = arg; struct device *host = root_port->dev.parent; @@ -340,11 +241,11 @@ static int add_host_bridge_dport(struct device *match, void *arg) return 0; } - rc = cxl_add_dport(root_port, match, uid, ctx.chbcr); - if (rc) { + dport = devm_cxl_add_dport(root_port, match, uid, ctx.chbcr); + if (IS_ERR(dport)) { dev_err(host, "failed to add downstream port: %s\n", dev_name(match)); - return rc; + return PTR_ERR(dport); } dev_dbg(host, "add dport%llu: %s\n", uid, dev_name(match)); return 0; @@ -413,7 +314,8 @@ static int cxl_acpi_probe(struct platform_device *pdev) if (rc < 0) return rc; - return 0; + /* In case PCI is scanned before ACPI re-trigger memdev attach */ + return cxl_bus_rescan(); } static const struct acpi_device_id cxl_acpi_ids[] = { diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 40ab50318daf..6d37cd78b151 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -2,8 +2,10 @@ obj-$(CONFIG_CXL_BUS) += cxl_core.o ccflags-y += -I$(srctree)/drivers/cxl -cxl_core-y := bus.o +cxl_core-y := port.o cxl_core-y += pmem.o cxl_core-y += regs.o cxl_core-y += memdev.o cxl_core-y += mbox.o +cxl_core-y += pci.o +cxl_core-y += hdm.o diff --git a/drivers/cxl/core/bus.c b/drivers/cxl/core/bus.c deleted file mode 100644 index 3f9b98ecd18b..000000000000 --- a/drivers/cxl/core/bus.c +++ /dev/null @@ -1,675 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Copyright(c) 2020 Intel Corporation. All rights reserved. */ -#include -#include -#include -#include -#include -#include -#include -#include -#include "core.h" - -/** - * DOC: cxl core - * - * The CXL core provides a set of interfaces that can be consumed by CXL aware - * drivers. The interfaces allow for creation, modification, and destruction of - * regions, memory devices, ports, and decoders. CXL aware drivers must register - * with the CXL core via these interfaces in order to be able to participate in - * cross-device interleave coordination. The CXL core also establishes and - * maintains the bridge to the nvdimm subsystem. - * - * CXL core introduces sysfs hierarchy to control the devices that are - * instantiated by the core. - */ - -static DEFINE_IDA(cxl_port_ida); - -static ssize_t devtype_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - return sysfs_emit(buf, "%s\n", dev->type->name); -} -static DEVICE_ATTR_RO(devtype); - -static struct attribute *cxl_base_attributes[] = { - &dev_attr_devtype.attr, - NULL, -}; - -struct attribute_group cxl_base_attribute_group = { - .attrs = cxl_base_attributes, -}; - -static ssize_t start_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct cxl_decoder *cxld = to_cxl_decoder(dev); - - return sysfs_emit(buf, "%#llx\n", cxld->range.start); -} -static DEVICE_ATTR_RO(start); - -static ssize_t size_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct cxl_decoder *cxld = to_cxl_decoder(dev); - - return sysfs_emit(buf, "%#llx\n", range_len(&cxld->range)); -} -static DEVICE_ATTR_RO(size); - -#define CXL_DECODER_FLAG_ATTR(name, flag) \ -static ssize_t name##_show(struct device *dev, \ - struct device_attribute *attr, char *buf) \ -{ \ - struct cxl_decoder *cxld = to_cxl_decoder(dev); \ - \ - return sysfs_emit(buf, "%s\n", \ - (cxld->flags & (flag)) ? "1" : "0"); \ -} \ -static DEVICE_ATTR_RO(name) - -CXL_DECODER_FLAG_ATTR(cap_pmem, CXL_DECODER_F_PMEM); -CXL_DECODER_FLAG_ATTR(cap_ram, CXL_DECODER_F_RAM); -CXL_DECODER_FLAG_ATTR(cap_type2, CXL_DECODER_F_TYPE2); -CXL_DECODER_FLAG_ATTR(cap_type3, CXL_DECODER_F_TYPE3); -CXL_DECODER_FLAG_ATTR(locked, CXL_DECODER_F_LOCK); - -static ssize_t target_type_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct cxl_decoder *cxld = to_cxl_decoder(dev); - - switch (cxld->target_type) { - case CXL_DECODER_ACCELERATOR: - return sysfs_emit(buf, "accelerator\n"); - case CXL_DECODER_EXPANDER: - return sysfs_emit(buf, "expander\n"); - } - return -ENXIO; -} -static DEVICE_ATTR_RO(target_type); - -static ssize_t target_list_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct cxl_decoder *cxld = to_cxl_decoder(dev); - ssize_t offset = 0; - int i, rc = 0; - - device_lock(dev); - for (i = 0; i < cxld->interleave_ways; i++) { - struct cxl_dport *dport = cxld->target[i]; - struct cxl_dport *next = NULL; - - if (!dport) - break; - - if (i + 1 < cxld->interleave_ways) - next = cxld->target[i + 1]; - rc = sysfs_emit_at(buf, offset, "%d%s", dport->port_id, - next ? "," : ""); - if (rc < 0) - break; - offset += rc; - } - device_unlock(dev); - - if (rc < 0) - return rc; - - rc = sysfs_emit_at(buf, offset, "\n"); - if (rc < 0) - return rc; - - return offset + rc; -} -static DEVICE_ATTR_RO(target_list); - -static struct attribute *cxl_decoder_base_attrs[] = { - &dev_attr_start.attr, - &dev_attr_size.attr, - &dev_attr_locked.attr, - &dev_attr_target_list.attr, - NULL, -}; - -static struct attribute_group cxl_decoder_base_attribute_group = { - .attrs = cxl_decoder_base_attrs, -}; - -static struct attribute *cxl_decoder_root_attrs[] = { - &dev_attr_cap_pmem.attr, - &dev_attr_cap_ram.attr, - &dev_attr_cap_type2.attr, - &dev_attr_cap_type3.attr, - NULL, -}; - -static struct attribute_group cxl_decoder_root_attribute_group = { - .attrs = cxl_decoder_root_attrs, -}; - -static const struct attribute_group *cxl_decoder_root_attribute_groups[] = { - &cxl_decoder_root_attribute_group, - &cxl_decoder_base_attribute_group, - &cxl_base_attribute_group, - NULL, -}; - -static struct attribute *cxl_decoder_switch_attrs[] = { - &dev_attr_target_type.attr, - NULL, -}; - -static struct attribute_group cxl_decoder_switch_attribute_group = { - .attrs = cxl_decoder_switch_attrs, -}; - -static const struct attribute_group *cxl_decoder_switch_attribute_groups[] = { - &cxl_decoder_switch_attribute_group, - &cxl_decoder_base_attribute_group, - &cxl_base_attribute_group, - NULL, -}; - -static void cxl_decoder_release(struct device *dev) -{ - struct cxl_decoder *cxld = to_cxl_decoder(dev); - struct cxl_port *port = to_cxl_port(dev->parent); - - ida_free(&port->decoder_ida, cxld->id); - kfree(cxld); -} - -static const struct device_type cxl_decoder_switch_type = { - .name = "cxl_decoder_switch", - .release = cxl_decoder_release, - .groups = cxl_decoder_switch_attribute_groups, -}; - -static const struct device_type cxl_decoder_root_type = { - .name = "cxl_decoder_root", - .release = cxl_decoder_release, - .groups = cxl_decoder_root_attribute_groups, -}; - -bool is_root_decoder(struct device *dev) -{ - return dev->type == &cxl_decoder_root_type; -} -EXPORT_SYMBOL_NS_GPL(is_root_decoder, CXL); - -struct cxl_decoder *to_cxl_decoder(struct device *dev) -{ - if (dev_WARN_ONCE(dev, dev->type->release != cxl_decoder_release, - "not a cxl_decoder device\n")) - return NULL; - return container_of(dev, struct cxl_decoder, dev); -} -EXPORT_SYMBOL_NS_GPL(to_cxl_decoder, CXL); - -static void cxl_dport_release(struct cxl_dport *dport) -{ - list_del(&dport->list); - put_device(dport->dport); - kfree(dport); -} - -static void cxl_port_release(struct device *dev) -{ - struct cxl_port *port = to_cxl_port(dev); - struct cxl_dport *dport, *_d; - - device_lock(dev); - list_for_each_entry_safe(dport, _d, &port->dports, list) - cxl_dport_release(dport); - device_unlock(dev); - ida_free(&cxl_port_ida, port->id); - kfree(port); -} - -static const struct attribute_group *cxl_port_attribute_groups[] = { - &cxl_base_attribute_group, - NULL, -}; - -static const struct device_type cxl_port_type = { - .name = "cxl_port", - .release = cxl_port_release, - .groups = cxl_port_attribute_groups, -}; - -struct cxl_port *to_cxl_port(struct device *dev) -{ - if (dev_WARN_ONCE(dev, dev->type != &cxl_port_type, - "not a cxl_port device\n")) - return NULL; - return container_of(dev, struct cxl_port, dev); -} - -static void unregister_port(void *_port) -{ - struct cxl_port *port = _port; - struct cxl_dport *dport; - - device_lock(&port->dev); - list_for_each_entry(dport, &port->dports, list) { - char link_name[CXL_TARGET_STRLEN]; - - if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d", - dport->port_id) >= CXL_TARGET_STRLEN) - continue; - sysfs_remove_link(&port->dev.kobj, link_name); - } - device_unlock(&port->dev); - device_unregister(&port->dev); -} - -static void cxl_unlink_uport(void *_port) -{ - struct cxl_port *port = _port; - - sysfs_remove_link(&port->dev.kobj, "uport"); -} - -static int devm_cxl_link_uport(struct device *host, struct cxl_port *port) -{ - int rc; - - rc = sysfs_create_link(&port->dev.kobj, &port->uport->kobj, "uport"); - if (rc) - return rc; - return devm_add_action_or_reset(host, cxl_unlink_uport, port); -} - -static struct cxl_port *cxl_port_alloc(struct device *uport, - resource_size_t component_reg_phys, - struct cxl_port *parent_port) -{ - struct cxl_port *port; - struct device *dev; - int rc; - - port = kzalloc(sizeof(*port), GFP_KERNEL); - if (!port) - return ERR_PTR(-ENOMEM); - - rc = ida_alloc(&cxl_port_ida, GFP_KERNEL); - if (rc < 0) - goto err; - port->id = rc; - - /* - * The top-level cxl_port "cxl_root" does not have a cxl_port as - * its parent and it does not have any corresponding component - * registers as its decode is described by a fixed platform - * description. - */ - dev = &port->dev; - if (parent_port) - dev->parent = &parent_port->dev; - else - dev->parent = uport; - - port->uport = uport; - port->component_reg_phys = component_reg_phys; - ida_init(&port->decoder_ida); - INIT_LIST_HEAD(&port->dports); - - device_initialize(dev); - device_set_pm_not_required(dev); - dev->bus = &cxl_bus_type; - dev->type = &cxl_port_type; - - return port; - -err: - kfree(port); - return ERR_PTR(rc); -} - -/** - * devm_cxl_add_port - register a cxl_port in CXL memory decode hierarchy - * @host: host device for devm operations - * @uport: "physical" device implementing this upstream port - * @component_reg_phys: (optional) for configurable cxl_port instances - * @parent_port: next hop up in the CXL memory decode hierarchy - */ -struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, - resource_size_t component_reg_phys, - struct cxl_port *parent_port) -{ - struct cxl_port *port; - struct device *dev; - int rc; - - port = cxl_port_alloc(uport, component_reg_phys, parent_port); - if (IS_ERR(port)) - return port; - - dev = &port->dev; - if (parent_port) - rc = dev_set_name(dev, "port%d", port->id); - else - rc = dev_set_name(dev, "root%d", port->id); - if (rc) - goto err; - - rc = device_add(dev); - if (rc) - goto err; - - rc = devm_add_action_or_reset(host, unregister_port, port); - if (rc) - return ERR_PTR(rc); - - rc = devm_cxl_link_uport(host, port); - if (rc) - return ERR_PTR(rc); - - return port; - -err: - put_device(dev); - return ERR_PTR(rc); -} -EXPORT_SYMBOL_NS_GPL(devm_cxl_add_port, CXL); - -static struct cxl_dport *find_dport(struct cxl_port *port, int id) -{ - struct cxl_dport *dport; - - device_lock_assert(&port->dev); - list_for_each_entry (dport, &port->dports, list) - if (dport->port_id == id) - return dport; - return NULL; -} - -static int add_dport(struct cxl_port *port, struct cxl_dport *new) -{ - struct cxl_dport *dup; - - device_lock(&port->dev); - dup = find_dport(port, new->port_id); - if (dup) - dev_err(&port->dev, - "unable to add dport%d-%s non-unique port id (%s)\n", - new->port_id, dev_name(new->dport), - dev_name(dup->dport)); - else - list_add_tail(&new->list, &port->dports); - device_unlock(&port->dev); - - return dup ? -EEXIST : 0; -} - -/** - * cxl_add_dport - append downstream port data to a cxl_port - * @port: the cxl_port that references this dport - * @dport_dev: firmware or PCI device representing the dport - * @port_id: identifier for this dport in a decoder's target list - * @component_reg_phys: optional location of CXL component registers - * - * Note that all allocations and links are undone by cxl_port deletion - * and release. - */ -int cxl_add_dport(struct cxl_port *port, struct device *dport_dev, int port_id, - resource_size_t component_reg_phys) -{ - char link_name[CXL_TARGET_STRLEN]; - struct cxl_dport *dport; - int rc; - - if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d", port_id) >= - CXL_TARGET_STRLEN) - return -EINVAL; - - dport = kzalloc(sizeof(*dport), GFP_KERNEL); - if (!dport) - return -ENOMEM; - - INIT_LIST_HEAD(&dport->list); - dport->dport = get_device(dport_dev); - dport->port_id = port_id; - dport->component_reg_phys = component_reg_phys; - dport->port = port; - - rc = add_dport(port, dport); - if (rc) - goto err; - - rc = sysfs_create_link(&port->dev.kobj, &dport_dev->kobj, link_name); - if (rc) - goto err; - - return 0; -err: - cxl_dport_release(dport); - return rc; -} -EXPORT_SYMBOL_NS_GPL(cxl_add_dport, CXL); - -static int decoder_populate_targets(struct cxl_decoder *cxld, - struct cxl_port *port, int *target_map) -{ - int rc = 0, i; - - if (!target_map) - return 0; - - device_lock(&port->dev); - if (list_empty(&port->dports)) { - rc = -EINVAL; - goto out_unlock; - } - - for (i = 0; i < cxld->nr_targets; i++) { - struct cxl_dport *dport = find_dport(port, target_map[i]); - - if (!dport) { - rc = -ENXIO; - goto out_unlock; - } - cxld->target[i] = dport; - } - -out_unlock: - device_unlock(&port->dev); - - return rc; -} - -struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, int nr_targets) -{ - struct cxl_decoder *cxld; - struct device *dev; - int rc = 0; - - if (nr_targets > CXL_DECODER_MAX_INTERLEAVE || nr_targets < 1) - return ERR_PTR(-EINVAL); - - cxld = kzalloc(struct_size(cxld, target, nr_targets), GFP_KERNEL); - if (!cxld) - return ERR_PTR(-ENOMEM); - - rc = ida_alloc(&port->decoder_ida, GFP_KERNEL); - if (rc < 0) - goto err; - - cxld->id = rc; - cxld->nr_targets = nr_targets; - dev = &cxld->dev; - device_initialize(dev); - device_set_pm_not_required(dev); - dev->parent = &port->dev; - dev->bus = &cxl_bus_type; - - /* root ports do not have a cxl_port_type parent */ - if (port->dev.parent->type == &cxl_port_type) - dev->type = &cxl_decoder_switch_type; - else - dev->type = &cxl_decoder_root_type; - - return cxld; -err: - kfree(cxld); - return ERR_PTR(rc); -} -EXPORT_SYMBOL_NS_GPL(cxl_decoder_alloc, CXL); - -int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map) -{ - struct cxl_port *port; - struct device *dev; - int rc; - - if (WARN_ON_ONCE(!cxld)) - return -EINVAL; - - if (WARN_ON_ONCE(IS_ERR(cxld))) - return PTR_ERR(cxld); - - if (cxld->interleave_ways < 1) - return -EINVAL; - - port = to_cxl_port(cxld->dev.parent); - rc = decoder_populate_targets(cxld, port, target_map); - if (rc) - return rc; - - dev = &cxld->dev; - rc = dev_set_name(dev, "decoder%d.%d", port->id, cxld->id); - if (rc) - return rc; - - return device_add(dev); -} -EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, CXL); - -static void cxld_unregister(void *dev) -{ - device_unregister(dev); -} - -int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld) -{ - return devm_add_action_or_reset(host, cxld_unregister, &cxld->dev); -} -EXPORT_SYMBOL_NS_GPL(cxl_decoder_autoremove, CXL); - -/** - * __cxl_driver_register - register a driver for the cxl bus - * @cxl_drv: cxl driver structure to attach - * @owner: owning module/driver - * @modname: KBUILD_MODNAME for parent driver - */ -int __cxl_driver_register(struct cxl_driver *cxl_drv, struct module *owner, - const char *modname) -{ - if (!cxl_drv->probe) { - pr_debug("%s ->probe() must be specified\n", modname); - return -EINVAL; - } - - if (!cxl_drv->name) { - pr_debug("%s ->name must be specified\n", modname); - return -EINVAL; - } - - if (!cxl_drv->id) { - pr_debug("%s ->id must be specified\n", modname); - return -EINVAL; - } - - cxl_drv->drv.bus = &cxl_bus_type; - cxl_drv->drv.owner = owner; - cxl_drv->drv.mod_name = modname; - cxl_drv->drv.name = cxl_drv->name; - - return driver_register(&cxl_drv->drv); -} -EXPORT_SYMBOL_NS_GPL(__cxl_driver_register, CXL); - -void cxl_driver_unregister(struct cxl_driver *cxl_drv) -{ - driver_unregister(&cxl_drv->drv); -} -EXPORT_SYMBOL_NS_GPL(cxl_driver_unregister, CXL); - -static int cxl_device_id(struct device *dev) -{ - if (dev->type == &cxl_nvdimm_bridge_type) - return CXL_DEVICE_NVDIMM_BRIDGE; - if (dev->type == &cxl_nvdimm_type) - return CXL_DEVICE_NVDIMM; - return 0; -} - -static int cxl_bus_uevent(struct device *dev, struct kobj_uevent_env *env) -{ - return add_uevent_var(env, "MODALIAS=" CXL_MODALIAS_FMT, - cxl_device_id(dev)); -} - -static int cxl_bus_match(struct device *dev, struct device_driver *drv) -{ - return cxl_device_id(dev) == to_cxl_drv(drv)->id; -} - -static int cxl_bus_probe(struct device *dev) -{ - return to_cxl_drv(dev->driver)->probe(dev); -} - -static void cxl_bus_remove(struct device *dev) -{ - struct cxl_driver *cxl_drv = to_cxl_drv(dev->driver); - - if (cxl_drv->remove) - cxl_drv->remove(dev); -} - -struct bus_type cxl_bus_type = { - .name = "cxl", - .uevent = cxl_bus_uevent, - .match = cxl_bus_match, - .probe = cxl_bus_probe, - .remove = cxl_bus_remove, -}; -EXPORT_SYMBOL_NS_GPL(cxl_bus_type, CXL); - -static __init int cxl_core_init(void) -{ - int rc; - - cxl_mbox_init(); - - rc = cxl_memdev_init(); - if (rc) - return rc; - - rc = bus_register(&cxl_bus_type); - if (rc) - goto err; - return 0; - -err: - cxl_memdev_exit(); - cxl_mbox_exit(); - return rc; -} - -static void cxl_core_exit(void) -{ - bus_unregister(&cxl_bus_type); - cxl_memdev_exit(); - cxl_mbox_exit(); -} - -module_init(cxl_core_init); -module_exit(cxl_core_exit); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index e0c9aacc4e9c..1a50c0fc399c 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -14,6 +14,8 @@ struct cxl_mem_query_commands; int cxl_query_cmd(struct cxl_memdev *cxlmd, struct cxl_mem_query_commands __user *q); int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s); +void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr, + resource_size_t length); int cxl_memdev_init(void); void cxl_memdev_exit(void); diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c new file mode 100644 index 000000000000..0e89a7a932d4 --- /dev/null +++ b/drivers/cxl/core/hdm.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */ +#include +#include +#include + +#include "cxlmem.h" +#include "core.h" + +/** + * DOC: cxl core hdm + * + * Compute Express Link Host Managed Device Memory, starting with the + * CXL 2.0 specification, is managed by an array of HDM Decoder register + * instances per CXL port and per CXL endpoint. Define common helpers + * for enumerating these registers and capabilities. + */ + +static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, + int *target_map) +{ + int rc; + + rc = cxl_decoder_add_locked(cxld, target_map); + if (rc) { + put_device(&cxld->dev); + dev_err(&port->dev, "Failed to add decoder\n"); + return rc; + } + + rc = cxl_decoder_autoremove(&port->dev, cxld); + if (rc) + return rc; + + dev_dbg(&cxld->dev, "Added to port %s\n", dev_name(&port->dev)); + + return 0; +} + +/* + * Per the CXL specification (8.2.5.12 CXL HDM Decoder Capability Structure) + * single ported host-bridges need not publish a decoder capability when a + * passthrough decode can be assumed, i.e. all transactions that the uport sees + * are claimed and passed to the single dport. Disable the range until the first + * CXL region is enumerated / activated. + */ +int devm_cxl_add_passthrough_decoder(struct cxl_port *port) +{ + struct cxl_decoder *cxld; + struct cxl_dport *dport; + int single_port_map[1]; + + cxld = cxl_switch_decoder_alloc(port, 1); + if (IS_ERR(cxld)) + return PTR_ERR(cxld); + + device_lock_assert(&port->dev); + + dport = list_first_entry(&port->dports, typeof(*dport), list); + single_port_map[0] = dport->port_id; + + return add_hdm_decoder(port, cxld, single_port_map); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_add_passthrough_decoder, CXL); + +static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm) +{ + u32 hdm_cap; + + hdm_cap = readl(cxlhdm->regs.hdm_decoder + CXL_HDM_DECODER_CAP_OFFSET); + cxlhdm->decoder_count = cxl_hdm_decoder_count(hdm_cap); + cxlhdm->target_count = + FIELD_GET(CXL_HDM_DECODER_TARGET_COUNT_MASK, hdm_cap); + if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_11_8, hdm_cap)) + cxlhdm->interleave_mask |= GENMASK(11, 8); + if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_14_12, hdm_cap)) + cxlhdm->interleave_mask |= GENMASK(14, 12); +} + +static void __iomem *map_hdm_decoder_regs(struct cxl_port *port, + void __iomem *crb) +{ + struct cxl_component_reg_map map; + + cxl_probe_component_regs(&port->dev, crb, &map); + if (!map.hdm_decoder.valid) { + dev_err(&port->dev, "HDM decoder registers invalid\n"); + return IOMEM_ERR_PTR(-ENXIO); + } + + return crb + map.hdm_decoder.offset; +} + +/** + * devm_cxl_setup_hdm - map HDM decoder component registers + * @port: cxl_port to map + */ +struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port) +{ + struct device *dev = &port->dev; + void __iomem *crb, *hdm; + struct cxl_hdm *cxlhdm; + + cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL); + if (!cxlhdm) + return ERR_PTR(-ENOMEM); + + cxlhdm->port = port; + crb = devm_cxl_iomap_block(dev, port->component_reg_phys, + CXL_COMPONENT_REG_BLOCK_SIZE); + if (!crb) { + dev_err(dev, "No component registers mapped\n"); + return ERR_PTR(-ENXIO); + } + + hdm = map_hdm_decoder_regs(port, crb); + if (IS_ERR(hdm)) + return ERR_CAST(hdm); + cxlhdm->regs.hdm_decoder = hdm; + + parse_hdm_decoder_caps(cxlhdm); + if (cxlhdm->decoder_count == 0) { + dev_err(dev, "Spec violation. Caps invalid\n"); + return ERR_PTR(-ENXIO); + } + + return cxlhdm; +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_hdm, CXL); + +static int to_interleave_granularity(u32 ctrl) +{ + int val = FIELD_GET(CXL_HDM_DECODER0_CTRL_IG_MASK, ctrl); + + return 256 << val; +} + +static int to_interleave_ways(u32 ctrl) +{ + int val = FIELD_GET(CXL_HDM_DECODER0_CTRL_IW_MASK, ctrl); + + switch (val) { + case 0 ... 4: + return 1 << val; + case 8 ... 10: + return 3 << (val - 8); + default: + return 0; + } +} + +static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, + int *target_map, void __iomem *hdm, int which) +{ + u64 size, base; + u32 ctrl; + int i; + union { + u64 value; + unsigned char target_id[8]; + } target_list; + + ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which)); + base = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which)); + size = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which)); + + if (!(ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED)) + size = 0; + if (base == U64_MAX || size == U64_MAX) { + dev_warn(&port->dev, "decoder%d.%d: Invalid resource range\n", + port->id, cxld->id); + return -ENXIO; + } + + cxld->decoder_range = (struct range) { + .start = base, + .end = base + size - 1, + }; + + /* switch decoders are always enabled if committed */ + if (ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED) { + cxld->flags |= CXL_DECODER_F_ENABLE; + if (ctrl & CXL_HDM_DECODER0_CTRL_LOCK) + cxld->flags |= CXL_DECODER_F_LOCK; + } + cxld->interleave_ways = to_interleave_ways(ctrl); + if (!cxld->interleave_ways) { + dev_warn(&port->dev, + "decoder%d.%d: Invalid interleave ways (ctrl: %#x)\n", + port->id, cxld->id, ctrl); + return -ENXIO; + } + cxld->interleave_granularity = to_interleave_granularity(ctrl); + + if (FIELD_GET(CXL_HDM_DECODER0_CTRL_TYPE, ctrl)) + cxld->target_type = CXL_DECODER_EXPANDER; + else + cxld->target_type = CXL_DECODER_ACCELERATOR; + + if (is_cxl_endpoint(to_cxl_port(cxld->dev.parent))) + return 0; + + target_list.value = + ioread64_hi_lo(hdm + CXL_HDM_DECODER0_TL_LOW(which)); + for (i = 0; i < cxld->interleave_ways; i++) + target_map[i] = target_list.target_id[i]; + + return 0; +} + +/** + * devm_cxl_enumerate_decoders - add decoder objects per HDM register set + * @cxlhdm: Structure to populate with HDM capabilities + */ +int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) +{ + void __iomem *hdm = cxlhdm->regs.hdm_decoder; + struct cxl_port *port = cxlhdm->port; + int i, committed, failed; + u32 ctrl; + + /* + * Since the register resource was recently claimed via request_region() + * be careful about trusting the "not-committed" status until the commit + * timeout has elapsed. The commit timeout is 10ms (CXL 2.0 + * 8.2.5.12.20), but double it to be tolerant of any clock skew between + * host and target. + */ + for (i = 0, committed = 0; i < cxlhdm->decoder_count; i++) { + ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i)); + if (ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED) + committed++; + } + + /* ensure that future checks of committed can be trusted */ + if (committed != cxlhdm->decoder_count) + msleep(20); + + for (i = 0, failed = 0; i < cxlhdm->decoder_count; i++) { + int target_map[CXL_DECODER_MAX_INTERLEAVE] = { 0 }; + int rc, target_count = cxlhdm->target_count; + struct cxl_decoder *cxld; + + if (is_cxl_endpoint(port)) + cxld = cxl_endpoint_decoder_alloc(port); + else + cxld = cxl_switch_decoder_alloc(port, target_count); + if (IS_ERR(cxld)) { + dev_warn(&port->dev, + "Failed to allocate the decoder\n"); + return PTR_ERR(cxld); + } + + rc = init_hdm_decoder(port, cxld, target_map, + cxlhdm->regs.hdm_decoder, i); + if (rc) { + put_device(&cxld->dev); + failed++; + continue; + } + rc = add_hdm_decoder(port, cxld, target_map); + if (rc) { + dev_warn(&port->dev, + "Failed to add decoder to port\n"); + return rc; + } + } + + if (failed == cxlhdm->decoder_count) { + dev_err(&port->dev, "No valid decoders found\n"); + return -ENXIO; + } + + return 0; +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_enumerate_decoders, CXL); diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 61029cb7ac62..1f76b28f9826 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -89,10 +89,29 @@ static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr, static struct device_attribute dev_attr_pmem_size = __ATTR(size, 0444, pmem_size_show, NULL); +static ssize_t serial_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + + return sysfs_emit(buf, "%#llx\n", cxlds->serial); +} +static DEVICE_ATTR_RO(serial); + +static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", dev_to_node(dev)); +} +static DEVICE_ATTR_RO(numa_node); + static struct attribute *cxl_memdev_attributes[] = { + &dev_attr_serial.attr, &dev_attr_firmware_version.attr, &dev_attr_payload_max.attr, &dev_attr_label_storage_size.attr, + &dev_attr_numa_node.attr, NULL, }; @@ -106,8 +125,17 @@ static struct attribute *cxl_memdev_ram_attributes[] = { NULL, }; +static umode_t cxl_memdev_visible(struct kobject *kobj, struct attribute *a, + int n) +{ + if (!IS_ENABLED(CONFIG_NUMA) && a == &dev_attr_numa_node.attr) + return 0; + return a->mode; +} + static struct attribute_group cxl_memdev_attribute_group = { .attrs = cxl_memdev_attributes, + .is_visible = cxl_memdev_visible, }; static struct attribute_group cxl_memdev_ram_attribute_group = { @@ -134,6 +162,12 @@ static const struct device_type cxl_memdev_type = { .groups = cxl_memdev_attribute_groups, }; +bool is_cxl_memdev(struct device *dev) +{ + return dev->type == &cxl_memdev_type; +} +EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, CXL); + /** * set_exclusive_cxl_commands() - atomically disable user cxl commands * @cxlds: The device state to operate on @@ -185,6 +219,15 @@ static void cxl_memdev_unregister(void *_cxlmd) put_device(dev); } +static void detach_memdev(struct work_struct *work) +{ + struct cxl_memdev *cxlmd; + + cxlmd = container_of(work, typeof(*cxlmd), detach_work); + device_release_driver(&cxlmd->dev); + put_device(&cxlmd->dev); +} + static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, const struct file_operations *fops) { @@ -209,6 +252,7 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, dev->devt = MKDEV(cxl_mem_major, cxlmd->id); dev->type = &cxl_memdev_type; device_set_pm_not_required(dev); + INIT_WORK(&cxlmd->detach_work, detach_memdev); cdev = &cxlmd->cdev; cdev_init(cdev, fops); diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c new file mode 100644 index 000000000000..c9a494d6976a --- /dev/null +++ b/drivers/cxl/core/pci.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2021 Intel Corporation. All rights reserved. */ +#include +#include +#include +#include +#include "core.h" + +/** + * DOC: cxl core pci + * + * Compute Express Link protocols are layered on top of PCIe. CXL core provides + * a set of helpers for CXL interactions which occur via PCIe. + */ + +struct cxl_walk_context { + struct pci_bus *bus; + struct cxl_port *port; + int type; + int error; + int count; +}; + +static int match_add_dports(struct pci_dev *pdev, void *data) +{ + struct cxl_walk_context *ctx = data; + struct cxl_port *port = ctx->port; + int type = pci_pcie_type(pdev); + struct cxl_register_map map; + struct cxl_dport *dport; + u32 lnkcap, port_num; + int rc; + + if (pdev->bus != ctx->bus) + return 0; + if (!pci_is_pcie(pdev)) + return 0; + if (type != ctx->type) + return 0; + if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP, + &lnkcap)) + return 0; + + rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map); + if (rc) + dev_dbg(&port->dev, "failed to find component registers\n"); + + port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap); + dport = devm_cxl_add_dport(port, &pdev->dev, port_num, + cxl_regmap_to_base(pdev, &map)); + if (IS_ERR(dport)) { + ctx->error = PTR_ERR(dport); + return PTR_ERR(dport); + } + ctx->count++; + + dev_dbg(&port->dev, "add dport%d: %s\n", port_num, dev_name(&pdev->dev)); + + return 0; +} + +/** + * devm_cxl_port_enumerate_dports - enumerate downstream ports of the upstream port + * @port: cxl_port whose ->uport is the upstream of dports to be enumerated + * + * Returns a positive number of dports enumerated or a negative error + * code. + */ +int devm_cxl_port_enumerate_dports(struct cxl_port *port) +{ + struct pci_bus *bus = cxl_port_to_pci_bus(port); + struct cxl_walk_context ctx; + int type; + + if (!bus) + return -ENXIO; + + if (pci_is_root_bus(bus)) + type = PCI_EXP_TYPE_ROOT_PORT; + else + type = PCI_EXP_TYPE_DOWNSTREAM; + + ctx = (struct cxl_walk_context) { + .port = port, + .bus = bus, + .type = type, + }; + pci_walk_bus(bus, match_add_dports, &ctx); + + if (ctx.count == 0) + return -ENODEV; + if (ctx.error) + return ctx.error; + return ctx.count; +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, CXL); diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index b5fca97b0a07..8de240c4d96b 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -57,24 +57,30 @@ bool is_cxl_nvdimm_bridge(struct device *dev) } EXPORT_SYMBOL_NS_GPL(is_cxl_nvdimm_bridge, CXL); -__mock int match_nvdimm_bridge(struct device *dev, const void *data) +static int match_nvdimm_bridge(struct device *dev, void *data) { return is_cxl_nvdimm_bridge(dev); } struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd) { + struct cxl_port *port = find_cxl_root(&cxl_nvd->dev); struct device *dev; - dev = bus_find_device(&cxl_bus_type, NULL, cxl_nvd, match_nvdimm_bridge); + if (!port) + return NULL; + + dev = device_find_child(&port->dev, NULL, match_nvdimm_bridge); + put_device(&port->dev); + if (!dev) return NULL; + return to_cxl_nvdimm_bridge(dev); } EXPORT_SYMBOL_NS_GPL(cxl_find_nvdimm_bridge, CXL); -static struct cxl_nvdimm_bridge * -cxl_nvdimm_bridge_alloc(struct cxl_port *port) +static struct cxl_nvdimm_bridge *cxl_nvdimm_bridge_alloc(struct cxl_port *port) { struct cxl_nvdimm_bridge *cxl_nvb; struct device *dev; @@ -115,10 +121,10 @@ static void unregister_nvb(void *_cxl_nvb) * work to flush. Once the state has been changed to 'dead' then no new * work can be queued by user-triggered bind. */ - device_lock(&cxl_nvb->dev); + cxl_device_lock(&cxl_nvb->dev); flush = cxl_nvb->state != CXL_NVB_NEW; cxl_nvb->state = CXL_NVB_DEAD; - device_unlock(&cxl_nvb->dev); + cxl_device_unlock(&cxl_nvb->dev); /* * Even though the device core will trigger device_release_driver() diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c new file mode 100644 index 000000000000..2ab1ba4499b3 --- /dev/null +++ b/drivers/cxl/core/port.c @@ -0,0 +1,1568 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2020 Intel Corporation. All rights reserved. */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "core.h" + +/** + * DOC: cxl core + * + * The CXL core provides a set of interfaces that can be consumed by CXL aware + * drivers. The interfaces allow for creation, modification, and destruction of + * regions, memory devices, ports, and decoders. CXL aware drivers must register + * with the CXL core via these interfaces in order to be able to participate in + * cross-device interleave coordination. The CXL core also establishes and + * maintains the bridge to the nvdimm subsystem. + * + * CXL core introduces sysfs hierarchy to control the devices that are + * instantiated by the core. + */ + +static DEFINE_IDA(cxl_port_ida); +static DEFINE_XARRAY(cxl_root_buses); + +static ssize_t devtype_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, "%s\n", dev->type->name); +} +static DEVICE_ATTR_RO(devtype); + +static int cxl_device_id(struct device *dev) +{ + if (dev->type == &cxl_nvdimm_bridge_type) + return CXL_DEVICE_NVDIMM_BRIDGE; + if (dev->type == &cxl_nvdimm_type) + return CXL_DEVICE_NVDIMM; + if (is_cxl_port(dev)) { + if (is_cxl_root(to_cxl_port(dev))) + return CXL_DEVICE_ROOT; + return CXL_DEVICE_PORT; + } + if (is_cxl_memdev(dev)) + return CXL_DEVICE_MEMORY_EXPANDER; + return 0; +} + +static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, CXL_MODALIAS_FMT "\n", cxl_device_id(dev)); +} +static DEVICE_ATTR_RO(modalias); + +static struct attribute *cxl_base_attributes[] = { + &dev_attr_devtype.attr, + &dev_attr_modalias.attr, + NULL, +}; + +struct attribute_group cxl_base_attribute_group = { + .attrs = cxl_base_attributes, +}; + +static ssize_t start_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct cxl_decoder *cxld = to_cxl_decoder(dev); + u64 start; + + if (is_root_decoder(dev)) + start = cxld->platform_res.start; + else + start = cxld->decoder_range.start; + + return sysfs_emit(buf, "%#llx\n", start); +} +static DEVICE_ATTR_ADMIN_RO(start); + +static ssize_t size_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct cxl_decoder *cxld = to_cxl_decoder(dev); + u64 size; + + if (is_root_decoder(dev)) + size = resource_size(&cxld->platform_res); + else + size = range_len(&cxld->decoder_range); + + return sysfs_emit(buf, "%#llx\n", size); +} +static DEVICE_ATTR_RO(size); + +#define CXL_DECODER_FLAG_ATTR(name, flag) \ +static ssize_t name##_show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct cxl_decoder *cxld = to_cxl_decoder(dev); \ + \ + return sysfs_emit(buf, "%s\n", \ + (cxld->flags & (flag)) ? "1" : "0"); \ +} \ +static DEVICE_ATTR_RO(name) + +CXL_DECODER_FLAG_ATTR(cap_pmem, CXL_DECODER_F_PMEM); +CXL_DECODER_FLAG_ATTR(cap_ram, CXL_DECODER_F_RAM); +CXL_DECODER_FLAG_ATTR(cap_type2, CXL_DECODER_F_TYPE2); +CXL_DECODER_FLAG_ATTR(cap_type3, CXL_DECODER_F_TYPE3); +CXL_DECODER_FLAG_ATTR(locked, CXL_DECODER_F_LOCK); + +static ssize_t target_type_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_decoder *cxld = to_cxl_decoder(dev); + + switch (cxld->target_type) { + case CXL_DECODER_ACCELERATOR: + return sysfs_emit(buf, "accelerator\n"); + case CXL_DECODER_EXPANDER: + return sysfs_emit(buf, "expander\n"); + } + return -ENXIO; +} +static DEVICE_ATTR_RO(target_type); + +static ssize_t emit_target_list(struct cxl_decoder *cxld, char *buf) +{ + ssize_t offset = 0; + int i, rc = 0; + + for (i = 0; i < cxld->interleave_ways; i++) { + struct cxl_dport *dport = cxld->target[i]; + struct cxl_dport *next = NULL; + + if (!dport) + break; + + if (i + 1 < cxld->interleave_ways) + next = cxld->target[i + 1]; + rc = sysfs_emit_at(buf, offset, "%d%s", dport->port_id, + next ? "," : ""); + if (rc < 0) + return rc; + offset += rc; + } + + return offset; +} + +static ssize_t target_list_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_decoder *cxld = to_cxl_decoder(dev); + ssize_t offset; + unsigned int seq; + int rc; + + do { + seq = read_seqbegin(&cxld->target_lock); + rc = emit_target_list(cxld, buf); + } while (read_seqretry(&cxld->target_lock, seq)); + + if (rc < 0) + return rc; + offset = rc; + + rc = sysfs_emit_at(buf, offset, "\n"); + if (rc < 0) + return rc; + + return offset + rc; +} +static DEVICE_ATTR_RO(target_list); + +static struct attribute *cxl_decoder_base_attrs[] = { + &dev_attr_start.attr, + &dev_attr_size.attr, + &dev_attr_locked.attr, + NULL, +}; + +static struct attribute_group cxl_decoder_base_attribute_group = { + .attrs = cxl_decoder_base_attrs, +}; + +static struct attribute *cxl_decoder_root_attrs[] = { + &dev_attr_cap_pmem.attr, + &dev_attr_cap_ram.attr, + &dev_attr_cap_type2.attr, + &dev_attr_cap_type3.attr, + &dev_attr_target_list.attr, + NULL, +}; + +static struct attribute_group cxl_decoder_root_attribute_group = { + .attrs = cxl_decoder_root_attrs, +}; + +static const struct attribute_group *cxl_decoder_root_attribute_groups[] = { + &cxl_decoder_root_attribute_group, + &cxl_decoder_base_attribute_group, + &cxl_base_attribute_group, + NULL, +}; + +static struct attribute *cxl_decoder_switch_attrs[] = { + &dev_attr_target_type.attr, + &dev_attr_target_list.attr, + NULL, +}; + +static struct attribute_group cxl_decoder_switch_attribute_group = { + .attrs = cxl_decoder_switch_attrs, +}; + +static const struct attribute_group *cxl_decoder_switch_attribute_groups[] = { + &cxl_decoder_switch_attribute_group, + &cxl_decoder_base_attribute_group, + &cxl_base_attribute_group, + NULL, +}; + +static struct attribute *cxl_decoder_endpoint_attrs[] = { + &dev_attr_target_type.attr, + NULL, +}; + +static struct attribute_group cxl_decoder_endpoint_attribute_group = { + .attrs = cxl_decoder_endpoint_attrs, +}; + +static const struct attribute_group *cxl_decoder_endpoint_attribute_groups[] = { + &cxl_decoder_base_attribute_group, + &cxl_decoder_endpoint_attribute_group, + &cxl_base_attribute_group, + NULL, +}; + +static void cxl_decoder_release(struct device *dev) +{ + struct cxl_decoder *cxld = to_cxl_decoder(dev); + struct cxl_port *port = to_cxl_port(dev->parent); + + ida_free(&port->decoder_ida, cxld->id); + kfree(cxld); + put_device(&port->dev); +} + +static const struct device_type cxl_decoder_endpoint_type = { + .name = "cxl_decoder_endpoint", + .release = cxl_decoder_release, + .groups = cxl_decoder_endpoint_attribute_groups, +}; + +static const struct device_type cxl_decoder_switch_type = { + .name = "cxl_decoder_switch", + .release = cxl_decoder_release, + .groups = cxl_decoder_switch_attribute_groups, +}; + +static const struct device_type cxl_decoder_root_type = { + .name = "cxl_decoder_root", + .release = cxl_decoder_release, + .groups = cxl_decoder_root_attribute_groups, +}; + +static bool is_endpoint_decoder(struct device *dev) +{ + return dev->type == &cxl_decoder_endpoint_type; +} + +bool is_root_decoder(struct device *dev) +{ + return dev->type == &cxl_decoder_root_type; +} +EXPORT_SYMBOL_NS_GPL(is_root_decoder, CXL); + +bool is_cxl_decoder(struct device *dev) +{ + return dev->type && dev->type->release == cxl_decoder_release; +} +EXPORT_SYMBOL_NS_GPL(is_cxl_decoder, CXL); + +struct cxl_decoder *to_cxl_decoder(struct device *dev) +{ + if (dev_WARN_ONCE(dev, dev->type->release != cxl_decoder_release, + "not a cxl_decoder device\n")) + return NULL; + return container_of(dev, struct cxl_decoder, dev); +} +EXPORT_SYMBOL_NS_GPL(to_cxl_decoder, CXL); + +static void cxl_ep_release(struct cxl_ep *ep) +{ + if (!ep) + return; + list_del(&ep->list); + put_device(ep->ep); + kfree(ep); +} + +static void cxl_port_release(struct device *dev) +{ + struct cxl_port *port = to_cxl_port(dev); + struct cxl_ep *ep, *_e; + + cxl_device_lock(dev); + list_for_each_entry_safe(ep, _e, &port->endpoints, list) + cxl_ep_release(ep); + cxl_device_unlock(dev); + ida_free(&cxl_port_ida, port->id); + kfree(port); +} + +static const struct attribute_group *cxl_port_attribute_groups[] = { + &cxl_base_attribute_group, + NULL, +}; + +static const struct device_type cxl_port_type = { + .name = "cxl_port", + .release = cxl_port_release, + .groups = cxl_port_attribute_groups, +}; + +bool is_cxl_port(struct device *dev) +{ + return dev->type == &cxl_port_type; +} +EXPORT_SYMBOL_NS_GPL(is_cxl_port, CXL); + +struct cxl_port *to_cxl_port(struct device *dev) +{ + if (dev_WARN_ONCE(dev, dev->type != &cxl_port_type, + "not a cxl_port device\n")) + return NULL; + return container_of(dev, struct cxl_port, dev); +} +EXPORT_SYMBOL_NS_GPL(to_cxl_port, CXL); + +static void unregister_port(void *_port) +{ + struct cxl_port *port = _port; + struct cxl_port *parent; + struct device *lock_dev; + + if (is_cxl_root(port)) + parent = NULL; + else + parent = to_cxl_port(port->dev.parent); + + /* + * CXL root port's and the first level of ports are unregistered + * under the platform firmware device lock, all other ports are + * unregistered while holding their parent port lock. + */ + if (!parent) + lock_dev = port->uport; + else if (is_cxl_root(parent)) + lock_dev = parent->uport; + else + lock_dev = &parent->dev; + + device_lock_assert(lock_dev); + port->uport = NULL; + device_unregister(&port->dev); +} + +static void cxl_unlink_uport(void *_port) +{ + struct cxl_port *port = _port; + + sysfs_remove_link(&port->dev.kobj, "uport"); +} + +static int devm_cxl_link_uport(struct device *host, struct cxl_port *port) +{ + int rc; + + rc = sysfs_create_link(&port->dev.kobj, &port->uport->kobj, "uport"); + if (rc) + return rc; + return devm_add_action_or_reset(host, cxl_unlink_uport, port); +} + +static struct cxl_port *cxl_port_alloc(struct device *uport, + resource_size_t component_reg_phys, + struct cxl_port *parent_port) +{ + struct cxl_port *port; + struct device *dev; + int rc; + + port = kzalloc(sizeof(*port), GFP_KERNEL); + if (!port) + return ERR_PTR(-ENOMEM); + + rc = ida_alloc(&cxl_port_ida, GFP_KERNEL); + if (rc < 0) + goto err; + port->id = rc; + + /* + * The top-level cxl_port "cxl_root" does not have a cxl_port as + * its parent and it does not have any corresponding component + * registers as its decode is described by a fixed platform + * description. + */ + dev = &port->dev; + if (parent_port) + dev->parent = &parent_port->dev; + else + dev->parent = uport; + + port->uport = uport; + port->component_reg_phys = component_reg_phys; + ida_init(&port->decoder_ida); + INIT_LIST_HEAD(&port->dports); + INIT_LIST_HEAD(&port->endpoints); + + device_initialize(dev); + device_set_pm_not_required(dev); + dev->bus = &cxl_bus_type; + dev->type = &cxl_port_type; + + return port; + +err: + kfree(port); + return ERR_PTR(rc); +} + +/** + * devm_cxl_add_port - register a cxl_port in CXL memory decode hierarchy + * @host: host device for devm operations + * @uport: "physical" device implementing this upstream port + * @component_reg_phys: (optional) for configurable cxl_port instances + * @parent_port: next hop up in the CXL memory decode hierarchy + */ +struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, + resource_size_t component_reg_phys, + struct cxl_port *parent_port) +{ + struct cxl_port *port; + struct device *dev; + int rc; + + port = cxl_port_alloc(uport, component_reg_phys, parent_port); + if (IS_ERR(port)) + return port; + + if (parent_port) + port->depth = parent_port->depth + 1; + dev = &port->dev; + if (is_cxl_memdev(uport)) + rc = dev_set_name(dev, "endpoint%d", port->id); + else if (parent_port) + rc = dev_set_name(dev, "port%d", port->id); + else + rc = dev_set_name(dev, "root%d", port->id); + if (rc) + goto err; + + rc = device_add(dev); + if (rc) + goto err; + + rc = devm_add_action_or_reset(host, unregister_port, port); + if (rc) + return ERR_PTR(rc); + + rc = devm_cxl_link_uport(host, port); + if (rc) + return ERR_PTR(rc); + + return port; + +err: + put_device(dev); + return ERR_PTR(rc); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_add_port, CXL); + +struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port) +{ + /* There is no pci_bus associated with a CXL platform-root port */ + if (is_cxl_root(port)) + return NULL; + + if (dev_is_pci(port->uport)) { + struct pci_dev *pdev = to_pci_dev(port->uport); + + return pdev->subordinate; + } + + return xa_load(&cxl_root_buses, (unsigned long)port->uport); +} +EXPORT_SYMBOL_NS_GPL(cxl_port_to_pci_bus, CXL); + +static void unregister_pci_bus(void *uport) +{ + xa_erase(&cxl_root_buses, (unsigned long)uport); +} + +int devm_cxl_register_pci_bus(struct device *host, struct device *uport, + struct pci_bus *bus) +{ + int rc; + + if (dev_is_pci(uport)) + return -EINVAL; + + rc = xa_insert(&cxl_root_buses, (unsigned long)uport, bus, GFP_KERNEL); + if (rc) + return rc; + return devm_add_action_or_reset(host, unregister_pci_bus, uport); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_register_pci_bus, CXL); + +static bool dev_is_cxl_root_child(struct device *dev) +{ + struct cxl_port *port, *parent; + + if (!is_cxl_port(dev)) + return false; + + port = to_cxl_port(dev); + if (is_cxl_root(port)) + return false; + + parent = to_cxl_port(port->dev.parent); + if (is_cxl_root(parent)) + return true; + + return false; +} + +/* Find a 2nd level CXL port that has a dport that is an ancestor of @match */ +static int match_root_child(struct device *dev, const void *match) +{ + const struct device *iter = NULL; + struct cxl_dport *dport; + struct cxl_port *port; + + if (!dev_is_cxl_root_child(dev)) + return 0; + + port = to_cxl_port(dev); + cxl_device_lock(dev); + list_for_each_entry(dport, &port->dports, list) { + iter = match; + while (iter) { + if (iter == dport->dport) + goto out; + iter = iter->parent; + } + } +out: + cxl_device_unlock(dev); + + return !!iter; +} + +struct cxl_port *find_cxl_root(struct device *dev) +{ + struct device *port_dev; + struct cxl_port *root; + + port_dev = bus_find_device(&cxl_bus_type, NULL, dev, match_root_child); + if (!port_dev) + return NULL; + + root = to_cxl_port(port_dev->parent); + get_device(&root->dev); + put_device(port_dev); + return root; +} +EXPORT_SYMBOL_NS_GPL(find_cxl_root, CXL); + +static struct cxl_dport *find_dport(struct cxl_port *port, int id) +{ + struct cxl_dport *dport; + + device_lock_assert(&port->dev); + list_for_each_entry (dport, &port->dports, list) + if (dport->port_id == id) + return dport; + return NULL; +} + +static int add_dport(struct cxl_port *port, struct cxl_dport *new) +{ + struct cxl_dport *dup; + + device_lock_assert(&port->dev); + dup = find_dport(port, new->port_id); + if (dup) + dev_err(&port->dev, + "unable to add dport%d-%s non-unique port id (%s)\n", + new->port_id, dev_name(new->dport), + dev_name(dup->dport)); + else + list_add_tail(&new->list, &port->dports); + + return dup ? -EEXIST : 0; +} + +/* + * Since root-level CXL dports cannot be enumerated by PCI they are not + * enumerated by the common port driver that acquires the port lock over + * dport add/remove. Instead, root dports are manually added by a + * platform driver and cond_cxl_root_lock() is used to take the missing + * port lock in that case. + */ +static void cond_cxl_root_lock(struct cxl_port *port) +{ + if (is_cxl_root(port)) + cxl_device_lock(&port->dev); +} + +static void cond_cxl_root_unlock(struct cxl_port *port) +{ + if (is_cxl_root(port)) + cxl_device_unlock(&port->dev); +} + +static void cxl_dport_remove(void *data) +{ + struct cxl_dport *dport = data; + struct cxl_port *port = dport->port; + + put_device(dport->dport); + cond_cxl_root_lock(port); + list_del(&dport->list); + cond_cxl_root_unlock(port); +} + +static void cxl_dport_unlink(void *data) +{ + struct cxl_dport *dport = data; + struct cxl_port *port = dport->port; + char link_name[CXL_TARGET_STRLEN]; + + sprintf(link_name, "dport%d", dport->port_id); + sysfs_remove_link(&port->dev.kobj, link_name); +} + +/** + * devm_cxl_add_dport - append downstream port data to a cxl_port + * @port: the cxl_port that references this dport + * @dport_dev: firmware or PCI device representing the dport + * @port_id: identifier for this dport in a decoder's target list + * @component_reg_phys: optional location of CXL component registers + * + * Note that dports are appended to the devm release action's of the + * either the port's host (for root ports), or the port itself (for + * switch ports) + */ +struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port, + struct device *dport_dev, int port_id, + resource_size_t component_reg_phys) +{ + char link_name[CXL_TARGET_STRLEN]; + struct cxl_dport *dport; + struct device *host; + int rc; + + if (is_cxl_root(port)) + host = port->uport; + else + host = &port->dev; + + if (!host->driver) { + dev_WARN_ONCE(&port->dev, 1, "dport:%s bad devm context\n", + dev_name(dport_dev)); + return ERR_PTR(-ENXIO); + } + + if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d", port_id) >= + CXL_TARGET_STRLEN) + return ERR_PTR(-EINVAL); + + dport = devm_kzalloc(host, sizeof(*dport), GFP_KERNEL); + if (!dport) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&dport->list); + dport->dport = dport_dev; + dport->port_id = port_id; + dport->component_reg_phys = component_reg_phys; + dport->port = port; + + cond_cxl_root_lock(port); + rc = add_dport(port, dport); + cond_cxl_root_unlock(port); + if (rc) + return ERR_PTR(rc); + + get_device(dport_dev); + rc = devm_add_action_or_reset(host, cxl_dport_remove, dport); + if (rc) + return ERR_PTR(rc); + + rc = sysfs_create_link(&port->dev.kobj, &dport_dev->kobj, link_name); + if (rc) + return ERR_PTR(rc); + + rc = devm_add_action_or_reset(host, cxl_dport_unlink, dport); + if (rc) + return ERR_PTR(rc); + + return dport; +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_add_dport, CXL); + +static struct cxl_ep *find_ep(struct cxl_port *port, struct device *ep_dev) +{ + struct cxl_ep *ep; + + device_lock_assert(&port->dev); + list_for_each_entry(ep, &port->endpoints, list) + if (ep->ep == ep_dev) + return ep; + return NULL; +} + +static int add_ep(struct cxl_port *port, struct cxl_ep *new) +{ + struct cxl_ep *dup; + + cxl_device_lock(&port->dev); + if (port->dead) { + cxl_device_unlock(&port->dev); + return -ENXIO; + } + dup = find_ep(port, new->ep); + if (!dup) + list_add_tail(&new->list, &port->endpoints); + cxl_device_unlock(&port->dev); + + return dup ? -EEXIST : 0; +} + +/** + * cxl_add_ep - register an endpoint's interest in a port + * @port: a port in the endpoint's topology ancestry + * @ep_dev: device representing the endpoint + * + * Intermediate CXL ports are scanned based on the arrival of endpoints. + * When those endpoints depart the port can be destroyed once all + * endpoints that care about that port have been removed. + */ +static int cxl_add_ep(struct cxl_port *port, struct device *ep_dev) +{ + struct cxl_ep *ep; + int rc; + + ep = kzalloc(sizeof(*ep), GFP_KERNEL); + if (!ep) + return -ENOMEM; + + INIT_LIST_HEAD(&ep->list); + ep->ep = get_device(ep_dev); + + rc = add_ep(port, ep); + if (rc) + cxl_ep_release(ep); + return rc; +} + +struct cxl_find_port_ctx { + const struct device *dport_dev; + const struct cxl_port *parent_port; +}; + +static int match_port_by_dport(struct device *dev, const void *data) +{ + const struct cxl_find_port_ctx *ctx = data; + struct cxl_port *port; + + if (!is_cxl_port(dev)) + return 0; + if (ctx->parent_port && dev->parent != &ctx->parent_port->dev) + return 0; + + port = to_cxl_port(dev); + return cxl_find_dport_by_dev(port, ctx->dport_dev) != NULL; +} + +static struct cxl_port *__find_cxl_port(struct cxl_find_port_ctx *ctx) +{ + struct device *dev; + + if (!ctx->dport_dev) + return NULL; + + dev = bus_find_device(&cxl_bus_type, NULL, ctx, match_port_by_dport); + if (dev) + return to_cxl_port(dev); + return NULL; +} + +static struct cxl_port *find_cxl_port(struct device *dport_dev) +{ + struct cxl_find_port_ctx ctx = { + .dport_dev = dport_dev, + }; + + return __find_cxl_port(&ctx); +} + +static struct cxl_port *find_cxl_port_at(struct cxl_port *parent_port, + struct device *dport_dev) +{ + struct cxl_find_port_ctx ctx = { + .dport_dev = dport_dev, + .parent_port = parent_port, + }; + + return __find_cxl_port(&ctx); +} + +/* + * All users of grandparent() are using it to walk PCIe-like swich port + * hierarchy. A PCIe switch is comprised of a bridge device representing the + * upstream switch port and N bridges representing downstream switch ports. When + * bridges stack the grand-parent of a downstream switch port is another + * downstream switch port in the immediate ancestor switch. + */ +static struct device *grandparent(struct device *dev) +{ + if (dev && dev->parent) + return dev->parent->parent; + return NULL; +} + +static void delete_endpoint(void *data) +{ + struct cxl_memdev *cxlmd = data; + struct cxl_port *endpoint = dev_get_drvdata(&cxlmd->dev); + struct cxl_port *parent_port; + struct device *parent; + + parent_port = cxl_mem_find_port(cxlmd); + if (!parent_port) + goto out; + parent = &parent_port->dev; + + cxl_device_lock(parent); + if (parent->driver && endpoint->uport) { + devm_release_action(parent, cxl_unlink_uport, endpoint); + devm_release_action(parent, unregister_port, endpoint); + } + cxl_device_unlock(parent); + put_device(parent); +out: + put_device(&endpoint->dev); +} + +int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint) +{ + struct device *dev = &cxlmd->dev; + + get_device(&endpoint->dev); + dev_set_drvdata(dev, endpoint); + return devm_add_action_or_reset(dev, delete_endpoint, cxlmd); +} +EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, CXL); + +/* + * The natural end of life of a non-root 'cxl_port' is when its parent port goes + * through a ->remove() event ("top-down" unregistration). The unnatural trigger + * for a port to be unregistered is when all memdevs beneath that port have gone + * through ->remove(). This "bottom-up" removal selectively removes individual + * child ports manually. This depends on devm_cxl_add_port() to not change is + * devm action registration order. + */ +static void delete_switch_port(struct cxl_port *port, struct list_head *dports) +{ + struct cxl_dport *dport, *_d; + + list_for_each_entry_safe(dport, _d, dports, list) { + devm_release_action(&port->dev, cxl_dport_unlink, dport); + devm_release_action(&port->dev, cxl_dport_remove, dport); + devm_kfree(&port->dev, dport); + } + devm_release_action(port->dev.parent, cxl_unlink_uport, port); + devm_release_action(port->dev.parent, unregister_port, port); +} + +static void cxl_detach_ep(void *data) +{ + struct cxl_memdev *cxlmd = data; + struct device *iter; + + for (iter = &cxlmd->dev; iter; iter = grandparent(iter)) { + struct device *dport_dev = grandparent(iter); + struct cxl_port *port, *parent_port; + LIST_HEAD(reap_dports); + struct cxl_ep *ep; + + if (!dport_dev) + break; + + port = find_cxl_port(dport_dev); + if (!port) + continue; + + if (is_cxl_root(port)) { + put_device(&port->dev); + continue; + } + + parent_port = to_cxl_port(port->dev.parent); + cxl_device_lock(&parent_port->dev); + if (!parent_port->dev.driver) { + /* + * The bottom-up race to delete the port lost to a + * top-down port disable, give up here, because the + * parent_port ->remove() will have cleaned up all + * descendants. + */ + cxl_device_unlock(&parent_port->dev); + put_device(&port->dev); + continue; + } + + cxl_device_lock(&port->dev); + ep = find_ep(port, &cxlmd->dev); + dev_dbg(&cxlmd->dev, "disconnect %s from %s\n", + ep ? dev_name(ep->ep) : "", dev_name(&port->dev)); + cxl_ep_release(ep); + if (ep && !port->dead && list_empty(&port->endpoints) && + !is_cxl_root(parent_port)) { + /* + * This was the last ep attached to a dynamically + * enumerated port. Block new cxl_add_ep() and garbage + * collect the port. + */ + port->dead = true; + list_splice_init(&port->dports, &reap_dports); + } + cxl_device_unlock(&port->dev); + + if (!list_empty(&reap_dports)) { + dev_dbg(&cxlmd->dev, "delete %s\n", + dev_name(&port->dev)); + delete_switch_port(port, &reap_dports); + } + put_device(&port->dev); + cxl_device_unlock(&parent_port->dev); + } +} + +static resource_size_t find_component_registers(struct device *dev) +{ + struct cxl_register_map map; + struct pci_dev *pdev; + + /* + * Theoretically, CXL component registers can be hosted on a + * non-PCI device, in practice, only cxl_test hits this case. + */ + if (!dev_is_pci(dev)) + return CXL_RESOURCE_NONE; + + pdev = to_pci_dev(dev); + + cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map); + return cxl_regmap_to_base(pdev, &map); +} + +static int add_port_attach_ep(struct cxl_memdev *cxlmd, + struct device *uport_dev, + struct device *dport_dev) +{ + struct device *dparent = grandparent(dport_dev); + struct cxl_port *port, *parent_port = NULL; + resource_size_t component_reg_phys; + int rc; + + if (!dparent) { + /* + * The iteration reached the topology root without finding the + * CXL-root 'cxl_port' on a previous iteration, fail for now to + * be re-probed after platform driver attaches. + */ + dev_dbg(&cxlmd->dev, "%s is a root dport\n", + dev_name(dport_dev)); + return -ENXIO; + } + + parent_port = find_cxl_port(dparent); + if (!parent_port) { + /* iterate to create this parent_port */ + return -EAGAIN; + } + + cxl_device_lock(&parent_port->dev); + if (!parent_port->dev.driver) { + dev_warn(&cxlmd->dev, + "port %s:%s disabled, failed to enumerate CXL.mem\n", + dev_name(&parent_port->dev), dev_name(uport_dev)); + port = ERR_PTR(-ENXIO); + goto out; + } + + port = find_cxl_port_at(parent_port, dport_dev); + if (!port) { + component_reg_phys = find_component_registers(uport_dev); + port = devm_cxl_add_port(&parent_port->dev, uport_dev, + component_reg_phys, parent_port); + if (!IS_ERR(port)) + get_device(&port->dev); + } +out: + cxl_device_unlock(&parent_port->dev); + + if (IS_ERR(port)) + rc = PTR_ERR(port); + else { + dev_dbg(&cxlmd->dev, "add to new port %s:%s\n", + dev_name(&port->dev), dev_name(port->uport)); + rc = cxl_add_ep(port, &cxlmd->dev); + if (rc == -EEXIST) { + /* + * "can't" happen, but this error code means + * something to the caller, so translate it. + */ + rc = -ENXIO; + } + put_device(&port->dev); + } + + put_device(&parent_port->dev); + return rc; +} + +int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd) +{ + struct device *dev = &cxlmd->dev; + struct device *iter; + int rc; + + rc = devm_add_action_or_reset(&cxlmd->dev, cxl_detach_ep, cxlmd); + if (rc) + return rc; + + /* + * Scan for and add all cxl_ports in this device's ancestry. + * Repeat until no more ports are added. Abort if a port add + * attempt fails. + */ +retry: + for (iter = dev; iter; iter = grandparent(iter)) { + struct device *dport_dev = grandparent(iter); + struct device *uport_dev; + struct cxl_port *port; + + if (!dport_dev) + return 0; + + uport_dev = dport_dev->parent; + if (!uport_dev) { + dev_warn(dev, "at %s no parent for dport: %s\n", + dev_name(iter), dev_name(dport_dev)); + return -ENXIO; + } + + dev_dbg(dev, "scan: iter: %s dport_dev: %s parent: %s\n", + dev_name(iter), dev_name(dport_dev), + dev_name(uport_dev)); + port = find_cxl_port(dport_dev); + if (port) { + dev_dbg(&cxlmd->dev, + "found already registered port %s:%s\n", + dev_name(&port->dev), dev_name(port->uport)); + rc = cxl_add_ep(port, &cxlmd->dev); + + /* + * If the endpoint already exists in the port's list, + * that's ok, it was added on a previous pass. + * Otherwise, retry in add_port_attach_ep() after taking + * the parent_port lock as the current port may be being + * reaped. + */ + if (rc && rc != -EEXIST) { + put_device(&port->dev); + return rc; + } + + /* Any more ports to add between this one and the root? */ + if (!dev_is_cxl_root_child(&port->dev)) { + put_device(&port->dev); + continue; + } + + put_device(&port->dev); + return 0; + } + + rc = add_port_attach_ep(cxlmd, uport_dev, dport_dev); + /* port missing, try to add parent */ + if (rc == -EAGAIN) + continue; + /* failed to add ep or port */ + if (rc) + return rc; + /* port added, new descendants possible, start over */ + goto retry; + } + + return 0; +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_enumerate_ports, CXL); + +struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd) +{ + return find_cxl_port(grandparent(&cxlmd->dev)); +} +EXPORT_SYMBOL_NS_GPL(cxl_mem_find_port, CXL); + +struct cxl_dport *cxl_find_dport_by_dev(struct cxl_port *port, + const struct device *dev) +{ + struct cxl_dport *dport; + + cxl_device_lock(&port->dev); + list_for_each_entry(dport, &port->dports, list) + if (dport->dport == dev) { + cxl_device_unlock(&port->dev); + return dport; + } + + cxl_device_unlock(&port->dev); + return NULL; +} +EXPORT_SYMBOL_NS_GPL(cxl_find_dport_by_dev, CXL); + +static int decoder_populate_targets(struct cxl_decoder *cxld, + struct cxl_port *port, int *target_map) +{ + int i, rc = 0; + + if (!target_map) + return 0; + + device_lock_assert(&port->dev); + + if (list_empty(&port->dports)) + return -EINVAL; + + write_seqlock(&cxld->target_lock); + for (i = 0; i < cxld->nr_targets; i++) { + struct cxl_dport *dport = find_dport(port, target_map[i]); + + if (!dport) { + rc = -ENXIO; + break; + } + cxld->target[i] = dport; + } + write_sequnlock(&cxld->target_lock); + + return rc; +} + +/** + * cxl_decoder_alloc - Allocate a new CXL decoder + * @port: owning port of this decoder + * @nr_targets: downstream targets accessible by this decoder. All upstream + * ports and root ports must have at least 1 target. Endpoint + * devices will have 0 targets. Callers wishing to register an + * endpoint device should specify 0. + * + * A port should contain one or more decoders. Each of those decoders enable + * some address space for CXL.mem utilization. A decoder is expected to be + * configured by the caller before registering. + * + * Return: A new cxl decoder to be registered by cxl_decoder_add(). The decoder + * is initialized to be a "passthrough" decoder. + */ +static struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, + unsigned int nr_targets) +{ + struct cxl_decoder *cxld; + struct device *dev; + int rc = 0; + + if (nr_targets > CXL_DECODER_MAX_INTERLEAVE) + return ERR_PTR(-EINVAL); + + cxld = kzalloc(struct_size(cxld, target, nr_targets), GFP_KERNEL); + if (!cxld) + return ERR_PTR(-ENOMEM); + + rc = ida_alloc(&port->decoder_ida, GFP_KERNEL); + if (rc < 0) + goto err; + + /* need parent to stick around to release the id */ + get_device(&port->dev); + cxld->id = rc; + + cxld->nr_targets = nr_targets; + seqlock_init(&cxld->target_lock); + dev = &cxld->dev; + device_initialize(dev); + device_set_pm_not_required(dev); + dev->parent = &port->dev; + dev->bus = &cxl_bus_type; + if (is_cxl_root(port)) + cxld->dev.type = &cxl_decoder_root_type; + else if (is_cxl_endpoint(port)) + cxld->dev.type = &cxl_decoder_endpoint_type; + else + cxld->dev.type = &cxl_decoder_switch_type; + + /* Pre initialize an "empty" decoder */ + cxld->interleave_ways = 1; + cxld->interleave_granularity = PAGE_SIZE; + cxld->target_type = CXL_DECODER_EXPANDER; + cxld->platform_res = (struct resource)DEFINE_RES_MEM(0, 0); + + return cxld; +err: + kfree(cxld); + return ERR_PTR(rc); +} + +/** + * cxl_root_decoder_alloc - Allocate a root level decoder + * @port: owning CXL root of this decoder + * @nr_targets: static number of downstream targets + * + * Return: A new cxl decoder to be registered by cxl_decoder_add(). A + * 'CXL root' decoder is one that decodes from a top-level / static platform + * firmware description of CXL resources into a CXL standard decode + * topology. + */ +struct cxl_decoder *cxl_root_decoder_alloc(struct cxl_port *port, + unsigned int nr_targets) +{ + if (!is_cxl_root(port)) + return ERR_PTR(-EINVAL); + + return cxl_decoder_alloc(port, nr_targets); +} +EXPORT_SYMBOL_NS_GPL(cxl_root_decoder_alloc, CXL); + +/** + * cxl_switch_decoder_alloc - Allocate a switch level decoder + * @port: owning CXL switch port of this decoder + * @nr_targets: max number of dynamically addressable downstream targets + * + * Return: A new cxl decoder to be registered by cxl_decoder_add(). A + * 'switch' decoder is any decoder that can be enumerated by PCIe + * topology and the HDM Decoder Capability. This includes the decoders + * that sit between Switch Upstream Ports / Switch Downstream Ports and + * Host Bridges / Root Ports. + */ +struct cxl_decoder *cxl_switch_decoder_alloc(struct cxl_port *port, + unsigned int nr_targets) +{ + if (is_cxl_root(port) || is_cxl_endpoint(port)) + return ERR_PTR(-EINVAL); + + return cxl_decoder_alloc(port, nr_targets); +} +EXPORT_SYMBOL_NS_GPL(cxl_switch_decoder_alloc, CXL); + +/** + * cxl_endpoint_decoder_alloc - Allocate an endpoint decoder + * @port: owning port of this decoder + * + * Return: A new cxl decoder to be registered by cxl_decoder_add() + */ +struct cxl_decoder *cxl_endpoint_decoder_alloc(struct cxl_port *port) +{ + if (!is_cxl_endpoint(port)) + return ERR_PTR(-EINVAL); + + return cxl_decoder_alloc(port, 0); +} +EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_alloc, CXL); + +/** + * cxl_decoder_add_locked - Add a decoder with targets + * @cxld: The cxl decoder allocated by cxl_decoder_alloc() + * @target_map: A list of downstream ports that this decoder can direct memory + * traffic to. These numbers should correspond with the port number + * in the PCIe Link Capabilities structure. + * + * Certain types of decoders may not have any targets. The main example of this + * is an endpoint device. A more awkward example is a hostbridge whose root + * ports get hot added (technically possible, though unlikely). + * + * This is the locked variant of cxl_decoder_add(). + * + * Context: Process context. Expects the device lock of the port that owns the + * @cxld to be held. + * + * Return: Negative error code if the decoder wasn't properly configured; else + * returns 0. + */ +int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map) +{ + struct cxl_port *port; + struct device *dev; + int rc; + + if (WARN_ON_ONCE(!cxld)) + return -EINVAL; + + if (WARN_ON_ONCE(IS_ERR(cxld))) + return PTR_ERR(cxld); + + if (cxld->interleave_ways < 1) + return -EINVAL; + + dev = &cxld->dev; + + port = to_cxl_port(cxld->dev.parent); + if (!is_endpoint_decoder(dev)) { + rc = decoder_populate_targets(cxld, port, target_map); + if (rc && (cxld->flags & CXL_DECODER_F_ENABLE)) { + dev_err(&port->dev, + "Failed to populate active decoder targets\n"); + return rc; + } + } + + rc = dev_set_name(dev, "decoder%d.%d", port->id, cxld->id); + if (rc) + return rc; + + /* + * Platform decoder resources should show up with a reasonable name. All + * other resources are just sub ranges within the main decoder resource. + */ + if (is_root_decoder(dev)) + cxld->platform_res.name = dev_name(dev); + + return device_add(dev); +} +EXPORT_SYMBOL_NS_GPL(cxl_decoder_add_locked, CXL); + +/** + * cxl_decoder_add - Add a decoder with targets + * @cxld: The cxl decoder allocated by cxl_decoder_alloc() + * @target_map: A list of downstream ports that this decoder can direct memory + * traffic to. These numbers should correspond with the port number + * in the PCIe Link Capabilities structure. + * + * This is the unlocked variant of cxl_decoder_add_locked(). + * See cxl_decoder_add_locked(). + * + * Context: Process context. Takes and releases the device lock of the port that + * owns the @cxld. + */ +int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map) +{ + struct cxl_port *port; + int rc; + + if (WARN_ON_ONCE(!cxld)) + return -EINVAL; + + if (WARN_ON_ONCE(IS_ERR(cxld))) + return PTR_ERR(cxld); + + port = to_cxl_port(cxld->dev.parent); + + cxl_device_lock(&port->dev); + rc = cxl_decoder_add_locked(cxld, target_map); + cxl_device_unlock(&port->dev); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, CXL); + +static void cxld_unregister(void *dev) +{ + device_unregister(dev); +} + +int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld) +{ + return devm_add_action_or_reset(host, cxld_unregister, &cxld->dev); +} +EXPORT_SYMBOL_NS_GPL(cxl_decoder_autoremove, CXL); + +/** + * __cxl_driver_register - register a driver for the cxl bus + * @cxl_drv: cxl driver structure to attach + * @owner: owning module/driver + * @modname: KBUILD_MODNAME for parent driver + */ +int __cxl_driver_register(struct cxl_driver *cxl_drv, struct module *owner, + const char *modname) +{ + if (!cxl_drv->probe) { + pr_debug("%s ->probe() must be specified\n", modname); + return -EINVAL; + } + + if (!cxl_drv->name) { + pr_debug("%s ->name must be specified\n", modname); + return -EINVAL; + } + + if (!cxl_drv->id) { + pr_debug("%s ->id must be specified\n", modname); + return -EINVAL; + } + + cxl_drv->drv.bus = &cxl_bus_type; + cxl_drv->drv.owner = owner; + cxl_drv->drv.mod_name = modname; + cxl_drv->drv.name = cxl_drv->name; + + return driver_register(&cxl_drv->drv); +} +EXPORT_SYMBOL_NS_GPL(__cxl_driver_register, CXL); + +void cxl_driver_unregister(struct cxl_driver *cxl_drv) +{ + driver_unregister(&cxl_drv->drv); +} +EXPORT_SYMBOL_NS_GPL(cxl_driver_unregister, CXL); + +static int cxl_bus_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + return add_uevent_var(env, "MODALIAS=" CXL_MODALIAS_FMT, + cxl_device_id(dev)); +} + +static int cxl_bus_match(struct device *dev, struct device_driver *drv) +{ + return cxl_device_id(dev) == to_cxl_drv(drv)->id; +} + +static int cxl_bus_probe(struct device *dev) +{ + int rc; + + /* + * Take the CXL nested lock since the driver core only holds + * @dev->mutex and not @dev->lockdep_mutex. + */ + cxl_nested_lock(dev); + rc = to_cxl_drv(dev->driver)->probe(dev); + cxl_nested_unlock(dev); + + dev_dbg(dev, "probe: %d\n", rc); + return rc; +} + +static void cxl_bus_remove(struct device *dev) +{ + struct cxl_driver *cxl_drv = to_cxl_drv(dev->driver); + + cxl_nested_lock(dev); + if (cxl_drv->remove) + cxl_drv->remove(dev); + cxl_nested_unlock(dev); +} + +static struct workqueue_struct *cxl_bus_wq; + +int cxl_bus_rescan(void) +{ + return bus_rescan_devices(&cxl_bus_type); +} +EXPORT_SYMBOL_NS_GPL(cxl_bus_rescan, CXL); + +bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd) +{ + return queue_work(cxl_bus_wq, &cxlmd->detach_work); +} +EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL); + +/* for user tooling to ensure port disable work has completed */ +static ssize_t flush_store(struct bus_type *bus, const char *buf, size_t count) +{ + if (sysfs_streq(buf, "1")) { + flush_workqueue(cxl_bus_wq); + return count; + } + + return -EINVAL; +} + +static BUS_ATTR_WO(flush); + +static struct attribute *cxl_bus_attributes[] = { + &bus_attr_flush.attr, + NULL, +}; + +static struct attribute_group cxl_bus_attribute_group = { + .attrs = cxl_bus_attributes, +}; + +static const struct attribute_group *cxl_bus_attribute_groups[] = { + &cxl_bus_attribute_group, + NULL, +}; + +struct bus_type cxl_bus_type = { + .name = "cxl", + .uevent = cxl_bus_uevent, + .match = cxl_bus_match, + .probe = cxl_bus_probe, + .remove = cxl_bus_remove, + .bus_groups = cxl_bus_attribute_groups, +}; +EXPORT_SYMBOL_NS_GPL(cxl_bus_type, CXL); + +static __init int cxl_core_init(void) +{ + int rc; + + cxl_mbox_init(); + + rc = cxl_memdev_init(); + if (rc) + return rc; + + cxl_bus_wq = alloc_ordered_workqueue("cxl_port", 0); + if (!cxl_bus_wq) { + rc = -ENOMEM; + goto err_wq; + } + + rc = bus_register(&cxl_bus_type); + if (rc) + goto err_bus; + + return 0; + +err_bus: + destroy_workqueue(cxl_bus_wq); +err_wq: + cxl_memdev_exit(); + cxl_mbox_exit(); + return rc; +} + +static void cxl_core_exit(void) +{ + bus_unregister(&cxl_bus_type); + destroy_workqueue(cxl_bus_wq); + cxl_memdev_exit(); + cxl_mbox_exit(); +} + +module_init(cxl_core_init); +module_exit(cxl_core_exit); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index e37e23bf4355..39a129c57d40 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -5,6 +5,7 @@ #include #include #include +#include /** * DOC: cxl registers @@ -35,7 +36,7 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base, struct cxl_component_reg_map *map) { int cap, cap_count; - u64 cap_array; + u32 cap_array; *map = (struct cxl_component_reg_map) { 0 }; @@ -45,11 +46,11 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base, */ base += CXL_CM_OFFSET; - cap_array = readq(base + CXL_CM_CAP_HDR_OFFSET); + cap_array = readl(base + CXL_CM_CAP_HDR_OFFSET); if (FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, cap_array) != CM_CAP_HDR_CAP_ID) { dev_err(dev, - "Couldn't locate the CXL.cache and CXL.mem capability array header./n"); + "Couldn't locate the CXL.cache and CXL.mem capability array header.\n"); return; } @@ -158,9 +159,8 @@ void cxl_probe_device_regs(struct device *dev, void __iomem *base, } EXPORT_SYMBOL_NS_GPL(cxl_probe_device_regs, CXL); -static void __iomem *devm_cxl_iomap_block(struct device *dev, - resource_size_t addr, - resource_size_t length) +void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr, + resource_size_t length) { void __iomem *ret_val; struct resource *res; @@ -247,3 +247,58 @@ int cxl_map_device_regs(struct pci_dev *pdev, return 0; } EXPORT_SYMBOL_NS_GPL(cxl_map_device_regs, CXL); + +static void cxl_decode_regblock(u32 reg_lo, u32 reg_hi, + struct cxl_register_map *map) +{ + map->block_offset = ((u64)reg_hi << 32) | + (reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK); + map->barno = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo); + map->reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo); +} + +/** + * cxl_find_regblock() - Locate register blocks by type + * @pdev: The CXL PCI device to enumerate. + * @type: Register Block Indicator id + * @map: Enumeration output, clobbered on error + * + * Return: 0 if register block enumerated, negative error code otherwise + * + * A CXL DVSEC may point to one or more register blocks, search for them + * by @type. + */ +int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map) +{ + u32 regloc_size, regblocks; + int regloc, i; + + map->block_offset = U64_MAX; + regloc = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL, + CXL_DVSEC_REG_LOCATOR); + if (!regloc) + return -ENXIO; + + pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, ®loc_size); + regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size); + + regloc += CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET; + regblocks = (regloc_size - CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET) / 8; + + for (i = 0; i < regblocks; i++, regloc += 8) { + u32 reg_lo, reg_hi; + + pci_read_config_dword(pdev, regloc, ®_lo); + pci_read_config_dword(pdev, regloc + 4, ®_hi); + + cxl_decode_regblock(reg_lo, reg_hi, map); + + if (map->reg_type == type) + return 0; + } + + map->block_offset = U64_MAX; + return -ENODEV; +} +EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, CXL); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index a5a0be3f088b..990b6670222e 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -17,6 +17,9 @@ * (port-driver, region-driver, nvdimm object-drivers... etc). */ +/* CXL 2.0 8.2.4 CXL Component Register Layout and Definition */ +#define CXL_COMPONENT_REG_BLOCK_SIZE SZ_64K + /* CXL 2.0 8.2.5 CXL.cache and CXL.mem Registers*/ #define CXL_CM_OFFSET 0x1000 #define CXL_CM_CAP_HDR_OFFSET 0x0 @@ -36,11 +39,23 @@ #define CXL_HDM_DECODER_CAP_OFFSET 0x0 #define CXL_HDM_DECODER_COUNT_MASK GENMASK(3, 0) #define CXL_HDM_DECODER_TARGET_COUNT_MASK GENMASK(7, 4) -#define CXL_HDM_DECODER0_BASE_LOW_OFFSET 0x10 -#define CXL_HDM_DECODER0_BASE_HIGH_OFFSET 0x14 -#define CXL_HDM_DECODER0_SIZE_LOW_OFFSET 0x18 -#define CXL_HDM_DECODER0_SIZE_HIGH_OFFSET 0x1c -#define CXL_HDM_DECODER0_CTRL_OFFSET 0x20 +#define CXL_HDM_DECODER_INTERLEAVE_11_8 BIT(8) +#define CXL_HDM_DECODER_INTERLEAVE_14_12 BIT(9) +#define CXL_HDM_DECODER_CTRL_OFFSET 0x4 +#define CXL_HDM_DECODER_ENABLE BIT(1) +#define CXL_HDM_DECODER0_BASE_LOW_OFFSET(i) (0x20 * (i) + 0x10) +#define CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i) (0x20 * (i) + 0x14) +#define CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i) (0x20 * (i) + 0x18) +#define CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i) (0x20 * (i) + 0x1c) +#define CXL_HDM_DECODER0_CTRL_OFFSET(i) (0x20 * (i) + 0x20) +#define CXL_HDM_DECODER0_CTRL_IG_MASK GENMASK(3, 0) +#define CXL_HDM_DECODER0_CTRL_IW_MASK GENMASK(7, 4) +#define CXL_HDM_DECODER0_CTRL_LOCK BIT(8) +#define CXL_HDM_DECODER0_CTRL_COMMIT BIT(9) +#define CXL_HDM_DECODER0_CTRL_COMMITTED BIT(10) +#define CXL_HDM_DECODER0_CTRL_TYPE BIT(12) +#define CXL_HDM_DECODER0_TL_LOW(i) (0x20 * (i) + 0x24) +#define CXL_HDM_DECODER0_TL_HIGH(i) (0x20 * (i) + 0x28) static inline int cxl_hdm_decoder_count(u32 cap_hdr) { @@ -145,6 +160,12 @@ int cxl_map_device_regs(struct pci_dev *pdev, struct cxl_device_regs *regs, struct cxl_register_map *map); +enum cxl_regloc_type; +int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map); +void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr, + resource_size_t length); + #define CXL_RESOURCE_NONE ((resource_size_t) -1) #define CXL_TARGET_STRLEN 20 @@ -158,7 +179,8 @@ int cxl_map_device_regs(struct pci_dev *pdev, #define CXL_DECODER_F_TYPE2 BIT(2) #define CXL_DECODER_F_TYPE3 BIT(3) #define CXL_DECODER_F_LOCK BIT(4) -#define CXL_DECODER_F_MASK GENMASK(4, 0) +#define CXL_DECODER_F_ENABLE BIT(5) +#define CXL_DECODER_F_MASK GENMASK(5, 0) enum cxl_decoder_type { CXL_DECODER_ACCELERATOR = 2, @@ -175,22 +197,28 @@ enum cxl_decoder_type { * struct cxl_decoder - CXL address range decode configuration * @dev: this decoder's device * @id: kernel device name id - * @range: address range considered by this decoder + * @platform_res: address space resources considered by root decoder + * @decoder_range: address space resources considered by midlevel decoder * @interleave_ways: number of cxl_dports in this decode * @interleave_granularity: data stride per dport * @target_type: accelerator vs expander (type2 vs type3) selector * @flags: memory type capabilities and locking + * @target_lock: coordinate coherent reads of the target list * @nr_targets: number of elements in @target * @target: active ordered target list in current decoder configuration */ struct cxl_decoder { struct device dev; int id; - struct range range; + union { + struct resource platform_res; + struct range decoder_range; + }; int interleave_ways; int interleave_granularity; enum cxl_decoder_type target_type; unsigned long flags; + seqlock_t target_lock; int nr_targets; struct cxl_dport *target[]; }; @@ -226,14 +254,6 @@ struct cxl_nvdimm { struct nvdimm *nvdimm; }; -struct cxl_walk_context { - struct device *dev; - struct pci_bus *root; - struct cxl_port *port; - int error; - int count; -}; - /** * struct cxl_port - logical collection of upstream port devices and * downstream port devices to construct a CXL memory @@ -242,16 +262,22 @@ struct cxl_walk_context { * @uport: PCI or platform device implementing the upstream port capability * @id: id for port device-name * @dports: cxl_dport instances referenced by decoders + * @endpoints: cxl_ep instances, endpoints that are a descendant of this port * @decoder_ida: allocator for decoder ids * @component_reg_phys: component register capability base address (optional) + * @dead: last ep has been removed, force port re-creation + * @depth: How deep this port is relative to the root. depth 0 is the root. */ struct cxl_port { struct device dev; struct device *uport; int id; struct list_head dports; + struct list_head endpoints; struct ida decoder_ida; resource_size_t component_reg_phys; + bool dead; + unsigned int depth; }; /** @@ -270,19 +296,65 @@ struct cxl_dport { struct list_head list; }; +/** + * struct cxl_ep - track an endpoint's interest in a port + * @ep: device that hosts a generic CXL endpoint (expander or accelerator) + * @list: node on port->endpoints list + */ +struct cxl_ep { + struct device *ep; + struct list_head list; +}; + +/* + * The platform firmware device hosting the root is also the top of the + * CXL port topology. All other CXL ports have another CXL port as their + * parent and their ->uport / host device is out-of-line of the port + * ancestry. + */ +static inline bool is_cxl_root(struct cxl_port *port) +{ + return port->uport == port->dev.parent; +} + +bool is_cxl_port(struct device *dev); struct cxl_port *to_cxl_port(struct device *dev); +struct pci_bus; +int devm_cxl_register_pci_bus(struct device *host, struct device *uport, + struct pci_bus *bus); +struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port); struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, resource_size_t component_reg_phys, struct cxl_port *parent_port); +struct cxl_port *find_cxl_root(struct device *dev); +int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd); +int cxl_bus_rescan(void); +struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd); +bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd); -int cxl_add_dport(struct cxl_port *port, struct device *dport, int port_id, - resource_size_t component_reg_phys); +struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port, + struct device *dport, int port_id, + resource_size_t component_reg_phys); +struct cxl_dport *cxl_find_dport_by_dev(struct cxl_port *port, + const struct device *dev); struct cxl_decoder *to_cxl_decoder(struct device *dev); bool is_root_decoder(struct device *dev); -struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, int nr_targets); +bool is_cxl_decoder(struct device *dev); +struct cxl_decoder *cxl_root_decoder_alloc(struct cxl_port *port, + unsigned int nr_targets); +struct cxl_decoder *cxl_switch_decoder_alloc(struct cxl_port *port, + unsigned int nr_targets); int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map); +struct cxl_decoder *cxl_endpoint_decoder_alloc(struct cxl_port *port); +int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map); int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld); +int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint); + +struct cxl_hdm; +struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port); +int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm); +int devm_cxl_add_passthrough_decoder(struct cxl_port *port); extern struct bus_type cxl_bus_type; @@ -304,8 +376,14 @@ int __cxl_driver_register(struct cxl_driver *cxl_drv, struct module *owner, #define cxl_driver_register(x) __cxl_driver_register(x, THIS_MODULE, KBUILD_MODNAME) void cxl_driver_unregister(struct cxl_driver *cxl_drv); +#define module_cxl_driver(__cxl_driver) \ + module_driver(__cxl_driver, cxl_driver_register, cxl_driver_unregister) + #define CXL_DEVICE_NVDIMM_BRIDGE 1 #define CXL_DEVICE_NVDIMM 2 +#define CXL_DEVICE_PORT 3 +#define CXL_DEVICE_ROOT 4 +#define CXL_DEVICE_MEMORY_EXPANDER 5 #define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*") #define CXL_MODALIAS_FMT "cxl:t%d" @@ -326,4 +404,83 @@ struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd); #ifndef __mock #define __mock static #endif + +#ifdef CONFIG_PROVE_CXL_LOCKING +enum cxl_lock_class { + CXL_ANON_LOCK, + CXL_NVDIMM_LOCK, + CXL_NVDIMM_BRIDGE_LOCK, + CXL_PORT_LOCK, + /* + * Be careful to add new lock classes here, CXL_PORT_LOCK is + * extended by the port depth, so a maximum CXL port topology + * depth would need to be defined first. + */ +}; + +static inline void cxl_nested_lock(struct device *dev) +{ + if (is_cxl_port(dev)) { + struct cxl_port *port = to_cxl_port(dev); + + mutex_lock_nested(&dev->lockdep_mutex, + CXL_PORT_LOCK + port->depth); + } else if (is_cxl_decoder(dev)) { + struct cxl_port *port = to_cxl_port(dev->parent); + + /* + * A decoder is the immediate child of a port, so set + * its lock class equal to other child device siblings. + */ + mutex_lock_nested(&dev->lockdep_mutex, + CXL_PORT_LOCK + port->depth + 1); + } else if (is_cxl_nvdimm_bridge(dev)) + mutex_lock_nested(&dev->lockdep_mutex, CXL_NVDIMM_BRIDGE_LOCK); + else if (is_cxl_nvdimm(dev)) + mutex_lock_nested(&dev->lockdep_mutex, CXL_NVDIMM_LOCK); + else + mutex_lock_nested(&dev->lockdep_mutex, CXL_ANON_LOCK); +} + +static inline void cxl_nested_unlock(struct device *dev) +{ + mutex_unlock(&dev->lockdep_mutex); +} + +static inline void cxl_device_lock(struct device *dev) +{ + /* + * For double lock errors the lockup will happen before lockdep + * warns at cxl_nested_lock(), so assert explicitly. + */ + lockdep_assert_not_held(&dev->lockdep_mutex); + + device_lock(dev); + cxl_nested_lock(dev); +} + +static inline void cxl_device_unlock(struct device *dev) +{ + cxl_nested_unlock(dev); + device_unlock(dev); +} +#else +static inline void cxl_nested_lock(struct device *dev) +{ +} + +static inline void cxl_nested_unlock(struct device *dev) +{ +} + +static inline void cxl_device_lock(struct device *dev) +{ + device_lock(dev); +} + +static inline void cxl_device_unlock(struct device *dev) +{ + device_unlock(dev); +} +#endif #endif /* __CXL_H__ */ diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 8d96d009ad90..5d33ce24fe09 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -34,12 +34,14 @@ * @dev: driver core device object * @cdev: char dev core object for ioctl operations * @cxlds: The device state backing this device + * @detach_work: active memdev lost a port in its ancestry * @id: id number of this memdev instance. */ struct cxl_memdev { struct device dev; struct cdev cdev; struct cxl_dev_state *cxlds; + struct work_struct detach_work; int id; }; @@ -48,6 +50,12 @@ static inline struct cxl_memdev *to_cxl_memdev(struct device *dev) return container_of(dev, struct cxl_memdev, dev); } +bool is_cxl_memdev(struct device *dev); +static inline bool is_cxl_endpoint(struct cxl_port *port) +{ + return is_cxl_memdev(port->uport); +} + struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds); /** @@ -89,6 +97,18 @@ struct cxl_mbox_cmd { */ #define CXL_CAPACITY_MULTIPLIER SZ_256M +/** + * struct cxl_endpoint_dvsec_info - Cached DVSEC info + * @mem_enabled: cached value of mem_enabled in the DVSEC, PCIE_DEVICE + * @ranges: Number of active HDM ranges this device uses. + * @dvsec_range: cached attributes of the ranges in the DVSEC, PCIE_DEVICE + */ +struct cxl_endpoint_dvsec_info { + bool mem_enabled; + int ranges; + struct range dvsec_range[2]; +}; + /** * struct cxl_dev_state - The driver device state * @@ -98,6 +118,7 @@ struct cxl_mbox_cmd { * * @dev: The device associated with this CXL state * @regs: Parsed register blocks + * @cxl_dvsec: Offset to the PCIe device DVSEC * @payload_size: Size of space for payload * (CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register) * @lsa_size: Size of Label Storage Area @@ -116,7 +137,11 @@ struct cxl_mbox_cmd { * @active_persistent_bytes: sum of hard + soft persistent * @next_volatile_bytes: volatile capacity change pending device reset * @next_persistent_bytes: persistent capacity change pending device reset + * @component_reg_phys: register base of component registers + * @info: Cached DVSEC information about the device. + * @serial: PCIe Device Serial Number * @mbox_send: @dev specific transport for transmitting mailbox commands + * @wait_media_ready: @dev specific method to await media ready * * See section 8.2.9.5.2 Capacity Configuration and Label Storage for * details on capacity parameters. @@ -125,6 +150,7 @@ struct cxl_dev_state { struct device *dev; struct cxl_regs regs; + int cxl_dvsec; size_t payload_size; size_t lsa_size; @@ -145,7 +171,12 @@ struct cxl_dev_state { u64 next_volatile_bytes; u64 next_persistent_bytes; + resource_size_t component_reg_phys; + struct cxl_endpoint_dvsec_info info; + u64 serial; + int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd); + int (*wait_media_ready)(struct cxl_dev_state *cxlds); }; enum cxl_opcode { @@ -264,4 +295,12 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds); struct cxl_dev_state *cxl_dev_state_create(struct device *dev); void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); + +struct cxl_hdm { + struct cxl_component_regs regs; + unsigned int decoder_count; + unsigned int target_count; + unsigned int interleave_mask; + struct cxl_port *port; +}; #endif /* __CXL_MEM_H__ */ diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h new file mode 100644 index 000000000000..329e7ea3f36a --- /dev/null +++ b/drivers/cxl/cxlpci.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2020 Intel Corporation. All rights reserved. */ +#ifndef __CXL_PCI_H__ +#define __CXL_PCI_H__ +#include +#include "cxl.h" + +#define CXL_MEMORY_PROGIF 0x10 + +/* + * See section 8.1 Configuration Space Registers in the CXL 2.0 + * Specification. Names are taken straight from the specification with "CXL" and + * "DVSEC" redundancies removed. When obvious, abbreviations may be used. + */ +#define PCI_DVSEC_HEADER1_LENGTH_MASK GENMASK(31, 20) +#define PCI_DVSEC_VENDOR_ID_CXL 0x1E98 + +/* CXL 2.0 8.1.3: PCIe DVSEC for CXL Device */ +#define CXL_DVSEC_PCIE_DEVICE 0 +#define CXL_DVSEC_CAP_OFFSET 0xA +#define CXL_DVSEC_MEM_CAPABLE BIT(2) +#define CXL_DVSEC_HDM_COUNT_MASK GENMASK(5, 4) +#define CXL_DVSEC_CTRL_OFFSET 0xC +#define CXL_DVSEC_MEM_ENABLE BIT(2) +#define CXL_DVSEC_RANGE_SIZE_HIGH(i) (0x18 + (i * 0x10)) +#define CXL_DVSEC_RANGE_SIZE_LOW(i) (0x1C + (i * 0x10)) +#define CXL_DVSEC_MEM_INFO_VALID BIT(0) +#define CXL_DVSEC_MEM_ACTIVE BIT(1) +#define CXL_DVSEC_MEM_SIZE_LOW_MASK GENMASK(31, 28) +#define CXL_DVSEC_RANGE_BASE_HIGH(i) (0x20 + (i * 0x10)) +#define CXL_DVSEC_RANGE_BASE_LOW(i) (0x24 + (i * 0x10)) +#define CXL_DVSEC_MEM_BASE_LOW_MASK GENMASK(31, 28) + +/* CXL 2.0 8.1.4: Non-CXL Function Map DVSEC */ +#define CXL_DVSEC_FUNCTION_MAP 2 + +/* CXL 2.0 8.1.5: CXL 2.0 Extensions DVSEC for Ports */ +#define CXL_DVSEC_PORT_EXTENSIONS 3 + +/* CXL 2.0 8.1.6: GPF DVSEC for CXL Port */ +#define CXL_DVSEC_PORT_GPF 4 + +/* CXL 2.0 8.1.7: GPF DVSEC for CXL Device */ +#define CXL_DVSEC_DEVICE_GPF 5 + +/* CXL 2.0 8.1.8: PCIe DVSEC for Flex Bus Port */ +#define CXL_DVSEC_PCIE_FLEXBUS_PORT 7 + +/* CXL 2.0 8.1.9: Register Locator DVSEC */ +#define CXL_DVSEC_REG_LOCATOR 8 +#define CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET 0xC +#define CXL_DVSEC_REG_LOCATOR_BIR_MASK GENMASK(2, 0) +#define CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK GENMASK(15, 8) +#define CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK GENMASK(31, 16) + +/* Register Block Identifier (RBI) */ +enum cxl_regloc_type { + CXL_REGLOC_RBI_EMPTY = 0, + CXL_REGLOC_RBI_COMPONENT, + CXL_REGLOC_RBI_VIRT, + CXL_REGLOC_RBI_MEMDEV, + CXL_REGLOC_RBI_TYPES +}; + +static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev, + struct cxl_register_map *map) +{ + if (map->block_offset == U64_MAX) + return CXL_RESOURCE_NONE; + + return pci_resource_start(pdev, map->barno) + map->block_offset; +} + +int devm_cxl_port_enumerate_dports(struct cxl_port *port); +#endif /* __CXL_PCI_H__ */ diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c new file mode 100644 index 000000000000..49a4b1c47299 --- /dev/null +++ b/drivers/cxl/mem.c @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */ +#include +#include +#include + +#include "cxlmem.h" +#include "cxlpci.h" + +/** + * DOC: cxl mem + * + * CXL memory endpoint devices and switches are CXL capable devices that are + * participating in CXL.mem protocol. Their functionality builds on top of the + * CXL.io protocol that allows enumerating and configuring components via + * standard PCI mechanisms. + * + * The cxl_mem driver owns kicking off the enumeration of this CXL.mem + * capability. With the detection of a CXL capable endpoint, the driver will + * walk up to find the platform specific port it is connected to, and determine + * if there are intervening switches in the path. If there are switches, a + * secondary action is to enumerate those (implemented in cxl_core). Finally the + * cxl_mem driver adds the device it is bound to as a CXL endpoint-port for use + * in higher level operations. + */ + +static int wait_for_media(struct cxl_memdev *cxlmd) +{ + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_endpoint_dvsec_info *info = &cxlds->info; + int rc; + + if (!info->mem_enabled) + return -EBUSY; + + rc = cxlds->wait_media_ready(cxlds); + if (rc) + return rc; + + /* + * We know the device is active, and enabled, if any ranges are non-zero + * we'll need to check later before adding the port since that owns the + * HDM decoder registers. + */ + return 0; +} + +static int create_endpoint(struct cxl_memdev *cxlmd, + struct cxl_port *parent_port) +{ + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_port *endpoint; + + endpoint = devm_cxl_add_port(&parent_port->dev, &cxlmd->dev, + cxlds->component_reg_phys, parent_port); + if (IS_ERR(endpoint)) + return PTR_ERR(endpoint); + + dev_dbg(&cxlmd->dev, "add: %s\n", dev_name(&endpoint->dev)); + + if (!endpoint->dev.driver) { + dev_err(&cxlmd->dev, "%s failed probe\n", + dev_name(&endpoint->dev)); + return -ENXIO; + } + + return cxl_endpoint_autoremove(cxlmd, endpoint); +} + +/** + * cxl_dvsec_decode_init() - Setup HDM decoding for the endpoint + * @cxlds: Device state + * + * Additionally, enables global HDM decoding. Warning: don't call this outside + * of probe. Once probe is complete, the port driver owns all access to the HDM + * decoder registers. + * + * Returns: false if DVSEC Ranges are being used instead of HDM + * decoders, or if it can not be determined if DVSEC Ranges are in use. + * Otherwise, returns true. + */ +__mock bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds) +{ + struct cxl_endpoint_dvsec_info *info = &cxlds->info; + struct cxl_register_map map; + struct cxl_component_reg_map *cmap = &map.component_map; + bool global_enable, do_hdm_init = false; + void __iomem *crb; + u32 global_ctrl; + + /* map hdm decoder */ + crb = ioremap(cxlds->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); + if (!crb) { + dev_dbg(cxlds->dev, "Failed to map component registers\n"); + return false; + } + + cxl_probe_component_regs(cxlds->dev, crb, cmap); + if (!cmap->hdm_decoder.valid) { + dev_dbg(cxlds->dev, "Invalid HDM decoder registers\n"); + goto out; + } + + global_ctrl = readl(crb + cmap->hdm_decoder.offset + + CXL_HDM_DECODER_CTRL_OFFSET); + global_enable = global_ctrl & CXL_HDM_DECODER_ENABLE; + if (!global_enable && info->ranges) { + dev_dbg(cxlds->dev, + "DVSEC ranges already programmed and HDM decoders not enabled.\n"); + goto out; + } + + do_hdm_init = true; + + /* + * Permanently (for this boot at least) opt the device into HDM + * operation. Individual HDM decoders still need to be enabled after + * this point. + */ + if (!global_enable) { + dev_dbg(cxlds->dev, "Enabling HDM decode\n"); + writel(global_ctrl | CXL_HDM_DECODER_ENABLE, + crb + cmap->hdm_decoder.offset + + CXL_HDM_DECODER_CTRL_OFFSET); + } + +out: + iounmap(crb); + return do_hdm_init; +} + +static int cxl_mem_probe(struct device *dev) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_port *parent_port; + int rc; + + /* + * Someone is trying to reattach this device after it lost its port + * connection (an endpoint port previously registered by this memdev was + * disabled). This racy check is ok because if the port is still gone, + * no harm done, and if the port hierarchy comes back it will re-trigger + * this probe. Port rescan and memdev detach work share the same + * single-threaded workqueue. + */ + if (work_pending(&cxlmd->detach_work)) + return -EBUSY; + + rc = wait_for_media(cxlmd); + if (rc) { + dev_err(dev, "Media not active (%d)\n", rc); + return rc; + } + + /* + * If DVSEC ranges are being used instead of HDM decoder registers there + * is no use in trying to manage those. + */ + if (!cxl_dvsec_decode_init(cxlds)) { + struct cxl_endpoint_dvsec_info *info = &cxlds->info; + int i; + + /* */ + for (i = 0; i < 2; i++) { + u64 base, size; + + /* + * Give a nice warning to the user that BIOS has really + * botched things for them if it didn't place DVSEC + * ranges in the memory map. + */ + base = info->dvsec_range[i].start; + size = range_len(&info->dvsec_range[i]); + if (size && !region_intersects(base, size, + IORESOURCE_SYSTEM_RAM, + IORES_DESC_NONE)) { + dev_err(dev, + "DVSEC range %#llx-%#llx must be reserved by BIOS, but isn't\n", + base, base + size - 1); + } + } + dev_err(dev, + "Active DVSEC range registers in use. Will not bind.\n"); + return -EBUSY; + } + + rc = devm_cxl_enumerate_ports(cxlmd); + if (rc) + return rc; + + parent_port = cxl_mem_find_port(cxlmd); + if (!parent_port) { + dev_err(dev, "CXL port topology not found\n"); + return -ENXIO; + } + + cxl_device_lock(&parent_port->dev); + if (!parent_port->dev.driver) { + dev_err(dev, "CXL port topology %s not enabled\n", + dev_name(&parent_port->dev)); + rc = -ENXIO; + goto out; + } + + rc = create_endpoint(cxlmd, parent_port); +out: + cxl_device_unlock(&parent_port->dev); + put_device(&parent_port->dev); + return rc; +} + +static struct cxl_driver cxl_mem_driver = { + .name = "cxl_mem", + .probe = cxl_mem_probe, + .id = CXL_DEVICE_MEMORY_EXPANDER, +}; + +module_cxl_driver(cxl_mem_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(CXL); +MODULE_ALIAS_CXL(CXL_DEVICE_MEMORY_EXPANDER); +/* + * create_endpoint() wants to validate port driver attach immediately after + * endpoint registration. + */ +MODULE_SOFTDEP("pre: cxl_port"); diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 8dc91fd3396a..8a7267d116b7 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -1,14 +1,16 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2020 Intel Corporation. All rights reserved. */ #include +#include #include +#include #include #include #include #include #include #include "cxlmem.h" -#include "pci.h" +#include "cxlpci.h" #include "cxl.h" /** @@ -35,6 +37,20 @@ /* CXL 2.0 - 8.2.8.4 */ #define CXL_MAILBOX_TIMEOUT_MS (2 * HZ) +/* + * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to + * dictate how long to wait for the mailbox to become ready. The new + * field allows the device to tell software the amount of time to wait + * before mailbox ready. This field per the spec theoretically allows + * for up to 255 seconds. 255 seconds is unreasonably long, its longer + * than the maximum SATA port link recovery wait. Default to 60 seconds + * until someone builds a CXL device that needs more time in practice. + */ +static unsigned short mbox_ready_timeout = 60; +module_param(mbox_ready_timeout, ushort, 0644); +MODULE_PARM_DESC(mbox_ready_timeout, + "seconds to wait for mailbox ready / memory active status"); + static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds) { const unsigned long start = jiffies; @@ -57,14 +73,16 @@ static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds) return 0; } -static void cxl_pci_mbox_timeout(struct cxl_dev_state *cxlds, - struct cxl_mbox_cmd *mbox_cmd) -{ - struct device *dev = cxlds->dev; +#define cxl_err(dev, status, msg) \ + dev_err_ratelimited(dev, msg ", device state %s%s\n", \ + status & CXLMDEV_DEV_FATAL ? " fatal" : "", \ + status & CXLMDEV_FW_HALT ? " firmware-halt" : "") - dev_dbg(dev, "Mailbox command (opcode: %#x size: %zub) timed out\n", - mbox_cmd->opcode, mbox_cmd->size_in); -} +#define cxl_cmd_err(dev, cmd, status, msg) \ + dev_err_ratelimited(dev, msg " (opcode: %#x), device state %s%s\n", \ + (cmd)->opcode, \ + status & CXLMDEV_DEV_FATAL ? " fatal" : "", \ + status & CXLMDEV_FW_HALT ? " firmware-halt" : "") /** * __cxl_pci_mbox_send_cmd() - Execute a mailbox command @@ -118,7 +136,11 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, /* #1 */ if (cxl_doorbell_busy(cxlds)) { - dev_err_ratelimited(dev, "Mailbox re-busy after acquiring\n"); + u64 md_status = + readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); + + cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, + "mailbox queue busy"); return -EBUSY; } @@ -144,7 +166,9 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, /* #5 */ rc = cxl_pci_mbox_wait_for_doorbell(cxlds); if (rc == -ETIMEDOUT) { - cxl_pci_mbox_timeout(cxlds, mbox_cmd); + u64 md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); + + cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, "mailbox timeout"); return rc; } @@ -182,98 +206,13 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, return 0; } -/** - * cxl_pci_mbox_get() - Acquire exclusive access to the mailbox. - * @cxlds: The device state to gain access to. - * - * Context: Any context. Takes the mbox_mutex. - * Return: 0 if exclusive access was acquired. - */ -static int cxl_pci_mbox_get(struct cxl_dev_state *cxlds) -{ - struct device *dev = cxlds->dev; - u64 md_status; - int rc; - - mutex_lock_io(&cxlds->mbox_mutex); - - /* - * XXX: There is some amount of ambiguity in the 2.0 version of the spec - * around the mailbox interface ready (8.2.8.5.1.1). The purpose of the - * bit is to allow firmware running on the device to notify the driver - * that it's ready to receive commands. It is unclear if the bit needs - * to be read for each transaction mailbox, ie. the firmware can switch - * it on and off as needed. Second, there is no defined timeout for - * mailbox ready, like there is for the doorbell interface. - * - * Assumptions: - * 1. The firmware might toggle the Mailbox Interface Ready bit, check - * it for every command. - * - * 2. If the doorbell is clear, the firmware should have first set the - * Mailbox Interface Ready bit. Therefore, waiting for the doorbell - * to be ready is sufficient. - */ - rc = cxl_pci_mbox_wait_for_doorbell(cxlds); - if (rc) { - dev_warn(dev, "Mailbox interface not ready\n"); - goto out; - } - - md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); - if (!(md_status & CXLMDEV_MBOX_IF_READY && CXLMDEV_READY(md_status))) { - dev_err(dev, "mbox: reported doorbell ready, but not mbox ready\n"); - rc = -EBUSY; - goto out; - } - - /* - * Hardware shouldn't allow a ready status but also have failure bits - * set. Spit out an error, this should be a bug report - */ - rc = -EFAULT; - if (md_status & CXLMDEV_DEV_FATAL) { - dev_err(dev, "mbox: reported ready, but fatal\n"); - goto out; - } - if (md_status & CXLMDEV_FW_HALT) { - dev_err(dev, "mbox: reported ready, but halted\n"); - goto out; - } - if (CXLMDEV_RESET_NEEDED(md_status)) { - dev_err(dev, "mbox: reported ready, but reset needed\n"); - goto out; - } - - /* with lock held */ - return 0; - -out: - mutex_unlock(&cxlds->mbox_mutex); - return rc; -} - -/** - * cxl_pci_mbox_put() - Release exclusive access to the mailbox. - * @cxlds: The device state to communicate with. - * - * Context: Any context. Expects mbox_mutex to be held. - */ -static void cxl_pci_mbox_put(struct cxl_dev_state *cxlds) -{ - mutex_unlock(&cxlds->mbox_mutex); -} - static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) { int rc; - rc = cxl_pci_mbox_get(cxlds); - if (rc) - return rc; - + mutex_lock_io(&cxlds->mbox_mutex); rc = __cxl_pci_mbox_send_cmd(cxlds, cmd); - cxl_pci_mbox_put(cxlds); + mutex_unlock(&cxlds->mbox_mutex); return rc; } @@ -281,6 +220,34 @@ static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *c static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds) { const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET); + unsigned long timeout; + u64 md_status; + + timeout = jiffies + mbox_ready_timeout * HZ; + do { + md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); + if (md_status & CXLMDEV_MBOX_IF_READY) + break; + if (msleep_interruptible(100)) + break; + } while (!time_after(jiffies, timeout)); + + if (!(md_status & CXLMDEV_MBOX_IF_READY)) { + cxl_err(cxlds->dev, md_status, + "timeout awaiting mailbox ready"); + return -ETIMEDOUT; + } + + /* + * A command may be in flight from a previous driver instance, + * think kexec, do one doorbell wait so that + * __cxl_pci_mbox_send_cmd() can assume that it is the only + * source for future doorbell busy events. + */ + if (cxl_pci_mbox_wait_for_doorbell(cxlds) != 0) { + cxl_err(cxlds->dev, md_status, "timeout awaiting mailbox idle"); + return -ETIMEDOUT; + } cxlds->mbox_send = cxl_pci_mbox_send; cxlds->payload_size = @@ -400,58 +367,6 @@ static int cxl_map_regs(struct cxl_dev_state *cxlds, struct cxl_register_map *ma return 0; } -static void cxl_decode_regblock(u32 reg_lo, u32 reg_hi, - struct cxl_register_map *map) -{ - map->block_offset = - ((u64)reg_hi << 32) | (reg_lo & CXL_REGLOC_ADDR_MASK); - map->barno = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo); - map->reg_type = FIELD_GET(CXL_REGLOC_RBI_MASK, reg_lo); -} - -/** - * cxl_find_regblock() - Locate register blocks by type - * @pdev: The CXL PCI device to enumerate. - * @type: Register Block Indicator id - * @map: Enumeration output, clobbered on error - * - * Return: 0 if register block enumerated, negative error code otherwise - * - * A CXL DVSEC may point to one or more register blocks, search for them - * by @type. - */ -static int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, - struct cxl_register_map *map) -{ - u32 regloc_size, regblocks; - int regloc, i; - - regloc = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL, - PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID); - if (!regloc) - return -ENXIO; - - pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, ®loc_size); - regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size); - - regloc += PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET; - regblocks = (regloc_size - PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET) / 8; - - for (i = 0; i < regblocks; i++, regloc += 8) { - u32 reg_lo, reg_hi; - - pci_read_config_dword(pdev, regloc, ®_lo); - pci_read_config_dword(pdev, regloc + 4, ®_hi); - - cxl_decode_regblock(reg_lo, reg_hi, map); - - if (map->reg_type == type) - return 0; - } - - return -ENODEV; -} - static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, struct cxl_register_map *map) { @@ -471,6 +386,165 @@ static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, return rc; } +static int wait_for_valid(struct cxl_dev_state *cxlds) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int d = cxlds->cxl_dvsec, rc; + u32 val; + + /* + * Memory_Info_Valid: When set, indicates that the CXL Range 1 Size high + * and Size Low registers are valid. Must be set within 1 second of + * deassertion of reset to CXL device. Likely it is already set by the + * time this runs, but otherwise give a 1.5 second timeout in case of + * clock skew. + */ + rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val); + if (rc) + return rc; + + if (val & CXL_DVSEC_MEM_INFO_VALID) + return 0; + + msleep(1500); + + rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val); + if (rc) + return rc; + + if (val & CXL_DVSEC_MEM_INFO_VALID) + return 0; + + return -ETIMEDOUT; +} + +/* + * Wait up to @mbox_ready_timeout for the device to report memory + * active. + */ +static int wait_for_media_ready(struct cxl_dev_state *cxlds) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int d = cxlds->cxl_dvsec; + bool active = false; + u64 md_status; + int rc, i; + + rc = wait_for_valid(cxlds); + if (rc) + return rc; + + for (i = mbox_ready_timeout; i; i--) { + u32 temp; + int rc; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp); + if (rc) + return rc; + + active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp); + if (active) + break; + msleep(1000); + } + + if (!active) { + dev_err(&pdev->dev, + "timeout awaiting memory active after %d seconds\n", + mbox_ready_timeout); + return -ETIMEDOUT; + } + + md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); + if (!CXLMDEV_READY(md_status)) + return -EIO; + + return 0; +} + +static int cxl_dvsec_ranges(struct cxl_dev_state *cxlds) +{ + struct cxl_endpoint_dvsec_info *info = &cxlds->info; + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int d = cxlds->cxl_dvsec; + int hdm_count, rc, i; + u16 cap, ctrl; + + if (!d) + return -ENXIO; + + rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap); + if (rc) + return rc; + + rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); + if (rc) + return rc; + + if (!(cap & CXL_DVSEC_MEM_CAPABLE)) + return -ENXIO; + + /* + * It is not allowed by spec for MEM.capable to be set and have 0 legacy + * HDM decoders (values > 2 are also undefined as of CXL 2.0). As this + * driver is for a spec defined class code which must be CXL.mem + * capable, there is no point in continuing to enable CXL.mem. + */ + hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap); + if (!hdm_count || hdm_count > 2) + return -EINVAL; + + rc = wait_for_valid(cxlds); + if (rc) + return rc; + + info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); + + for (i = 0; i < hdm_count; i++) { + u64 base, size; + u32 temp; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp); + if (rc) + return rc; + + size = (u64)temp << 32; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp); + if (rc) + return rc; + + size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp); + if (rc) + return rc; + + base = (u64)temp << 32; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp); + if (rc) + return rc; + + base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK; + + info->dvsec_range[i] = (struct range) { + .start = base, + .end = base + size - 1 + }; + + if (size) + info->ranges++; + } + + return 0; +} + static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct cxl_register_map map; @@ -493,6 +567,15 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (IS_ERR(cxlds)) return PTR_ERR(cxlds); + cxlds->serial = pci_get_dsn(pdev); + cxlds->cxl_dvsec = pci_find_dvsec_capability( + pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE); + if (!cxlds->cxl_dvsec) + dev_warn(&pdev->dev, + "Device DVSEC not present, skip CXL.mem init\n"); + + cxlds->wait_media_ready = wait_for_media_ready; + rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map); if (rc) return rc; @@ -501,6 +584,17 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; + /* + * If the component registers can't be found, the cxl_pci driver may + * still be useful for management functions so don't return an error. + */ + cxlds->component_reg_phys = CXL_RESOURCE_NONE; + rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT, &map); + if (rc) + dev_warn(&pdev->dev, "No component registers (%d)\n", rc); + + cxlds->component_reg_phys = cxl_regmap_to_base(pdev, &map); + rc = cxl_pci_setup_mailbox(cxlds); if (rc) return rc; @@ -517,6 +611,11 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; + rc = cxl_dvsec_ranges(cxlds); + if (rc) + dev_warn(&pdev->dev, + "Failed to get DVSEC range information (%d)\n", rc); + cxlmd = devm_cxl_add_memdev(cxlds); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); diff --git a/drivers/cxl/pci.h b/drivers/cxl/pci.h deleted file mode 100644 index 7d3e4bf06b45..000000000000 --- a/drivers/cxl/pci.h +++ /dev/null @@ -1,34 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* Copyright(c) 2020 Intel Corporation. All rights reserved. */ -#ifndef __CXL_PCI_H__ -#define __CXL_PCI_H__ - -#define CXL_MEMORY_PROGIF 0x10 - -/* - * See section 8.1 Configuration Space Registers in the CXL 2.0 - * Specification - */ -#define PCI_DVSEC_HEADER1_LENGTH_MASK GENMASK(31, 20) -#define PCI_DVSEC_VENDOR_ID_CXL 0x1E98 -#define PCI_DVSEC_ID_CXL 0x0 - -#define PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID 0x8 -#define PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET 0xC - -/* BAR Indicator Register (BIR) */ -#define CXL_REGLOC_BIR_MASK GENMASK(2, 0) - -/* Register Block Identifier (RBI) */ -enum cxl_regloc_type { - CXL_REGLOC_RBI_EMPTY = 0, - CXL_REGLOC_RBI_COMPONENT, - CXL_REGLOC_RBI_VIRT, - CXL_REGLOC_RBI_MEMDEV, - CXL_REGLOC_RBI_TYPES -}; - -#define CXL_REGLOC_RBI_MASK GENMASK(15, 8) -#define CXL_REGLOC_ADDR_MASK GENMASK(31, 16) - -#endif /* __CXL_PCI_H__ */ diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index b65a272a2d6d..15ad666ab03e 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -43,7 +43,7 @@ static int cxl_nvdimm_probe(struct device *dev) if (!cxl_nvb) return -ENXIO; - device_lock(&cxl_nvb->dev); + cxl_device_lock(&cxl_nvb->dev); if (!cxl_nvb->nvdimm_bus) { rc = -ENXIO; goto out; @@ -68,7 +68,7 @@ static int cxl_nvdimm_probe(struct device *dev) dev_set_drvdata(dev, nvdimm); rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm); out: - device_unlock(&cxl_nvb->dev); + cxl_device_unlock(&cxl_nvb->dev); put_device(&cxl_nvb->dev); return rc; @@ -233,7 +233,7 @@ static void cxl_nvb_update_state(struct work_struct *work) struct nvdimm_bus *victim_bus = NULL; bool release = false, rescan = false; - device_lock(&cxl_nvb->dev); + cxl_device_lock(&cxl_nvb->dev); switch (cxl_nvb->state) { case CXL_NVB_ONLINE: if (!online_nvdimm_bus(cxl_nvb)) { @@ -251,7 +251,7 @@ static void cxl_nvb_update_state(struct work_struct *work) default: break; } - device_unlock(&cxl_nvb->dev); + cxl_device_unlock(&cxl_nvb->dev); if (release) device_release_driver(&cxl_nvb->dev); @@ -327,9 +327,9 @@ static int cxl_nvdimm_bridge_reset(struct device *dev, void *data) return 0; cxl_nvb = to_cxl_nvdimm_bridge(dev); - device_lock(dev); + cxl_device_lock(dev); cxl_nvb->state = CXL_NVB_NEW; - device_unlock(dev); + cxl_device_unlock(dev); return 0; } diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c new file mode 100644 index 000000000000..d420da5fc39c --- /dev/null +++ b/drivers/cxl/port.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */ +#include +#include +#include + +#include "cxlmem.h" +#include "cxlpci.h" + +/** + * DOC: cxl port + * + * The port driver enumerates dport via PCI and scans for HDM + * (Host-managed-Device-Memory) decoder resources via the + * @component_reg_phys value passed in by the agent that registered the + * port. All descendant ports of a CXL root port (described by platform + * firmware) are managed in this drivers context. Each driver instance + * is responsible for tearing down the driver context of immediate + * descendant ports. The locking for this is validated by + * CONFIG_PROVE_CXL_LOCKING. + * + * The primary service this driver provides is presenting APIs to other + * drivers to utilize the decoders, and indicating to userspace (via bind + * status) the connectivity of the CXL.mem protocol throughout the + * PCIe topology. + */ + +static void schedule_detach(void *cxlmd) +{ + schedule_cxl_memdev_detach(cxlmd); +} + +static int cxl_port_probe(struct device *dev) +{ + struct cxl_port *port = to_cxl_port(dev); + struct cxl_hdm *cxlhdm; + int rc; + + if (is_cxl_endpoint(port)) { + struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport); + + get_device(&cxlmd->dev); + rc = devm_add_action_or_reset(dev, schedule_detach, cxlmd); + if (rc) + return rc; + } else { + rc = devm_cxl_port_enumerate_dports(port); + if (rc < 0) + return rc; + if (rc == 1) + return devm_cxl_add_passthrough_decoder(port); + } + + cxlhdm = devm_cxl_setup_hdm(port); + if (IS_ERR(cxlhdm)) + return PTR_ERR(cxlhdm); + + rc = devm_cxl_enumerate_decoders(cxlhdm); + if (rc) { + dev_err(dev, "Couldn't enumerate decoders (%d)\n", rc); + return rc; + } + + return 0; +} + +static struct cxl_driver cxl_port_driver = { + .name = "cxl_port", + .probe = cxl_port_probe, + .id = CXL_DEVICE_PORT, +}; + +module_cxl_driver(cxl_port_driver); +MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(CXL); +MODULE_ALIAS_CXL(CXL_DEVICE_PORT); diff --git a/drivers/dax/super.c b/drivers/dax/super.c index aedc15eabeac..0211e6f7b47a 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -21,6 +21,7 @@ * @cdev: optional character interface for "device dax" * @private: dax driver private data * @flags: state and boolean properties + * @ops: operations for this device */ struct dax_device { struct inode inode; @@ -476,6 +477,7 @@ static int dax_fs_init(void) static void dax_fs_exit(void) { kern_unmount(dax_mnt); + rcu_barrier(); kmem_cache_destroy(dax_cache); } diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 65e3e7df8a4b..f253295795f0 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index f6aa1a964573..4084d05a4510 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -34,7 +34,6 @@ */ #include -#include #include #include #include diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 50c53409ceb6..fabca5e51e3d 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2642,7 +2642,7 @@ int rdma_set_ack_timeout(struct rdma_cm_id *id, u8 timeout) { struct rdma_id_private *id_priv; - if (id->qp_type != IB_QPT_RC) + if (id->qp_type != IB_QPT_RC && id->qp_type != IB_QPT_XRC_INI) return -EINVAL; id_priv = container_of(id, struct rdma_id_private, id); diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 9ac16e0db761..de8a2d5d741c 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -30,7 +30,6 @@ * SOFTWARE. */ -#include #include #include #include diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index 433b426729d4..a70876a0a231 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -2,7 +2,6 @@ /* * Copyright (c) 2015 HGST, a Western Digital Company. */ -#include #include #include #include diff --git a/drivers/infiniband/core/iwpm_util.h b/drivers/infiniband/core/iwpm_util.h index 3a42ad43056e..d6fc8402158a 100644 --- a/drivers/infiniband/core/iwpm_util.h +++ b/drivers/infiniband/core/iwpm_util.h @@ -33,7 +33,6 @@ #ifndef _IWPM_UTIL_H #define _IWPM_UTIL_H -#include #include #include #include diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index f5aacaf7fb8e..ca24ce34da76 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -1951,9 +1951,10 @@ static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[], u32 port) { struct rdma_hw_stats *stats; - int rem, i, index, ret = 0; struct nlattr *entry_attr; unsigned long *target; + int rem, i, ret = 0; + u32 index; stats = ib_get_hw_stats_port(device, port); if (!stats) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 74ecd7456a11..8dc7d1f4b35d 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -32,7 +32,6 @@ * SOFTWARE. */ -#include #include #include #include diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index c18634bec212..a9819c40a140 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -268,9 +268,6 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, return ERR_PTR(-ENOMEM); pd->device = device; - pd->uobject = NULL; - pd->__internal_mr = NULL; - atomic_set(&pd->usecnt, 0); pd->flags = flags; rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD); @@ -341,11 +338,6 @@ int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata) pd->__internal_mr = NULL; } - /* uverbs manipulates usecnt with proper locking, while the kabi - * requires the caller to guarantee we can't race here. - */ - WARN_ON(atomic_read(&pd->usecnt)); - ret = pd->device->ops.dealloc_pd(pd, udata); if (ret) return ret; @@ -2153,6 +2145,7 @@ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return mr; mr->device = pd->device; + mr->type = IB_MR_TYPE_USER; mr->pd = pd; mr->dm = NULL; atomic_inc(&pd->usecnt); diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 98c813ba4304..877f8e84a672 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -5,7 +5,6 @@ #include #include -#include #include #include @@ -667,7 +666,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) * engines, use the same CPU cores as general/control * context. */ - if (cpumask_weight(&entry->def_intr.mask) == 0) + if (cpumask_empty(&entry->def_intr.mask)) cpumask_copy(&entry->def_intr.mask, &entry->general_intr_mask); } @@ -687,7 +686,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) * vectors, use the same CPU core as the general/control * context. */ - if (cpumask_weight(&entry->comp_vect_mask) == 0) + if (cpumask_empty(&entry->comp_vect_mask)) cpumask_copy(&entry->comp_vect_mask, &entry->general_intr_mask); } diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 22a3cdb940be..80ba1e53c068 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/infiniband/hw/hfi1/device.c b/drivers/infiniband/hw/hfi1/device.c index 68a184c39941..8ceff7141baf 100644 --- a/drivers/infiniband/hw/hfi1/device.c +++ b/drivers/infiniband/hw/hfi1/device.c @@ -4,7 +4,6 @@ */ #include -#include #include #include diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c index e2e4f9f6fae2..3af77a0840ab 100644 --- a/drivers/infiniband/hw/hfi1/fault.c +++ b/drivers/infiniband/hw/hfi1/fault.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 31e63e245ea9..aa15a5cc7cf3 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -5,7 +5,6 @@ #include #include -#include #include #include diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index dc9211f3a009..99d0743133ca 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1397,8 +1397,7 @@ static int query_port(struct rvt_dev_info *rdi, u32 port_num, 4096 : hfi1_max_mtu), IB_MTU_4096); props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu : mtu_to_enum(ppd->ibmtu, IB_MTU_4096); - props->phys_mtu = HFI1_CAP_IS_KSET(AIP) ? hfi1_max_mtu : - ib_mtu_enum_to_int(props->max_mtu); + props->phys_mtu = hfi1_max_mtu; return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c index 4b693d542ace..864413607571 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.c +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c @@ -38,45 +38,36 @@ #define CMD_POLL_TOKEN 0xffff #define CMD_MAX_NUM 32 -static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, u64 in_param, - u64 out_param, u32 in_modifier, - u8 op_modifier, u16 op, u16 token, - int event) +static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, + struct hns_roce_mbox_msg *mbox_msg) { - return hr_dev->hw->post_mbox(hr_dev, in_param, out_param, in_modifier, - op_modifier, op, token, event); + return hr_dev->hw->post_mbox(hr_dev, mbox_msg); } /* this should be called with "poll_sem" */ -static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, - u64 out_param, unsigned long in_modifier, - u8 op_modifier, u16 op, - unsigned int timeout) +static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, + struct hns_roce_mbox_msg *mbox_msg) { int ret; - ret = hns_roce_cmd_mbox_post_hw(hr_dev, in_param, out_param, - in_modifier, op_modifier, op, - CMD_POLL_TOKEN, 0); + ret = hns_roce_cmd_mbox_post_hw(hr_dev, mbox_msg); if (ret) { dev_err_ratelimited(hr_dev->dev, "failed to post mailbox 0x%x in poll mode, ret = %d.\n", - op, ret); + mbox_msg->cmd, ret); return ret; } - return hr_dev->hw->poll_mbox_done(hr_dev, timeout); + return hr_dev->hw->poll_mbox_done(hr_dev); } -static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, - u64 out_param, unsigned long in_modifier, - u8 op_modifier, u16 op, unsigned int timeout) +static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, + struct hns_roce_mbox_msg *mbox_msg) { int ret; down(&hr_dev->cmd.poll_sem); - ret = __hns_roce_cmd_mbox_poll(hr_dev, in_param, out_param, in_modifier, - op_modifier, op, timeout); + ret = __hns_roce_cmd_mbox_poll(hr_dev, mbox_msg); up(&hr_dev->cmd.poll_sem); return ret; @@ -100,10 +91,8 @@ void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status, complete(&context->done); } -static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, - u64 out_param, unsigned long in_modifier, - u8 op_modifier, u16 op, - unsigned int timeout) +static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, + struct hns_roce_mbox_msg *mbox_msg) { struct hns_roce_cmdq *cmd = &hr_dev->cmd; struct hns_roce_cmd_context *context; @@ -124,20 +113,19 @@ static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, reinit_completion(&context->done); - ret = hns_roce_cmd_mbox_post_hw(hr_dev, in_param, out_param, - in_modifier, op_modifier, op, - context->token, 1); + mbox_msg->token = context->token; + ret = hns_roce_cmd_mbox_post_hw(hr_dev, mbox_msg); if (ret) { dev_err_ratelimited(dev, "failed to post mailbox 0x%x in event mode, ret = %d.\n", - op, ret); + mbox_msg->cmd, ret); goto out; } if (!wait_for_completion_timeout(&context->done, - msecs_to_jiffies(timeout))) { + msecs_to_jiffies(HNS_ROCE_CMD_TIMEOUT_MSECS))) { dev_err_ratelimited(dev, "[cmd] token 0x%x mailbox 0x%x timeout.\n", - context->token, op); + context->token, mbox_msg->cmd); ret = -EBUSY; goto out; } @@ -145,45 +133,50 @@ static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, ret = context->result; if (ret) dev_err_ratelimited(dev, "[cmd] token 0x%x mailbox 0x%x error %d.\n", - context->token, op, ret); + context->token, mbox_msg->cmd, ret); out: context->busy = 0; return ret; } -static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, - u64 out_param, unsigned long in_modifier, - u8 op_modifier, u16 op, unsigned int timeout) +static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, + struct hns_roce_mbox_msg *mbox_msg) { int ret; down(&hr_dev->cmd.event_sem); - ret = __hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param, in_modifier, - op_modifier, op, timeout); + ret = __hns_roce_cmd_mbox_wait(hr_dev, mbox_msg); up(&hr_dev->cmd.event_sem); return ret; } int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, - unsigned long in_modifier, u8 op_modifier, u16 op, - unsigned int timeout) + u8 cmd, unsigned long tag) { + struct hns_roce_mbox_msg mbox_msg = {}; bool is_busy; if (hr_dev->hw->chk_mbox_avail) if (!hr_dev->hw->chk_mbox_avail(hr_dev, &is_busy)) return is_busy ? -EBUSY : 0; - if (hr_dev->cmd.use_events) - return hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param, - in_modifier, op_modifier, op, - timeout); - else - return hns_roce_cmd_mbox_poll(hr_dev, in_param, out_param, - in_modifier, op_modifier, op, - timeout); + mbox_msg.in_param = in_param; + mbox_msg.out_param = out_param; + mbox_msg.cmd = cmd; + mbox_msg.tag = tag; + + if (hr_dev->cmd.use_events) { + mbox_msg.event_en = 1; + + return hns_roce_cmd_mbox_wait(hr_dev, &mbox_msg); + } else { + mbox_msg.event_en = 0; + mbox_msg.token = CMD_POLL_TOKEN; + + return hns_roce_cmd_mbox_poll(hr_dev, &mbox_msg); + } } int hns_roce_cmd_init(struct hns_roce_dev *hr_dev) @@ -269,3 +262,15 @@ void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev, dma_pool_free(hr_dev->cmd.pool, mailbox->buf, mailbox->dma); kfree(mailbox); } + +int hns_roce_create_hw_ctx(struct hns_roce_dev *dev, + struct hns_roce_cmd_mailbox *mailbox, + u8 cmd, unsigned long idx) +{ + return hns_roce_cmd_mbox(dev, mailbox->dma, 0, cmd, idx); +} + +int hns_roce_destroy_hw_ctx(struct hns_roce_dev *dev, u8 cmd, unsigned long idx) +{ + return hns_roce_cmd_mbox(dev, 0, 0, cmd, idx); +} diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index 8025e7f657fa..052a3d60905a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -140,12 +140,16 @@ enum { }; int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, - unsigned long in_modifier, u8 op_modifier, u16 op, - unsigned int timeout); + u8 cmd, unsigned long tag); struct hns_roce_cmd_mailbox * hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev); void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev, struct hns_roce_cmd_mailbox *mailbox); +int hns_roce_create_hw_ctx(struct hns_roce_dev *dev, + struct hns_roce_cmd_mailbox *mailbox, + u8 cmd, unsigned long idx); +int hns_roce_destroy_hw_ctx(struct hns_roce_dev *dev, u8 cmd, + unsigned long idx); #endif /* _HNS_ROCE_CMD_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 55057dcbb2dc..8acd599ffac1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -100,12 +100,39 @@ static void free_cqn(struct hns_roce_dev *hr_dev, unsigned long cqn) mutex_unlock(&cq_table->bank_mutex); } +static int hns_roce_create_cqc(struct hns_roce_dev *hr_dev, + struct hns_roce_cq *hr_cq, + u64 *mtts, dma_addr_t dma_handle) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_cmd_mailbox *mailbox; + int ret; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) { + ibdev_err(ibdev, "failed to alloc mailbox for CQC.\n"); + return PTR_ERR(mailbox); + } + + hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle); + + ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_CQC, + hr_cq->cqn); + if (ret) + ibdev_err(ibdev, + "failed to send create cmd for CQ(0x%lx), ret = %d.\n", + hr_cq->cqn, ret); + + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + + return ret; +} + static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_cmd_mailbox *mailbox; - u64 mtts[MTT_MIN_COUNT] = { 0 }; + u64 mtts[MTT_MIN_COUNT] = {}; dma_addr_t dma_handle; int ret; @@ -121,7 +148,7 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) if (ret) { ibdev_err(ibdev, "failed to get CQ(0x%lx) context, ret = %d.\n", hr_cq->cqn, ret); - goto err_out; + return ret; } ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL)); @@ -130,41 +157,17 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) goto err_put; } - /* Allocate mailbox memory */ - mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) { - ret = PTR_ERR(mailbox); + ret = hns_roce_create_cqc(hr_dev, hr_cq, mtts, dma_handle); + if (ret) goto err_xa; - } - - hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle); - - /* Send mailbox to hw */ - ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_cq->cqn, 0, - HNS_ROCE_CMD_CREATE_CQC, HNS_ROCE_CMD_TIMEOUT_MSECS); - hns_roce_free_cmd_mailbox(hr_dev, mailbox); - if (ret) { - ibdev_err(ibdev, - "failed to send create cmd for CQ(0x%lx), ret = %d.\n", - hr_cq->cqn, ret); - goto err_xa; - } - - hr_cq->cons_index = 0; - hr_cq->arm_sn = 1; - - refcount_set(&hr_cq->refcount, 1); - init_completion(&hr_cq->free); return 0; err_xa: xa_erase(&cq_table->array, hr_cq->cqn); - err_put: hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn); -err_out: return ret; } @@ -174,9 +177,8 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) struct device *dev = hr_dev->dev; int ret; - ret = hns_roce_cmd_mbox(hr_dev, 0, 0, hr_cq->cqn, 1, - HNS_ROCE_CMD_DESTROY_CQC, - HNS_ROCE_CMD_TIMEOUT_MSECS); + ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_CQC, + hr_cq->cqn); if (ret) dev_err(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", ret, hr_cq->cqn); @@ -413,6 +415,11 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, goto err_cqc; } + hr_cq->cons_index = 0; + hr_cq->arm_sn = 1; + refcount_set(&hr_cq->refcount, 1); + init_completion(&hr_cq->free); + return 0; err_cqc: diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 1e0bae136997..3083d6db1d68 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -561,6 +561,15 @@ struct hns_roce_cmd_mailbox { dma_addr_t dma; }; +struct hns_roce_mbox_msg { + u64 in_param; + u64 out_param; + u8 cmd; + u32 tag; + u16 token; + u8 event_en; +}; + struct hns_roce_dev; struct hns_roce_rinl_sge { @@ -624,6 +633,7 @@ struct hns_roce_qp { u32 next_sge; enum ib_mtu path_mtu; u32 max_inline_data; + u8 free_mr_en; /* 0: flush needed, 1: unneeded */ unsigned long flush_flag; @@ -851,11 +861,9 @@ struct hns_roce_hw { int (*hw_profile)(struct hns_roce_dev *hr_dev); int (*hw_init)(struct hns_roce_dev *hr_dev); void (*hw_exit)(struct hns_roce_dev *hr_dev); - int (*post_mbox)(struct hns_roce_dev *hr_dev, u64 in_param, - u64 out_param, u32 in_modifier, u8 op_modifier, u16 op, - u16 token, int event); - int (*poll_mbox_done)(struct hns_roce_dev *hr_dev, - unsigned int timeout); + int (*post_mbox)(struct hns_roce_dev *hr_dev, + struct hns_roce_mbox_msg *mbox_msg); + int (*poll_mbox_done)(struct hns_roce_dev *hr_dev); bool (*chk_mbox_avail)(struct hns_roce_dev *hr_dev, bool *is_busy); int (*set_gid)(struct hns_roce_dev *hr_dev, int gid_index, const union ib_gid *gid, const struct ib_gid_attr *attr); @@ -873,15 +881,16 @@ struct hns_roce_hw { struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts, dma_addr_t dma_handle); int (*set_hem)(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_table *table, int obj, int step_idx); + struct hns_roce_hem_table *table, int obj, u32 step_idx); int (*clear_hem)(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, int obj, - int step_idx); + u32 step_idx); int (*modify_qp)(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state); int (*qp_flow_control_init)(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); + void (*dereg_mr)(struct hns_roce_dev *hr_dev); int (*init_eq)(struct hns_roce_dev *hr_dev); void (*cleanup_eq)(struct hns_roce_dev *hr_dev); int (*write_srqc)(struct hns_roce_srq *srq, void *mb_buf); @@ -1145,9 +1154,6 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); -int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long mpt_index); unsigned long key_to_hw_index(u32 key); int hns_roce_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 8917365cc6b8..ce1a0d2792a3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -488,7 +488,7 @@ static int set_mhop_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_index *index) { struct ib_device *ibdev = &hr_dev->ib_dev; - int step_idx; + u32 step_idx; int ret = 0; if (index->inited & HEM_INDEX_L0) { @@ -618,7 +618,7 @@ static void clear_mhop_hem(struct hns_roce_dev *hr_dev, struct ib_device *ibdev = &hr_dev->ib_dev; u32 hop_num = mhop->hop_num; u32 chunk_ba_num; - int step_idx; + u32 step_idx; index->inited = HEM_INDEX_BUF; chunk_ba_num = mhop->bt_chunk_size / BA_BYTE_LEN; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index b33e948fd060..2b0cef17ad45 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1344,17 +1344,17 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, return ret; } -static int config_hem_ba_to_hw(struct hns_roce_dev *hr_dev, unsigned long obj, - dma_addr_t base_addr, u16 op) +static int config_hem_ba_to_hw(struct hns_roce_dev *hr_dev, + dma_addr_t base_addr, u8 cmd, unsigned long tag) { - struct hns_roce_cmd_mailbox *mbox = hns_roce_alloc_cmd_mailbox(hr_dev); + struct hns_roce_cmd_mailbox *mbox; int ret; + mbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR(mbox)) return PTR_ERR(mbox); - ret = hns_roce_cmd_mbox(hr_dev, base_addr, mbox->dma, obj, 0, op, - HNS_ROCE_CMD_TIMEOUT_MSECS); + ret = hns_roce_cmd_mbox(hr_dev, base_addr, mbox->dma, cmd, tag); hns_roce_free_cmd_mailbox(hr_dev, mbox); return ret; } @@ -2664,6 +2664,194 @@ static void free_dip_list(struct hns_roce_dev *hr_dev) spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags); } +static void free_mr_exit(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_v2_free_mr *free_mr = &priv->free_mr; + int ret; + int i; + + for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) { + if (free_mr->rsv_qp[i]) { + ret = ib_destroy_qp(free_mr->rsv_qp[i]); + if (ret) + ibdev_err(&hr_dev->ib_dev, + "failed to destroy qp in free mr.\n"); + + free_mr->rsv_qp[i] = NULL; + } + } + + if (free_mr->rsv_cq) { + ib_destroy_cq(free_mr->rsv_cq); + free_mr->rsv_cq = NULL; + } + + if (free_mr->rsv_pd) { + ib_dealloc_pd(free_mr->rsv_pd); + free_mr->rsv_pd = NULL; + } +} + +static int free_mr_alloc_res(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_v2_free_mr *free_mr = &priv->free_mr; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct ib_cq_init_attr cq_init_attr = {}; + struct ib_qp_init_attr qp_init_attr = {}; + struct ib_pd *pd; + struct ib_cq *cq; + struct ib_qp *qp; + int ret; + int i; + + pd = ib_alloc_pd(ibdev, 0); + if (IS_ERR(pd)) { + ibdev_err(ibdev, "failed to create pd for free mr.\n"); + return PTR_ERR(pd); + } + free_mr->rsv_pd = pd; + + cq_init_attr.cqe = HNS_ROCE_FREE_MR_USED_CQE_NUM; + cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_init_attr); + if (IS_ERR(cq)) { + ibdev_err(ibdev, "failed to create cq for free mr.\n"); + ret = PTR_ERR(cq); + goto create_failed; + } + free_mr->rsv_cq = cq; + + qp_init_attr.qp_type = IB_QPT_RC; + qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; + qp_init_attr.send_cq = free_mr->rsv_cq; + qp_init_attr.recv_cq = free_mr->rsv_cq; + for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) { + qp_init_attr.cap.max_send_wr = HNS_ROCE_FREE_MR_USED_SQWQE_NUM; + qp_init_attr.cap.max_send_sge = HNS_ROCE_FREE_MR_USED_SQSGE_NUM; + qp_init_attr.cap.max_recv_wr = HNS_ROCE_FREE_MR_USED_RQWQE_NUM; + qp_init_attr.cap.max_recv_sge = HNS_ROCE_FREE_MR_USED_RQSGE_NUM; + + qp = ib_create_qp(free_mr->rsv_pd, &qp_init_attr); + if (IS_ERR(qp)) { + ibdev_err(ibdev, "failed to create qp for free mr.\n"); + ret = PTR_ERR(qp); + goto create_failed; + } + + free_mr->rsv_qp[i] = qp; + } + + return 0; + +create_failed: + free_mr_exit(hr_dev); + + return ret; +} + +static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev, + struct ib_qp_attr *attr, int sl_num) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_v2_free_mr *free_mr = &priv->free_mr; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_qp *hr_qp; + int loopback; + int mask; + int ret; + + hr_qp = to_hr_qp(free_mr->rsv_qp[sl_num]); + hr_qp->free_mr_en = 1; + + mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS; + attr->qp_state = IB_QPS_INIT; + attr->port_num = 1; + attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE; + ret = ib_modify_qp(&hr_qp->ibqp, attr, mask); + if (ret) { + ibdev_err(ibdev, "failed to modify qp to init, ret = %d.\n", + ret); + return ret; + } + + loopback = hr_dev->loop_idc; + /* Set qpc lbi = 1 incidate loopback IO */ + hr_dev->loop_idc = 1; + + mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | + IB_QP_RQ_PSN | IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER; + attr->qp_state = IB_QPS_RTR; + attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; + attr->path_mtu = IB_MTU_256; + attr->dest_qp_num = hr_qp->qpn; + attr->rq_psn = HNS_ROCE_FREE_MR_USED_PSN; + + rdma_ah_set_sl(&attr->ah_attr, (u8)sl_num); + + ret = ib_modify_qp(&hr_qp->ibqp, attr, mask); + hr_dev->loop_idc = loopback; + if (ret) { + ibdev_err(ibdev, "failed to modify qp to rtr, ret = %d.\n", + ret); + return ret; + } + + mask = IB_QP_STATE | IB_QP_SQ_PSN | IB_QP_RETRY_CNT | IB_QP_TIMEOUT | + IB_QP_RNR_RETRY | IB_QP_MAX_QP_RD_ATOMIC; + attr->qp_state = IB_QPS_RTS; + attr->sq_psn = HNS_ROCE_FREE_MR_USED_PSN; + attr->retry_cnt = HNS_ROCE_FREE_MR_USED_QP_RETRY_CNT; + attr->timeout = HNS_ROCE_FREE_MR_USED_QP_TIMEOUT; + ret = ib_modify_qp(&hr_qp->ibqp, attr, mask); + if (ret) + ibdev_err(ibdev, "failed to modify qp to rts, ret = %d.\n", + ret); + + return ret; +} + +static int free_mr_modify_qp(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_v2_free_mr *free_mr = &priv->free_mr; + struct ib_qp_attr attr = {}; + int ret; + int i; + + rdma_ah_set_grh(&attr.ah_attr, NULL, 0, 0, 1, 0); + rdma_ah_set_static_rate(&attr.ah_attr, 3); + rdma_ah_set_port_num(&attr.ah_attr, 1); + + for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) { + ret = free_mr_modify_rsv_qp(hr_dev, &attr, i); + if (ret) + return ret; + } + + return 0; +} + +static int free_mr_init(struct hns_roce_dev *hr_dev) +{ + int ret; + + ret = free_mr_alloc_res(hr_dev); + if (ret) + return ret; + + ret = free_mr_modify_qp(hr_dev); + if (ret) + goto err_modify_qp; + + return 0; + +err_modify_qp: + free_mr_exit(hr_dev); + + return ret; +} + static int get_hem_table(struct hns_roce_dev *hr_dev) { unsigned int qpc_count; @@ -2780,21 +2968,21 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev) free_dip_list(hr_dev); } -static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev, u64 in_param, - u64 out_param, u32 in_modifier, u8 op_modifier, - u16 op, u16 token, int event) +static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev, + struct hns_roce_mbox_msg *mbox_msg) { struct hns_roce_cmq_desc desc; struct hns_roce_post_mbox *mb = (struct hns_roce_post_mbox *)desc.data; hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_POST_MB, false); - mb->in_param_l = cpu_to_le32(in_param); - mb->in_param_h = cpu_to_le32(in_param >> 32); - mb->out_param_l = cpu_to_le32(out_param); - mb->out_param_h = cpu_to_le32(out_param >> 32); - mb->cmd_tag = cpu_to_le32(in_modifier << 8 | op); - mb->token_event_en = cpu_to_le32(event << 16 | token); + mb->in_param_l = cpu_to_le32(mbox_msg->in_param); + mb->in_param_h = cpu_to_le32(mbox_msg->in_param >> 32); + mb->out_param_l = cpu_to_le32(mbox_msg->out_param); + mb->out_param_h = cpu_to_le32(mbox_msg->out_param >> 32); + mb->cmd_tag = cpu_to_le32(mbox_msg->tag << 8 | mbox_msg->cmd); + mb->token_event_en = cpu_to_le32(mbox_msg->event_en << 16 | + mbox_msg->token); return hns_roce_cmq_send(hr_dev, &desc, 1); } @@ -2847,9 +3035,8 @@ static int v2_wait_mbox_complete(struct hns_roce_dev *hr_dev, u32 timeout, return ret; } -static int v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, - u64 out_param, u32 in_modifier, u8 op_modifier, - u16 op, u16 token, int event) +static int v2_post_mbox(struct hns_roce_dev *hr_dev, + struct hns_roce_mbox_msg *mbox_msg) { u8 status = 0; int ret; @@ -2865,8 +3052,7 @@ static int v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, } /* Post new message to mbox */ - ret = hns_roce_mbox_post(hr_dev, in_param, out_param, in_modifier, - op_modifier, op, token, event); + ret = hns_roce_mbox_post(hr_dev, mbox_msg); if (ret) dev_err_ratelimited(hr_dev->dev, "failed to post mailbox, ret = %d.\n", ret); @@ -2874,12 +3060,13 @@ static int v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, return ret; } -static int v2_poll_mbox_done(struct hns_roce_dev *hr_dev, unsigned int timeout) +static int v2_poll_mbox_done(struct hns_roce_dev *hr_dev) { u8 status = 0; int ret; - ret = v2_wait_mbox_complete(hr_dev, timeout, &status); + ret = v2_wait_mbox_complete(hr_dev, HNS_ROCE_CMD_TIMEOUT_MSECS, + &status); if (!ret) { if (status != MB_ST_COMPLETE_SUCC) return -EBUSY; @@ -3245,6 +3432,98 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw) return 0; } +static int free_mr_post_send_lp_wqe(struct hns_roce_qp *hr_qp) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device); + struct ib_device *ibdev = &hr_dev->ib_dev; + const struct ib_send_wr *bad_wr; + struct ib_rdma_wr rdma_wr = {}; + struct ib_send_wr *send_wr; + int ret; + + send_wr = &rdma_wr.wr; + send_wr->opcode = IB_WR_RDMA_WRITE; + + ret = hns_roce_v2_post_send(&hr_qp->ibqp, send_wr, &bad_wr); + if (ret) { + ibdev_err(ibdev, "failed to post wqe for free mr, ret = %d.\n", + ret); + return ret; + } + + return 0; +} + +static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries, + struct ib_wc *wc); + +static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_v2_free_mr *free_mr = &priv->free_mr; + struct ib_wc wc[ARRAY_SIZE(free_mr->rsv_qp)]; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_qp *hr_qp; + unsigned long end; + int cqe_cnt = 0; + int npolled; + int ret; + int i; + + /* + * If the device initialization is not complete or in the uninstall + * process, then there is no need to execute free mr. + */ + if (priv->handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT || + priv->handle->rinfo.instance_state == HNS_ROCE_STATE_INIT || + hr_dev->state == HNS_ROCE_DEVICE_STATE_UNINIT) + return; + + mutex_lock(&free_mr->mutex); + + for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) { + hr_qp = to_hr_qp(free_mr->rsv_qp[i]); + + ret = free_mr_post_send_lp_wqe(hr_qp); + if (ret) { + ibdev_err(ibdev, + "failed to send wqe (qp:0x%lx) for free mr, ret = %d.\n", + hr_qp->qpn, ret); + break; + } + + cqe_cnt++; + } + + end = msecs_to_jiffies(HNS_ROCE_V2_FREE_MR_TIMEOUT) + jiffies; + while (cqe_cnt) { + npolled = hns_roce_v2_poll_cq(free_mr->rsv_cq, cqe_cnt, wc); + if (npolled < 0) { + ibdev_err(ibdev, + "failed to poll cqe for free mr, remain %d cqe.\n", + cqe_cnt); + goto out; + } + + if (time_after(jiffies, end)) { + ibdev_err(ibdev, + "failed to poll cqe for free mr and timeout, remain %d cqe.\n", + cqe_cnt); + goto out; + } + cqe_cnt -= npolled; + } + +out: + mutex_unlock(&free_mr->mutex); +} + +static void hns_roce_v2_dereg_mr(struct hns_roce_dev *hr_dev) +{ + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) + free_mr_send_cmd_to_hw(hr_dev); +} + static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n) { return hns_roce_buf_offset(hr_cq->mtr.kmem, n * hr_cq->cqe_size); @@ -3818,38 +4097,38 @@ out: } static int get_op_for_set_hem(struct hns_roce_dev *hr_dev, u32 type, - int step_idx, u16 *mbox_op) + u32 step_idx, u8 *mbox_cmd) { - u16 op; + u8 cmd; switch (type) { case HEM_TYPE_QPC: - op = HNS_ROCE_CMD_WRITE_QPC_BT0; + cmd = HNS_ROCE_CMD_WRITE_QPC_BT0; break; case HEM_TYPE_MTPT: - op = HNS_ROCE_CMD_WRITE_MPT_BT0; + cmd = HNS_ROCE_CMD_WRITE_MPT_BT0; break; case HEM_TYPE_CQC: - op = HNS_ROCE_CMD_WRITE_CQC_BT0; + cmd = HNS_ROCE_CMD_WRITE_CQC_BT0; break; case HEM_TYPE_SRQC: - op = HNS_ROCE_CMD_WRITE_SRQC_BT0; + cmd = HNS_ROCE_CMD_WRITE_SRQC_BT0; break; case HEM_TYPE_SCCC: - op = HNS_ROCE_CMD_WRITE_SCCC_BT0; + cmd = HNS_ROCE_CMD_WRITE_SCCC_BT0; break; case HEM_TYPE_QPC_TIMER: - op = HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0; + cmd = HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0; break; case HEM_TYPE_CQC_TIMER: - op = HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0; + cmd = HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0; break; default: dev_warn(hr_dev->dev, "failed to check hem type %u.\n", type); return -EINVAL; } - *mbox_op = op + step_idx; + *mbox_cmd = cmd + step_idx; return 0; } @@ -3872,10 +4151,10 @@ static int config_gmv_ba_to_hw(struct hns_roce_dev *hr_dev, unsigned long obj, } static int set_hem_to_hw(struct hns_roce_dev *hr_dev, int obj, - dma_addr_t base_addr, u32 hem_type, int step_idx) + dma_addr_t base_addr, u32 hem_type, u32 step_idx) { int ret; - u16 op; + u8 cmd; if (unlikely(hem_type == HEM_TYPE_GMV)) return config_gmv_ba_to_hw(hr_dev, obj, base_addr); @@ -3883,16 +4162,16 @@ static int set_hem_to_hw(struct hns_roce_dev *hr_dev, int obj, if (unlikely(hem_type == HEM_TYPE_SCCC && step_idx)) return 0; - ret = get_op_for_set_hem(hr_dev, hem_type, step_idx, &op); + ret = get_op_for_set_hem(hr_dev, hem_type, step_idx, &cmd); if (ret < 0) return ret; - return config_hem_ba_to_hw(hr_dev, obj, base_addr, op); + return config_hem_ba_to_hw(hr_dev, base_addr, cmd, obj); } static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, int obj, - int step_idx) + u32 step_idx) { struct hns_roce_hem_iter iter; struct hns_roce_hem_mhop mhop; @@ -3950,29 +4229,29 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, } static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_table *table, int obj, - int step_idx) + struct hns_roce_hem_table *table, + int tag, u32 step_idx) { - struct device *dev = hr_dev->dev; struct hns_roce_cmd_mailbox *mailbox; + struct device *dev = hr_dev->dev; + u8 cmd = 0xff; int ret; - u16 op = 0xff; if (!hns_roce_check_whether_mhop(hr_dev, table->type)) return 0; switch (table->type) { case HEM_TYPE_QPC: - op = HNS_ROCE_CMD_DESTROY_QPC_BT0; + cmd = HNS_ROCE_CMD_DESTROY_QPC_BT0; break; case HEM_TYPE_MTPT: - op = HNS_ROCE_CMD_DESTROY_MPT_BT0; + cmd = HNS_ROCE_CMD_DESTROY_MPT_BT0; break; case HEM_TYPE_CQC: - op = HNS_ROCE_CMD_DESTROY_CQC_BT0; + cmd = HNS_ROCE_CMD_DESTROY_CQC_BT0; break; case HEM_TYPE_SRQC: - op = HNS_ROCE_CMD_DESTROY_SRQC_BT0; + cmd = HNS_ROCE_CMD_DESTROY_SRQC_BT0; break; case HEM_TYPE_SCCC: case HEM_TYPE_QPC_TIMER: @@ -3985,15 +4264,13 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev, return 0; } - op += step_idx; + cmd += step_idx; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); - /* configure the tag and op */ - ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, obj, 0, op, - HNS_ROCE_CMD_TIMEOUT_MSECS); + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, cmd, tag); hns_roce_free_cmd_mailbox(hr_dev, mailbox); return ret; @@ -4017,9 +4294,8 @@ static int hns_roce_v2_qp_modify(struct hns_roce_dev *hr_dev, memcpy(mailbox->buf, context, qpc_size); memcpy(mailbox->buf + qpc_size, qpc_mask, qpc_size); - ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_qp->qpn, 0, - HNS_ROCE_CMD_MODIFY_QPC, - HNS_ROCE_CMD_TIMEOUT_MSECS); + ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, + HNS_ROCE_CMD_MODIFY_QPC, hr_qp->qpn); hns_roce_free_cmd_mailbox(hr_dev, mailbox); @@ -4667,6 +4943,18 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, u8 hr_port; int ret; + /* + * If free_mr_en of qp is set, it means that this qp comes from + * free mr. This qp will perform the loopback operation. + * In the loopback scenario, only sl needs to be set. + */ + if (hr_qp->free_mr_en) { + hr_reg_write(context, QPC_SL, rdma_ah_get_sl(&attr->ah_attr)); + hr_reg_clear(qpc_mask, QPC_SL); + hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); + return 0; + } + ib_port = (attr_mask & IB_QP_PORT) ? attr->port_num : hr_qp->port + 1; hr_port = ib_port - 1; is_roce_protocol = rdma_cap_eth_ah(&hr_dev->ib_dev, ib_port) && @@ -5092,9 +5380,8 @@ static int hns_roce_v2_query_qpc(struct hns_roce_dev *hr_dev, if (IS_ERR(mailbox)) return PTR_ERR(mailbox); - ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, hr_qp->qpn, 0, - HNS_ROCE_CMD_QUERY_QPC, - HNS_ROCE_CMD_TIMEOUT_MSECS); + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_QPC, + hr_qp->qpn); if (ret) goto out; @@ -5460,9 +5747,8 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, hr_reg_write(srq_context, SRQC_LIMIT_WL, srq_attr->srq_limit); hr_reg_clear(srqc_mask, SRQC_LIMIT_WL); - ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, srq->srqn, 0, - HNS_ROCE_CMD_MODIFY_SRQC, - HNS_ROCE_CMD_TIMEOUT_MSECS); + ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, + HNS_ROCE_CMD_MODIFY_SRQC, srq->srqn); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { ibdev_err(&hr_dev->ib_dev, @@ -5488,9 +5774,8 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) return PTR_ERR(mailbox); srq_context = mailbox->buf; - ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, srq->srqn, 0, - HNS_ROCE_CMD_QUERY_SRQC, - HNS_ROCE_CMD_TIMEOUT_MSECS); + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, + HNS_ROCE_CMD_QUERY_SRQC, srq->srqn); if (ret) { ibdev_err(&hr_dev->ib_dev, "failed to process cmd of querying SRQ, ret = %d.\n", @@ -5540,9 +5825,8 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) hr_reg_write(cq_context, CQC_CQ_PERIOD, cq_period); hr_reg_clear(cqc_mask, CQC_CQ_PERIOD); - ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_cq->cqn, 1, - HNS_ROCE_CMD_MODIFY_CQC, - HNS_ROCE_CMD_TIMEOUT_MSECS); + ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, + HNS_ROCE_CMD_MODIFY_CQC, hr_cq->cqn); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) ibdev_err(&hr_dev->ib_dev, @@ -5869,15 +6153,14 @@ static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, u32 eqn) { struct device *dev = hr_dev->dev; int ret; + u8 cmd; if (eqn < hr_dev->caps.num_comp_vectors) - ret = hns_roce_cmd_mbox(hr_dev, 0, 0, eqn & HNS_ROCE_V2_EQN_M, - 0, HNS_ROCE_CMD_DESTROY_CEQC, - HNS_ROCE_CMD_TIMEOUT_MSECS); + cmd = HNS_ROCE_CMD_DESTROY_CEQC; else - ret = hns_roce_cmd_mbox(hr_dev, 0, 0, eqn & HNS_ROCE_V2_EQN_M, - 0, HNS_ROCE_CMD_DESTROY_AEQC, - HNS_ROCE_CMD_TIMEOUT_MSECS); + cmd = HNS_ROCE_CMD_DESTROY_AEQC; + + ret = hns_roce_destroy_hw_ctx(hr_dev, cmd, eqn & HNS_ROCE_V2_EQN_M); if (ret) dev_err(dev, "[mailbox cmd] destroy eqc(%u) failed.\n", eqn); } @@ -5983,16 +6266,15 @@ static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) } static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev, - struct hns_roce_eq *eq, - unsigned int eq_cmd) + struct hns_roce_eq *eq, u8 eq_cmd) { struct hns_roce_cmd_mailbox *mailbox; int ret; /* Allocate mailbox memory */ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR_OR_NULL(mailbox)) - return -ENOMEM; + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); ret = alloc_eq_buf(hr_dev, eq); if (ret) @@ -6002,8 +6284,7 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev, if (ret) goto err_cmd_mbox; - ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, eq->eqn, 0, - eq_cmd, HNS_ROCE_CMD_TIMEOUT_MSECS); + ret = hns_roce_create_hw_ctx(hr_dev, mailbox, eq_cmd, eq->eqn); if (ret) { dev_err(hr_dev->dev, "[mailbox cmd] create eqc failed.\n"); goto err_cmd_mbox; @@ -6114,14 +6395,14 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev) struct hns_roce_eq_table *eq_table = &hr_dev->eq_table; struct device *dev = hr_dev->dev; struct hns_roce_eq *eq; - unsigned int eq_cmd; - int irq_num; - int eq_num; int other_num; int comp_num; int aeq_num; - int i; + int irq_num; + int eq_num; + u8 eq_cmd; int ret; + int i; other_num = hr_dev->caps.num_other_vectors; comp_num = hr_dev->caps.num_comp_vectors; @@ -6258,6 +6539,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .set_hem = hns_roce_v2_set_hem, .clear_hem = hns_roce_v2_clear_hem, .modify_qp = hns_roce_v2_modify_qp, + .dereg_mr = hns_roce_v2_dereg_mr, .qp_flow_control_init = hns_roce_v2_qp_flow_control_init, .init_eq = hns_roce_v2_init_eq_table, .cleanup_eq = hns_roce_v2_cleanup_eq_table, @@ -6339,14 +6621,25 @@ static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) ret = hns_roce_init(hr_dev); if (ret) { dev_err(hr_dev->dev, "RoCE Engine init failed!\n"); - goto error_failed_get_cfg; + goto error_failed_cfg; + } + + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { + ret = free_mr_init(hr_dev); + if (ret) { + dev_err(hr_dev->dev, "failed to init free mr!\n"); + goto error_failed_roce_init; + } } handle->priv = hr_dev; return 0; -error_failed_get_cfg: +error_failed_roce_init: + hns_roce_exit(hr_dev); + +error_failed_cfg: kfree(hr_dev->priv); error_failed_kzalloc: @@ -6368,6 +6661,9 @@ static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, hr_dev->state = HNS_ROCE_DEVICE_STATE_UNINIT; hns_roce_handle_device_err(hr_dev); + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) + free_mr_exit(hr_dev); + hns_roce_exit(hr_dev); kfree(hr_dev->priv); ib_dealloc_device(&hr_dev->ib_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 12be85f0986e..0d87b627601e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -139,6 +139,18 @@ enum { #define CMD_CSQ_DESC_NUM 1024 #define CMD_CRQ_DESC_NUM 1024 +/* Free mr used parameters */ +#define HNS_ROCE_FREE_MR_USED_CQE_NUM 128 +#define HNS_ROCE_FREE_MR_USED_QP_NUM 0x8 +#define HNS_ROCE_FREE_MR_USED_PSN 0x0808 +#define HNS_ROCE_FREE_MR_USED_QP_RETRY_CNT 0x7 +#define HNS_ROCE_FREE_MR_USED_QP_TIMEOUT 0x12 +#define HNS_ROCE_FREE_MR_USED_SQWQE_NUM 128 +#define HNS_ROCE_FREE_MR_USED_SQSGE_NUM 0x2 +#define HNS_ROCE_FREE_MR_USED_RQWQE_NUM 128 +#define HNS_ROCE_FREE_MR_USED_RQSGE_NUM 0x2 +#define HNS_ROCE_V2_FREE_MR_TIMEOUT 4500 + enum { NO_ARMED = 0x0, REG_NXT_CEQE = 0x2, @@ -1418,10 +1430,18 @@ struct hns_roce_link_table { #define HNS_ROCE_EXT_LLM_ENTRY(addr, id) (((id) << (64 - 12)) | ((addr) >> 12)) #define HNS_ROCE_EXT_LLM_MIN_PAGES(que_num) ((que_num) * 4 + 2) +struct hns_roce_v2_free_mr { + struct ib_qp *rsv_qp[HNS_ROCE_FREE_MR_USED_QP_NUM]; + struct ib_cq *rsv_cq; + struct ib_pd *rsv_pd; + struct mutex mutex; +}; + struct hns_roce_v2_priv { struct hnae3_handle *handle; struct hns_roce_v2_cmq cmq; struct hns_roce_link_table ext_llm; + struct hns_roce_v2_free_mr free_mr; }; struct hns_roce_dip { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c index 5a97b5a0b7be..f7a75a7cda74 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2_dfx.c @@ -18,9 +18,8 @@ int hns_roce_v2_query_cqc_info(struct hns_roce_dev *hr_dev, u32 cqn, return PTR_ERR(mailbox); cq_context = mailbox->buf; - ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, cqn, 0, - HNS_ROCE_CMD_QUERY_CQC, - HNS_ROCE_CMD_TIMEOUT_MSECS); + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_CQC, + cqn); if (ret) { dev_err(hr_dev->dev, "QUERY cqc cmd process error\n"); goto err_mailbox; diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 2ee06b906b60..b389738d157f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -47,24 +47,6 @@ unsigned long key_to_hw_index(u32 key) return (key << 24) | (key >> 8); } -static int hns_roce_hw_create_mpt(struct hns_roce_dev *hr_dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long mpt_index) -{ - return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0, - HNS_ROCE_CMD_CREATE_MPT, - HNS_ROCE_CMD_TIMEOUT_MSECS); -} - -int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long mpt_index) -{ - return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? mailbox->dma : 0, - mpt_index, !mailbox, HNS_ROCE_CMD_DESTROY_MPT, - HNS_ROCE_CMD_TIMEOUT_MSECS); -} - static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { struct hns_roce_ida *mtpt_ida = &hr_dev->mr_table.mtpt_ida; @@ -137,14 +119,13 @@ static void free_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr); } -static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, - struct hns_roce_mr *mr) +static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { struct ib_device *ibdev = &hr_dev->ib_dev; int ret; if (mr->enabled) { - ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, + ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT, key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1)); if (ret) @@ -166,10 +147,8 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, /* Allocate mailbox memory */ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) { - ret = PTR_ERR(mailbox); - return ret; - } + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); if (mr->type != MR_TYPE_FRMR) ret = hr_dev->hw->write_mtpt(hr_dev, mailbox->buf, mr); @@ -180,7 +159,7 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, goto err_page; } - ret = hns_roce_hw_create_mpt(hr_dev, mailbox, + ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_MPT, mtpt_idx & (hr_dev->caps.num_mtpts - 1)); if (ret) { dev_err(dev, "failed to create mpt, ret = %d.\n", ret); @@ -303,13 +282,14 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, return ERR_CAST(mailbox); mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1); - ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, mtpt_idx, 0, - HNS_ROCE_CMD_QUERY_MPT, - HNS_ROCE_CMD_TIMEOUT_MSECS); + + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_MPT, + mtpt_idx); if (ret) goto free_cmd_mbox; - ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, mtpt_idx); + ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT, + mtpt_idx); if (ret) ibdev_warn(ib_dev, "failed to destroy MPT, ret = %d.\n", ret); @@ -339,7 +319,8 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, goto free_cmd_mbox; } - ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx); + ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_MPT, + mtpt_idx); if (ret) { ibdev_err(ib_dev, "failed to create MPT, ret = %d.\n", ret); goto free_cmd_mbox; @@ -361,6 +342,9 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) struct hns_roce_mr *mr = to_hr_mr(ibmr); int ret = 0; + if (hr_dev->hw->dereg_mr) + hr_dev->hw->dereg_mr(hr_dev); + hns_roce_mr_free(hr_dev, mr); kfree(mr); @@ -480,7 +464,7 @@ static void hns_roce_mw_free(struct hns_roce_dev *hr_dev, int ret; if (mw->enabled) { - ret = hns_roce_hw_destroy_mpt(hr_dev, NULL, + ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT, key_to_hw_index(mw->rkey) & (hr_dev->caps.num_mtpts - 1)); if (ret) @@ -520,7 +504,7 @@ static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev, goto err_page; } - ret = hns_roce_hw_create_mpt(hr_dev, mailbox, + ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_MPT, mtpt_idx & (hr_dev->caps.num_mtpts - 1)); if (ret) { dev_err(dev, "MW CREATE_MPT failed (%d)\n", ret); diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index e64ef6903fb4..8dae98f827eb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -59,58 +59,39 @@ static void hns_roce_ib_srq_event(struct hns_roce_srq *srq, } } -static int hns_roce_hw_create_srq(struct hns_roce_dev *dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long srq_num) +static int alloc_srqn(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) { - return hns_roce_cmd_mbox(dev, mailbox->dma, 0, srq_num, 0, - HNS_ROCE_CMD_CREATE_SRQ, - HNS_ROCE_CMD_TIMEOUT_MSECS); -} - -static int hns_roce_hw_destroy_srq(struct hns_roce_dev *dev, - struct hns_roce_cmd_mailbox *mailbox, - unsigned long srq_num) -{ - return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, srq_num, - mailbox ? 0 : 1, HNS_ROCE_CMD_DESTROY_SRQ, - HNS_ROCE_CMD_TIMEOUT_MSECS); -} - -static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) -{ - struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; struct hns_roce_ida *srq_ida = &hr_dev->srq_table.srq_ida; - struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_cmd_mailbox *mailbox; - int ret; int id; id = ida_alloc_range(&srq_ida->ida, srq_ida->min, srq_ida->max, GFP_KERNEL); if (id < 0) { - ibdev_err(ibdev, "failed to alloc srq(%d).\n", id); + ibdev_err(&hr_dev->ib_dev, "failed to alloc srq(%d).\n", id); return -ENOMEM; } - srq->srqn = (unsigned long)id; - ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn); - if (ret) { - ibdev_err(ibdev, "failed to get SRQC table, ret = %d.\n", ret); - goto err_out; - } + srq->srqn = id; - ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL)); - if (ret) { - ibdev_err(ibdev, "failed to store SRQC, ret = %d.\n", ret); - goto err_put; - } + return 0; +} + +static void free_srqn(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) +{ + ida_free(&hr_dev->srq_table.srq_ida.ida, (int)srq->srqn); +} + +static int hns_roce_create_srqc(struct hns_roce_dev *hr_dev, + struct hns_roce_srq *srq) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_cmd_mailbox *mailbox; + int ret; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR_OR_NULL(mailbox)) { + if (IS_ERR(mailbox)) { ibdev_err(ibdev, "failed to alloc mailbox for SRQC.\n"); - ret = -ENOMEM; - goto err_xa; + return PTR_ERR(mailbox); } ret = hr_dev->hw->write_srqc(srq, mailbox->buf); @@ -119,24 +100,44 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) goto err_mbox; } - ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn); - if (ret) { + ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_SRQ, + srq->srqn); + if (ret) ibdev_err(ibdev, "failed to config SRQC, ret = %d.\n", ret); - goto err_mbox; - } - - hns_roce_free_cmd_mailbox(hr_dev, mailbox); - - return 0; err_mbox: hns_roce_free_cmd_mailbox(hr_dev, mailbox); + return ret; +} + +static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) +{ + struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; + struct ib_device *ibdev = &hr_dev->ib_dev; + int ret; + + ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn); + if (ret) { + ibdev_err(ibdev, "failed to get SRQC table, ret = %d.\n", ret); + return ret; + } + + ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL)); + if (ret) { + ibdev_err(ibdev, "failed to store SRQC, ret = %d.\n", ret); + goto err_put; + } + + ret = hns_roce_create_srqc(hr_dev, srq); + if (ret) + goto err_xa; + + return 0; + err_xa: xa_erase(&srq_table->xa, srq->srqn); err_put: hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn); -err_out: - ida_free(&srq_ida->ida, id); return ret; } @@ -146,7 +147,8 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; int ret; - ret = hns_roce_hw_destroy_srq(hr_dev, NULL, srq->srqn); + ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_SRQ, + srq->srqn); if (ret) dev_err(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n", ret, srq->srqn); @@ -158,7 +160,6 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) wait_for_completion(&srq->free); hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn); - ida_free(&srq_table->srq_ida.ida, (int)srq->srqn); } static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, @@ -406,10 +407,14 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, if (ret) return ret; - ret = alloc_srqc(hr_dev, srq); + ret = alloc_srqn(hr_dev, srq); if (ret) goto err_srq_buf; + ret = alloc_srqc(hr_dev, srq); + if (ret) + goto err_srqn; + if (udata) { resp.srqn = srq->srqn; if (ib_copy_to_udata(udata, &resp, @@ -428,6 +433,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, err_srqc: free_srqc(hr_dev, srq); +err_srqn: + free_srqn(hr_dev, srq); err_srq_buf: free_srq_buf(hr_dev, srq); @@ -440,6 +447,7 @@ int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) struct hns_roce_srq *srq = to_hr_srq(ibsrq); free_srqc(hr_dev, srq); + free_srqn(hr_dev, srq); free_srq_buf(hr_dev, srq); return 0; } diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index 6dea0a49d171..dedb3b7edd8d 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -1501,15 +1501,14 @@ irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, u16 dst_port, * @cm_info: CM info for parent listen node * @cm_parent_listen_node: The parent listen node */ -static enum irdma_status_code -irdma_del_multiple_qhash(struct irdma_device *iwdev, - struct irdma_cm_info *cm_info, - struct irdma_cm_listener *cm_parent_listen_node) +static int irdma_del_multiple_qhash(struct irdma_device *iwdev, + struct irdma_cm_info *cm_info, + struct irdma_cm_listener *cm_parent_listen_node) { struct irdma_cm_listener *child_listen_node; - enum irdma_status_code ret = IRDMA_ERR_CFG; struct list_head *pos, *tpos; unsigned long flags; + int ret = -EINVAL; spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags); list_for_each_safe (pos, tpos, @@ -1618,16 +1617,16 @@ u16 irdma_get_vlan_ipv4(u32 *addr) * Adds a qhash and a child listen node for every IPv6 address * on the adapter and adds the associated qhash filter */ -static enum irdma_status_code -irdma_add_mqh_6(struct irdma_device *iwdev, struct irdma_cm_info *cm_info, - struct irdma_cm_listener *cm_parent_listen_node) +static int irdma_add_mqh_6(struct irdma_device *iwdev, + struct irdma_cm_info *cm_info, + struct irdma_cm_listener *cm_parent_listen_node) { struct net_device *ip_dev; struct inet6_dev *idev; struct inet6_ifaddr *ifp, *tmp; - enum irdma_status_code ret = 0; struct irdma_cm_listener *child_listen_node; unsigned long flags; + int ret = 0; rtnl_lock(); for_each_netdev(&init_net, ip_dev) { @@ -1653,7 +1652,7 @@ irdma_add_mqh_6(struct irdma_device *iwdev, struct irdma_cm_info *cm_info, child_listen_node); if (!child_listen_node) { ibdev_dbg(&iwdev->ibdev, "CM: listener memory allocation\n"); - ret = IRDMA_ERR_NO_MEMORY; + ret = -ENOMEM; goto exit; } @@ -1700,16 +1699,16 @@ exit: * Adds a qhash and a child listen node for every IPv4 address * on the adapter and adds the associated qhash filter */ -static enum irdma_status_code -irdma_add_mqh_4(struct irdma_device *iwdev, struct irdma_cm_info *cm_info, - struct irdma_cm_listener *cm_parent_listen_node) +static int irdma_add_mqh_4(struct irdma_device *iwdev, + struct irdma_cm_info *cm_info, + struct irdma_cm_listener *cm_parent_listen_node) { struct net_device *ip_dev; struct in_device *idev; struct irdma_cm_listener *child_listen_node; - enum irdma_status_code ret = 0; unsigned long flags; const struct in_ifaddr *ifa; + int ret = 0; rtnl_lock(); for_each_netdev(&init_net, ip_dev) { @@ -1734,7 +1733,7 @@ irdma_add_mqh_4(struct irdma_device *iwdev, struct irdma_cm_info *cm_info, if (!child_listen_node) { ibdev_dbg(&iwdev->ibdev, "CM: listener memory allocation\n"); in_dev_put(idev); - ret = IRDMA_ERR_NO_MEMORY; + ret = -ENOMEM; goto exit; } @@ -1781,9 +1780,9 @@ exit: * @cm_info: CM info for parent listen node * @cm_listen_node: The parent listen node */ -static enum irdma_status_code -irdma_add_mqh(struct irdma_device *iwdev, struct irdma_cm_info *cm_info, - struct irdma_cm_listener *cm_listen_node) +static int irdma_add_mqh(struct irdma_device *iwdev, + struct irdma_cm_info *cm_info, + struct irdma_cm_listener *cm_listen_node) { if (cm_info->ipv4) return irdma_add_mqh_4(iwdev, cm_info, cm_listen_node); @@ -2200,7 +2199,7 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev, /* set our node specific transport info */ cm_node->ipv4 = cm_info->ipv4; cm_node->vlan_id = cm_info->vlan_id; - if (cm_node->vlan_id >= VLAN_N_VID && iwdev->dcb) + if (cm_node->vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode) cm_node->vlan_id = 0; cm_node->tos = cm_info->tos; cm_node->user_pri = cm_info->user_pri; @@ -2209,8 +2208,12 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev, ibdev_warn(&iwdev->ibdev, "application TOS[%d] and remote client TOS[%d] mismatch\n", listener->tos, cm_info->tos); - cm_node->tos = max(listener->tos, cm_info->tos); - cm_node->user_pri = rt_tos2priority(cm_node->tos); + if (iwdev->vsi.dscp_mode) { + cm_node->user_pri = listener->user_pri; + } else { + cm_node->tos = max(listener->tos, cm_info->tos); + cm_node->user_pri = rt_tos2priority(cm_node->tos); + } ibdev_dbg(&iwdev->ibdev, "DCB: listener: TOS:[%d] UP:[%d]\n", cm_node->tos, cm_node->user_pri); @@ -3201,8 +3204,7 @@ static void irdma_cm_free_ah_nop(struct irdma_cm_node *cm_node) * @iwdev: iwarp device structure * @rdma_ver: HW version */ -enum irdma_status_code irdma_setup_cm_core(struct irdma_device *iwdev, - u8 rdma_ver) +int irdma_setup_cm_core(struct irdma_device *iwdev, u8 rdma_ver) { struct irdma_cm_core *cm_core = &iwdev->cm_core; @@ -3212,7 +3214,7 @@ enum irdma_status_code irdma_setup_cm_core(struct irdma_device *iwdev, /* Handles CM event work items send to Iwarp core */ cm_core->event_wq = alloc_ordered_workqueue("iwarp-event-wq", 0); if (!cm_core->event_wq) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; INIT_LIST_HEAD(&cm_core->listen_list); @@ -3835,7 +3837,11 @@ int irdma_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_info.cm_id = cm_id; cm_info.qh_qpid = iwdev->vsi.ilq->qp_id; cm_info.tos = cm_id->tos; - cm_info.user_pri = rt_tos2priority(cm_id->tos); + if (iwdev->vsi.dscp_mode) + cm_info.user_pri = + iwqp->sc_qp.vsi->dscp_map[irdma_tos2dscp(cm_info.tos)]; + else + cm_info.user_pri = rt_tos2priority(cm_id->tos); if (iwqp->sc_qp.dev->ws_add(iwqp->sc_qp.vsi, cm_info.user_pri)) return -ENOMEM; @@ -3915,10 +3921,10 @@ int irdma_create_listen(struct iw_cm_id *cm_id, int backlog) struct irdma_device *iwdev; struct irdma_cm_listener *cm_listen_node; struct irdma_cm_info cm_info = {}; - enum irdma_status_code err; struct sockaddr_in *laddr; struct sockaddr_in6 *laddr6; bool wildcard = false; + int err; iwdev = to_iwdev(cm_id->device); if (!iwdev) @@ -3959,7 +3965,7 @@ int irdma_create_listen(struct iw_cm_id *cm_id, int backlog) } } - if (cm_info.vlan_id >= VLAN_N_VID && iwdev->dcb) + if (cm_info.vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode) cm_info.vlan_id = 0; cm_info.backlog = backlog; cm_info.cm_id = cm_id; @@ -3977,7 +3983,11 @@ int irdma_create_listen(struct iw_cm_id *cm_id, int backlog) cm_id->provider_data = cm_listen_node; cm_listen_node->tos = cm_id->tos; - cm_listen_node->user_pri = rt_tos2priority(cm_id->tos); + if (iwdev->vsi.dscp_mode) + cm_listen_node->user_pri = + iwdev->vsi.dscp_map[irdma_tos2dscp(cm_id->tos)]; + else + cm_listen_node->user_pri = rt_tos2priority(cm_id->tos); cm_info.user_pri = cm_listen_node->user_pri; if (!cm_listen_node->reused_node) { if (wildcard) { @@ -4325,11 +4335,11 @@ static void irdma_qhash_ctrl(struct irdma_device *iwdev, struct list_head *child_listen_list = &parent_listen_node->child_listen_list; struct irdma_cm_listener *child_listen_node; struct list_head *pos, *tpos; - enum irdma_status_code err; bool node_allocated = false; enum irdma_quad_hash_manage_type op = ifup ? IRDMA_QHASH_MANAGE_TYPE_ADD : IRDMA_QHASH_MANAGE_TYPE_DELETE; + int err; list_for_each_safe (pos, tpos, child_listen_list) { child_listen_node = list_entry(pos, struct irdma_cm_listener, diff --git a/drivers/infiniband/hw/irdma/cm.h b/drivers/infiniband/hw/irdma/cm.h index 3bf42728e9b7..19c284975fc7 100644 --- a/drivers/infiniband/hw/irdma/cm.h +++ b/drivers/infiniband/hw/irdma/cm.h @@ -384,6 +384,13 @@ int irdma_schedule_cm_timer(struct irdma_cm_node *cm_node, struct irdma_puda_buf *sqbuf, enum irdma_timer_type type, int send_retrans, int close_when_complete); + +static inline u8 irdma_tos2dscp(u8 tos) +{ +#define IRDMA_DSCP_VAL GENMASK(7, 2) + return (u8)FIELD_GET(IRDMA_DSCP_VAL, tos); +} + int irdma_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); int irdma_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len); int irdma_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param); diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index 3141a9c85de5..58c0e181ca2b 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -3,7 +3,6 @@ #include #include "osdep.h" -#include "status.h" #include "hmc.h" #include "defs.h" #include "type.h" @@ -70,6 +69,31 @@ void irdma_sc_suspend_resume_qps(struct irdma_sc_vsi *vsi, u8 op) } } +static void irdma_set_qos_info(struct irdma_sc_vsi *vsi, + struct irdma_l2params *l2p) +{ + u8 i; + + vsi->qos_rel_bw = l2p->vsi_rel_bw; + vsi->qos_prio_type = l2p->vsi_prio_type; + vsi->dscp_mode = l2p->dscp_mode; + if (l2p->dscp_mode) { + memcpy(vsi->dscp_map, l2p->dscp_map, sizeof(vsi->dscp_map)); + for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) + l2p->up2tc[i] = i; + } + for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) { + if (vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) + vsi->qos[i].qs_handle = l2p->qs_handle_list[i]; + vsi->qos[i].traffic_class = l2p->up2tc[i]; + vsi->qos[i].rel_bw = + l2p->tc_info[vsi->qos[i].traffic_class].rel_bw; + vsi->qos[i].prio_type = + l2p->tc_info[vsi->qos[i].traffic_class].prio_type; + vsi->qos[i].valid = false; + } +} + /** * irdma_change_l2params - given the new l2 parameters, change all qp * @vsi: RDMA VSI pointer @@ -88,6 +112,7 @@ void irdma_change_l2params(struct irdma_sc_vsi *vsi, return; vsi->tc_change_pending = false; + irdma_set_qos_info(vsi, l2params); irdma_sc_suspend_resume_qps(vsi, IRDMA_OP_RESUME); } @@ -154,17 +179,16 @@ void irdma_sc_pd_init(struct irdma_sc_dev *dev, struct irdma_sc_pd *pd, u32 pd_i * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code -irdma_sc_add_arp_cache_entry(struct irdma_sc_cqp *cqp, - struct irdma_add_arp_cache_entry_info *info, - u64 scratch, bool post_sq) +static int irdma_sc_add_arp_cache_entry(struct irdma_sc_cqp *cqp, + struct irdma_add_arp_cache_entry_info *info, + u64 scratch, bool post_sq) { __le64 *wqe; u64 hdr; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 8, info->reach_max); set_64bit_val(wqe, 16, ether_addr_to_u64(info->mac_addr)); @@ -192,16 +216,15 @@ irdma_sc_add_arp_cache_entry(struct irdma_sc_cqp *cqp, * @arp_index: arp index to delete arp entry * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code -irdma_sc_del_arp_cache_entry(struct irdma_sc_cqp *cqp, u64 scratch, - u16 arp_index, bool post_sq) +static int irdma_sc_del_arp_cache_entry(struct irdma_sc_cqp *cqp, u64 scratch, + u16 arp_index, bool post_sq) { __le64 *wqe; u64 hdr; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; hdr = arp_index | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MANAGE_ARP) | @@ -226,17 +249,16 @@ irdma_sc_del_arp_cache_entry(struct irdma_sc_cqp *cqp, u64 scratch, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code -irdma_sc_manage_apbvt_entry(struct irdma_sc_cqp *cqp, - struct irdma_apbvt_info *info, u64 scratch, - bool post_sq) +static int irdma_sc_manage_apbvt_entry(struct irdma_sc_cqp *cqp, + struct irdma_apbvt_info *info, + u64 scratch, bool post_sq) { __le64 *wqe; u64 hdr; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, info->port); @@ -274,7 +296,7 @@ irdma_sc_manage_apbvt_entry(struct irdma_sc_cqp *cqp, * quad hash entry in the hardware will point to iwarp's qp * number and requires no calls from the driver. */ -static enum irdma_status_code +static int irdma_sc_manage_qhash_table_entry(struct irdma_sc_cqp *cqp, struct irdma_qhash_table_info *info, u64 scratch, bool post_sq) @@ -287,7 +309,7 @@ irdma_sc_manage_qhash_table_entry(struct irdma_sc_cqp *cqp, wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 0, ether_addr_to_u64(info->mac_addr)); @@ -350,10 +372,9 @@ irdma_sc_manage_qhash_table_entry(struct irdma_sc_cqp *cqp, * @qp: sc qp * @info: initialization qp info */ -enum irdma_status_code irdma_sc_qp_init(struct irdma_sc_qp *qp, - struct irdma_qp_init_info *info) +int irdma_sc_qp_init(struct irdma_sc_qp *qp, struct irdma_qp_init_info *info) { - enum irdma_status_code ret_code; + int ret_code; u32 pble_obj_cnt; u16 wqe_size; @@ -361,7 +382,7 @@ enum irdma_status_code irdma_sc_qp_init(struct irdma_sc_qp *qp, info->pd->dev->hw_attrs.uk_attrs.max_hw_wq_frags || info->qp_uk_init_info.max_rq_frag_cnt > info->pd->dev->hw_attrs.uk_attrs.max_hw_wq_frags) - return IRDMA_ERR_INVALID_FRAG_COUNT; + return -EINVAL; qp->dev = info->pd->dev; qp->vsi = info->vsi; @@ -384,7 +405,7 @@ enum irdma_status_code irdma_sc_qp_init(struct irdma_sc_qp *qp, if ((info->virtual_map && info->sq_pa >= pble_obj_cnt) || (info->virtual_map && info->rq_pa >= pble_obj_cnt)) - return IRDMA_ERR_INVALID_PBLE_INDEX; + return -EINVAL; qp->llp_stream_handle = (void *)(-1); qp->hw_sq_size = irdma_get_encoded_wqe_size(qp->qp_uk.sq_ring.size, @@ -424,8 +445,8 @@ enum irdma_status_code irdma_sc_qp_init(struct irdma_sc_qp *qp, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -enum irdma_status_code irdma_sc_qp_create(struct irdma_sc_qp *qp, struct irdma_create_qp_info *info, - u64 scratch, bool post_sq) +int irdma_sc_qp_create(struct irdma_sc_qp *qp, struct irdma_create_qp_info *info, + u64 scratch, bool post_sq) { struct irdma_sc_cqp *cqp; __le64 *wqe; @@ -433,12 +454,12 @@ enum irdma_status_code irdma_sc_qp_create(struct irdma_sc_qp *qp, struct irdma_c cqp = qp->dev->cqp; if (qp->qp_uk.qp_id < cqp->dev->hw_attrs.min_hw_qp_id || - qp->qp_uk.qp_id > (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt - 1)) - return IRDMA_ERR_INVALID_QP_ID; + qp->qp_uk.qp_id >= cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_QP].max_cnt) + return -EINVAL; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, qp->hw_host_ctx_pa); set_64bit_val(wqe, 40, qp->shadow_area_pa); @@ -475,9 +496,8 @@ enum irdma_status_code irdma_sc_qp_create(struct irdma_sc_qp *qp, struct irdma_c * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -enum irdma_status_code irdma_sc_qp_modify(struct irdma_sc_qp *qp, - struct irdma_modify_qp_info *info, - u64 scratch, bool post_sq) +int irdma_sc_qp_modify(struct irdma_sc_qp *qp, struct irdma_modify_qp_info *info, + u64 scratch, bool post_sq) { __le64 *wqe; struct irdma_sc_cqp *cqp; @@ -488,7 +508,7 @@ enum irdma_status_code irdma_sc_qp_modify(struct irdma_sc_qp *qp, cqp = qp->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; if (info->next_iwarp_state == IRDMA_QP_STATE_TERMINATE) { if (info->dont_send_fin) @@ -546,9 +566,8 @@ enum irdma_status_code irdma_sc_qp_modify(struct irdma_sc_qp *qp, * @ignore_mw_bnd: memory window bind flag * @post_sq: flag for cqp db to ring */ -enum irdma_status_code irdma_sc_qp_destroy(struct irdma_sc_qp *qp, u64 scratch, - bool remove_hash_idx, bool ignore_mw_bnd, - bool post_sq) +int irdma_sc_qp_destroy(struct irdma_sc_qp *qp, u64 scratch, + bool remove_hash_idx, bool ignore_mw_bnd, bool post_sq) { __le64 *wqe; struct irdma_sc_cqp *cqp; @@ -557,7 +576,7 @@ enum irdma_status_code irdma_sc_qp_destroy(struct irdma_sc_qp *qp, u64 scratch, cqp = qp->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, qp->hw_host_ctx_pa); set_64bit_val(wqe, 40, qp->shadow_area_pa); @@ -739,16 +758,15 @@ void irdma_sc_qp_setctx_roce(struct irdma_sc_qp *qp, __le64 *qp_ctx, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code -irdma_sc_alloc_local_mac_entry(struct irdma_sc_cqp *cqp, u64 scratch, - bool post_sq) +static int irdma_sc_alloc_local_mac_entry(struct irdma_sc_cqp *cqp, u64 scratch, + bool post_sq) { __le64 *wqe; u64 hdr; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_ALLOCATE_LOC_MAC_TABLE_ENTRY) | @@ -774,17 +792,16 @@ irdma_sc_alloc_local_mac_entry(struct irdma_sc_cqp *cqp, u64 scratch, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code -irdma_sc_add_local_mac_entry(struct irdma_sc_cqp *cqp, - struct irdma_local_mac_entry_info *info, - u64 scratch, bool post_sq) +static int irdma_sc_add_local_mac_entry(struct irdma_sc_cqp *cqp, + struct irdma_local_mac_entry_info *info, + u64 scratch, bool post_sq) { __le64 *wqe; u64 header; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 32, ether_addr_to_u64(info->mac_addr)); @@ -813,16 +830,16 @@ irdma_sc_add_local_mac_entry(struct irdma_sc_cqp *cqp, * @ignore_ref_count: to force mac adde delete * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code -irdma_sc_del_local_mac_entry(struct irdma_sc_cqp *cqp, u64 scratch, - u16 entry_idx, u8 ignore_ref_count, bool post_sq) +static int irdma_sc_del_local_mac_entry(struct irdma_sc_cqp *cqp, u64 scratch, + u16 entry_idx, u8 ignore_ref_count, + bool post_sq) { __le64 *wqe; u64 header; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; header = FIELD_PREP(IRDMA_CQPSQ_MLM_TABLEIDX, entry_idx) | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_MANAGE_LOC_MAC_TABLE) | @@ -1035,10 +1052,9 @@ void irdma_sc_qp_setctx(struct irdma_sc_qp *qp, __le64 *qp_ctx, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code -irdma_sc_alloc_stag(struct irdma_sc_dev *dev, - struct irdma_allocate_stag_info *info, u64 scratch, - bool post_sq) +static int irdma_sc_alloc_stag(struct irdma_sc_dev *dev, + struct irdma_allocate_stag_info *info, + u64 scratch, bool post_sq) { __le64 *wqe; struct irdma_sc_cqp *cqp; @@ -1055,7 +1071,7 @@ irdma_sc_alloc_stag(struct irdma_sc_dev *dev, cqp = dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 8, FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID) | @@ -1097,10 +1113,9 @@ irdma_sc_alloc_stag(struct irdma_sc_dev *dev, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code -irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev, - struct irdma_reg_ns_stag_info *info, u64 scratch, - bool post_sq) +static int irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev, + struct irdma_reg_ns_stag_info *info, + u64 scratch, bool post_sq) { __le64 *wqe; u64 fbo; @@ -1118,7 +1133,7 @@ irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev, else if (info->page_size == 0x1000) page_size = IRDMA_PAGE_SIZE_4K; else - return IRDMA_ERR_PARAM; + return -EINVAL; if (info->access_rights & (IRDMA_ACCESS_FLAGS_REMOTEREAD_ONLY | IRDMA_ACCESS_FLAGS_REMOTEWRITE_ONLY)) @@ -1128,12 +1143,12 @@ irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev, pble_obj_cnt = dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; if (info->chunk_size && info->first_pm_pbl_index >= pble_obj_cnt) - return IRDMA_ERR_INVALID_PBLE_INDEX; + return -EINVAL; cqp = dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; fbo = info->va & (info->page_size - 1); set_64bit_val(wqe, 0, @@ -1186,10 +1201,9 @@ irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code -irdma_sc_dealloc_stag(struct irdma_sc_dev *dev, - struct irdma_dealloc_stag_info *info, u64 scratch, - bool post_sq) +static int irdma_sc_dealloc_stag(struct irdma_sc_dev *dev, + struct irdma_dealloc_stag_info *info, + u64 scratch, bool post_sq) { u64 hdr; __le64 *wqe; @@ -1198,7 +1212,7 @@ irdma_sc_dealloc_stag(struct irdma_sc_dev *dev, cqp = dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 8, FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID)); @@ -1227,9 +1241,9 @@ irdma_sc_dealloc_stag(struct irdma_sc_dev *dev, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code -irdma_sc_mw_alloc(struct irdma_sc_dev *dev, struct irdma_mw_alloc_info *info, - u64 scratch, bool post_sq) +static int irdma_sc_mw_alloc(struct irdma_sc_dev *dev, + struct irdma_mw_alloc_info *info, u64 scratch, + bool post_sq) { u64 hdr; struct irdma_sc_cqp *cqp; @@ -1238,7 +1252,7 @@ irdma_sc_mw_alloc(struct irdma_sc_dev *dev, struct irdma_mw_alloc_info *info, cqp = dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 8, FLD_LS_64(dev, info->pd_id, IRDMA_CQPSQ_STAG_PDID)); @@ -1268,9 +1282,9 @@ irdma_sc_mw_alloc(struct irdma_sc_dev *dev, struct irdma_mw_alloc_info *info, * @info: fast mr info * @post_sq: flag for cqp db to ring */ -enum irdma_status_code -irdma_sc_mr_fast_register(struct irdma_sc_qp *qp, - struct irdma_fast_reg_stag_info *info, bool post_sq) +int irdma_sc_mr_fast_register(struct irdma_sc_qp *qp, + struct irdma_fast_reg_stag_info *info, + bool post_sq) { u64 temp, hdr; __le64 *wqe; @@ -1292,7 +1306,7 @@ irdma_sc_mr_fast_register(struct irdma_sc_qp *qp, wqe = irdma_qp_get_next_send_wqe(&qp->qp_uk, &wqe_idx, IRDMA_QP_WQE_MIN_QUANTA, 0, &sq_info); if (!wqe) - return IRDMA_ERR_QP_TOOMANY_WRS_POSTED; + return -ENOMEM; irdma_clr_wqes(&qp->qp_uk, wqe_idx); @@ -1821,8 +1835,7 @@ void irdma_terminate_received(struct irdma_sc_qp *qp, } } -static enum irdma_status_code irdma_null_ws_add(struct irdma_sc_vsi *vsi, - u8 user_pri) +static int irdma_null_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri) { return 0; } @@ -1845,7 +1858,6 @@ static void irdma_null_ws_reset(struct irdma_sc_vsi *vsi) void irdma_sc_vsi_init(struct irdma_sc_vsi *vsi, struct irdma_vsi_init_info *info) { - struct irdma_l2params *l2p; int i; vsi->dev = info->dev; @@ -1858,18 +1870,8 @@ void irdma_sc_vsi_init(struct irdma_sc_vsi *vsi, if (vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) vsi->fcn_id = info->dev->hmc_fn_id; - l2p = info->params; - vsi->qos_rel_bw = l2p->vsi_rel_bw; - vsi->qos_prio_type = l2p->vsi_prio_type; + irdma_set_qos_info(vsi, info->params); for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) { - if (vsi->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1) - vsi->qos[i].qs_handle = l2p->qs_handle_list[i]; - vsi->qos[i].traffic_class = info->params->up2tc[i]; - vsi->qos[i].rel_bw = - l2p->tc_info[vsi->qos[i].traffic_class].rel_bw; - vsi->qos[i].prio_type = - l2p->tc_info[vsi->qos[i].traffic_class].prio_type; - vsi->qos[i].valid = false; mutex_init(&vsi->qos[i].qos_mutex); INIT_LIST_HEAD(&vsi->qos[i].qplist); } @@ -1918,8 +1920,8 @@ static u8 irdma_get_fcn_id(struct irdma_sc_vsi *vsi) * @vsi: pointer to the vsi structure * @info: The info structure used for initialization */ -enum irdma_status_code irdma_vsi_stats_init(struct irdma_sc_vsi *vsi, - struct irdma_vsi_stats_info *info) +int irdma_vsi_stats_init(struct irdma_sc_vsi *vsi, + struct irdma_vsi_stats_info *info) { u8 fcn_id = info->fcn_id; struct irdma_dma_mem *stats_buff_mem; @@ -1934,7 +1936,7 @@ enum irdma_status_code irdma_vsi_stats_init(struct irdma_sc_vsi *vsi, &stats_buff_mem->pa, GFP_KERNEL); if (!stats_buff_mem->va) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; vsi->pestat->gather_info.gather_stats_va = stats_buff_mem->va; vsi->pestat->gather_info.last_gather_stats_va = @@ -1961,7 +1963,7 @@ stats_error: stats_buff_mem->va, stats_buff_mem->pa); stats_buff_mem->va = NULL; - return IRDMA_ERR_CQP_COMPL_ERROR; + return -EIO; } /** @@ -2023,19 +2025,19 @@ u8 irdma_get_encoded_wqe_size(u32 wqsize, enum irdma_queue_type queue_type) * @info: gather stats info structure * @scratch: u64 saved to be used during cqp completion */ -static enum irdma_status_code -irdma_sc_gather_stats(struct irdma_sc_cqp *cqp, - struct irdma_stats_gather_info *info, u64 scratch) +static int irdma_sc_gather_stats(struct irdma_sc_cqp *cqp, + struct irdma_stats_gather_info *info, + u64 scratch) { __le64 *wqe; u64 temp; if (info->stats_buff_mem.size < IRDMA_GATHER_STATS_BUF_SIZE) - return IRDMA_ERR_BUF_TOO_SHORT; + return -ENOMEM; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 40, FIELD_PREP(IRDMA_CQPSQ_STATS_HMC_FCN_INDEX, info->hmc_fcn_index)); @@ -2070,17 +2072,16 @@ irdma_sc_gather_stats(struct irdma_sc_cqp *cqp, * @alloc: alloc vs. delete flag * @scratch: u64 saved to be used during cqp completion */ -static enum irdma_status_code -irdma_sc_manage_stats_inst(struct irdma_sc_cqp *cqp, - struct irdma_stats_inst_info *info, bool alloc, - u64 scratch) +static int irdma_sc_manage_stats_inst(struct irdma_sc_cqp *cqp, + struct irdma_stats_inst_info *info, + bool alloc, u64 scratch) { __le64 *wqe; u64 temp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 40, FIELD_PREP(IRDMA_CQPSQ_STATS_HMC_FCN_INDEX, info->hmc_fn_id)); @@ -2108,9 +2109,8 @@ irdma_sc_manage_stats_inst(struct irdma_sc_cqp *cqp, * @info: User priority map info * @scratch: u64 saved to be used during cqp completion */ -static enum irdma_status_code irdma_sc_set_up_map(struct irdma_sc_cqp *cqp, - struct irdma_up_info *info, - u64 scratch) +static int irdma_sc_set_up_map(struct irdma_sc_cqp *cqp, + struct irdma_up_info *info, u64 scratch) { __le64 *wqe; u64 temp = 0; @@ -2118,7 +2118,7 @@ static enum irdma_status_code irdma_sc_set_up_map(struct irdma_sc_cqp *cqp, wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) temp |= (u64)info->map[i] << (i * 8); @@ -2151,17 +2151,16 @@ static enum irdma_status_code irdma_sc_set_up_map(struct irdma_sc_cqp *cqp, * @node_op: 0 for add 1 for modify, 2 for delete * @scratch: u64 saved to be used during cqp completion */ -static enum irdma_status_code -irdma_sc_manage_ws_node(struct irdma_sc_cqp *cqp, - struct irdma_ws_node_info *info, - enum irdma_ws_node_op node_op, u64 scratch) +static int irdma_sc_manage_ws_node(struct irdma_sc_cqp *cqp, + struct irdma_ws_node_info *info, + enum irdma_ws_node_op node_op, u64 scratch) { __le64 *wqe; u64 temp = 0; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 32, FIELD_PREP(IRDMA_CQPSQ_WS_VSI, info->vsi) | @@ -2194,9 +2193,9 @@ irdma_sc_manage_ws_node(struct irdma_sc_cqp *cqp, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -enum irdma_status_code irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp, - struct irdma_qp_flush_info *info, - u64 scratch, bool post_sq) +int irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp, + struct irdma_qp_flush_info *info, u64 scratch, + bool post_sq) { u64 temp = 0; __le64 *wqe; @@ -2215,13 +2214,13 @@ enum irdma_status_code irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp, ibdev_dbg(to_ibdev(qp->dev), "CQP: Additional flush request ignored for qp %x\n", qp->qp_uk.qp_id); - return IRDMA_ERR_FLUSHED_Q; + return -EALREADY; } cqp = qp->pd->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; if (info->userflushcode) { if (flush_rq) @@ -2268,9 +2267,9 @@ enum irdma_status_code irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code irdma_sc_gen_ae(struct irdma_sc_qp *qp, - struct irdma_gen_ae_info *info, - u64 scratch, bool post_sq) +static int irdma_sc_gen_ae(struct irdma_sc_qp *qp, + struct irdma_gen_ae_info *info, u64 scratch, + bool post_sq) { u64 temp; __le64 *wqe; @@ -2280,7 +2279,7 @@ static enum irdma_status_code irdma_sc_gen_ae(struct irdma_sc_qp *qp, cqp = qp->pd->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; temp = info->ae_code | FIELD_PREP(IRDMA_CQPSQ_FWQE_AESOURCE, info->ae_src); @@ -2308,10 +2307,9 @@ static enum irdma_status_code irdma_sc_gen_ae(struct irdma_sc_qp *qp, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code -irdma_sc_qp_upload_context(struct irdma_sc_dev *dev, - struct irdma_upload_context_info *info, u64 scratch, - bool post_sq) +static int irdma_sc_qp_upload_context(struct irdma_sc_dev *dev, + struct irdma_upload_context_info *info, + u64 scratch, bool post_sq) { __le64 *wqe; struct irdma_sc_cqp *cqp; @@ -2320,7 +2318,7 @@ irdma_sc_qp_upload_context(struct irdma_sc_dev *dev, cqp = dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, info->buf_pa); @@ -2349,21 +2347,20 @@ irdma_sc_qp_upload_context(struct irdma_sc_dev *dev, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code -irdma_sc_manage_push_page(struct irdma_sc_cqp *cqp, - struct irdma_cqp_manage_push_page_info *info, - u64 scratch, bool post_sq) +static int irdma_sc_manage_push_page(struct irdma_sc_cqp *cqp, + struct irdma_cqp_manage_push_page_info *info, + u64 scratch, bool post_sq) { __le64 *wqe; u64 hdr; if (info->free_page && info->push_idx >= cqp->dev->hw_attrs.max_hw_device_pages) - return IRDMA_ERR_INVALID_PUSH_PAGE_INDEX; + return -EINVAL; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, info->qs_handle); hdr = FIELD_PREP(IRDMA_CQPSQ_MPP_PPIDX, info->push_idx) | @@ -2389,16 +2386,15 @@ irdma_sc_manage_push_page(struct irdma_sc_cqp *cqp, * @qp: sc qp struct * @scratch: u64 saved to be used during cqp completion */ -static enum irdma_status_code irdma_sc_suspend_qp(struct irdma_sc_cqp *cqp, - struct irdma_sc_qp *qp, - u64 scratch) +static int irdma_sc_suspend_qp(struct irdma_sc_cqp *cqp, struct irdma_sc_qp *qp, + u64 scratch) { u64 hdr; __le64 *wqe; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; hdr = FIELD_PREP(IRDMA_CQPSQ_SUSPENDQP_QPID, qp->qp_uk.qp_id) | FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_SUSPEND_QP) | @@ -2420,16 +2416,15 @@ static enum irdma_status_code irdma_sc_suspend_qp(struct irdma_sc_cqp *cqp, * @qp: sc qp struct * @scratch: u64 saved to be used during cqp completion */ -static enum irdma_status_code irdma_sc_resume_qp(struct irdma_sc_cqp *cqp, - struct irdma_sc_qp *qp, - u64 scratch) +static int irdma_sc_resume_qp(struct irdma_sc_cqp *cqp, struct irdma_sc_qp *qp, + u64 scratch) { u64 hdr; __le64 *wqe; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, FIELD_PREP(IRDMA_CQPSQ_RESUMEQP_QSHANDLE, qp->qs_handle)); @@ -2462,14 +2457,13 @@ static inline void irdma_sc_cq_ack(struct irdma_sc_cq *cq) * @cq: cq struct * @info: cq initialization info */ -enum irdma_status_code irdma_sc_cq_init(struct irdma_sc_cq *cq, - struct irdma_cq_init_info *info) +int irdma_sc_cq_init(struct irdma_sc_cq *cq, struct irdma_cq_init_info *info) { u32 pble_obj_cnt; pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; if (info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt) - return IRDMA_ERR_INVALID_PBLE_INDEX; + return -EINVAL; cq->cq_pa = info->cq_base_pa; cq->dev = info->dev; @@ -2500,23 +2494,21 @@ enum irdma_status_code irdma_sc_cq_init(struct irdma_sc_cq *cq, * @check_overflow: flag for overflow check * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code irdma_sc_cq_create(struct irdma_sc_cq *cq, - u64 scratch, - bool check_overflow, - bool post_sq) +static int irdma_sc_cq_create(struct irdma_sc_cq *cq, u64 scratch, + bool check_overflow, bool post_sq) { __le64 *wqe; struct irdma_sc_cqp *cqp; u64 hdr; struct irdma_sc_ceq *ceq; - enum irdma_status_code ret_code = 0; + int ret_code = 0; cqp = cq->dev->cqp; - if (cq->cq_uk.cq_id > (cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].max_cnt - 1)) - return IRDMA_ERR_INVALID_CQ_ID; + if (cq->cq_uk.cq_id >= cqp->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].max_cnt) + return -EINVAL; - if (cq->ceq_id > (cq->dev->hmc_fpm_misc.max_ceqs - 1)) - return IRDMA_ERR_INVALID_CEQ_ID; + if (cq->ceq_id >= cq->dev->hmc_fpm_misc.max_ceqs) + return -EINVAL; ceq = cq->dev->ceq[cq->ceq_id]; if (ceq && ceq->reg_cq) @@ -2529,7 +2521,7 @@ static enum irdma_status_code irdma_sc_cq_create(struct irdma_sc_cq *cq, if (!wqe) { if (ceq && ceq->reg_cq) irdma_sc_remove_cq_ctx(ceq, cq); - return IRDMA_ERR_RING_FULL; + return -ENOMEM; } set_64bit_val(wqe, 0, cq->cq_uk.cq_size); @@ -2575,8 +2567,7 @@ static enum irdma_status_code irdma_sc_cq_create(struct irdma_sc_cq *cq, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -enum irdma_status_code irdma_sc_cq_destroy(struct irdma_sc_cq *cq, u64 scratch, - bool post_sq) +int irdma_sc_cq_destroy(struct irdma_sc_cq *cq, u64 scratch, bool post_sq) { struct irdma_sc_cqp *cqp; __le64 *wqe; @@ -2586,7 +2577,7 @@ enum irdma_status_code irdma_sc_cq_destroy(struct irdma_sc_cq *cq, u64 scratch, cqp = cq->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; ceq = cq->dev->ceq[cq->ceq_id]; if (ceq && ceq->reg_cq) @@ -2642,9 +2633,9 @@ void irdma_sc_cq_resize(struct irdma_sc_cq *cq, struct irdma_modify_cq_info *inf * @scratch: u64 saved to be used during cqp completion * @post_sq: flag to post to sq */ -static enum irdma_status_code -irdma_sc_cq_modify(struct irdma_sc_cq *cq, struct irdma_modify_cq_info *info, - u64 scratch, bool post_sq) +static int irdma_sc_cq_modify(struct irdma_sc_cq *cq, + struct irdma_modify_cq_info *info, u64 scratch, + bool post_sq) { struct irdma_sc_cqp *cqp; __le64 *wqe; @@ -2654,12 +2645,12 @@ irdma_sc_cq_modify(struct irdma_sc_cq *cq, struct irdma_modify_cq_info *info, pble_obj_cnt = cq->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; if (info->cq_resize && info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt) - return IRDMA_ERR_INVALID_PBLE_INDEX; + return -EINVAL; cqp = cq->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 0, info->cq_size); set_64bit_val(wqe, 8, (uintptr_t)cq >> 1); @@ -2733,8 +2724,8 @@ static inline void irdma_get_cqp_reg_info(struct irdma_sc_cqp *cqp, u32 *val, * @tail: wqtail register value * @count: how many times to try for completion */ -static enum irdma_status_code irdma_cqp_poll_registers(struct irdma_sc_cqp *cqp, - u32 tail, u32 count) +static int irdma_cqp_poll_registers(struct irdma_sc_cqp *cqp, u32 tail, + u32 count) { u32 i = 0; u32 newtail, error, val; @@ -2746,7 +2737,7 @@ static enum irdma_status_code irdma_cqp_poll_registers(struct irdma_sc_cqp *cqp, ibdev_dbg(to_ibdev(cqp->dev), "CQP: CQPERRCODES error_code[x%08X]\n", error); - return IRDMA_ERR_CQP_COMPL_ERROR; + return -EIO; } if (newtail != tail) { /* SUCCESS */ @@ -2757,7 +2748,7 @@ static enum irdma_status_code irdma_cqp_poll_registers(struct irdma_sc_cqp *cqp, udelay(cqp->dev->hw_attrs.max_sleep_count); } - return IRDMA_ERR_TIMEOUT; + return -ETIMEDOUT; } /** @@ -2912,10 +2903,9 @@ static u64 irdma_sc_decode_fpm_query(__le64 *buf, u32 buf_idx, * parses fpm query buffer and copy max_cnt and * size value of hmc objects in hmc_info */ -static enum irdma_status_code -irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf, - struct irdma_hmc_info *hmc_info, - struct irdma_hmc_fpm_misc *hmc_fpm_misc) +static int irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf, + struct irdma_hmc_info *hmc_info, + struct irdma_hmc_fpm_misc *hmc_fpm_misc) { struct irdma_hmc_obj_info *obj_info; u64 temp; @@ -2954,7 +2944,7 @@ irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf, obj_info[IRDMA_HMC_IW_XFFL].size = 4; hmc_fpm_misc->xf_block_size = FIELD_GET(IRDMA_QUERY_FPM_XFBLOCKSIZE, temp); if (!hmc_fpm_misc->xf_block_size) - return IRDMA_ERR_INVALID_SIZE; + return -EINVAL; irdma_sc_decode_fpm_query(buf, 72, obj_info, IRDMA_HMC_IW_Q1); get_64bit_val(buf, 80, &temp); @@ -2963,7 +2953,7 @@ irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf, hmc_fpm_misc->q1_block_size = FIELD_GET(IRDMA_QUERY_FPM_Q1BLOCKSIZE, temp); if (!hmc_fpm_misc->q1_block_size) - return IRDMA_ERR_INVALID_SIZE; + return -EINVAL; irdma_sc_decode_fpm_query(buf, 88, obj_info, IRDMA_HMC_IW_TIMER); @@ -2987,7 +2977,7 @@ irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf, hmc_fpm_misc->rrf_block_size = FIELD_GET(IRDMA_QUERY_FPM_RRFBLOCKSIZE, temp); if (!hmc_fpm_misc->rrf_block_size && obj_info[IRDMA_HMC_IW_RRFFL].max_cnt) - return IRDMA_ERR_INVALID_SIZE; + return -EINVAL; irdma_sc_decode_fpm_query(buf, 144, obj_info, IRDMA_HMC_IW_HDR); irdma_sc_decode_fpm_query(buf, 152, obj_info, IRDMA_HMC_IW_MD); @@ -2999,7 +2989,7 @@ irdma_sc_parse_fpm_query_buf(struct irdma_sc_dev *dev, __le64 *buf, hmc_fpm_misc->ooiscf_block_size = FIELD_GET(IRDMA_QUERY_FPM_OOISCFBLOCKSIZE, temp); if (!hmc_fpm_misc->ooiscf_block_size && obj_info[IRDMA_HMC_IW_OOISCFFL].max_cnt) - return IRDMA_ERR_INVALID_SIZE; + return -EINVAL; return 0; } @@ -3027,8 +3017,7 @@ static u32 irdma_sc_find_reg_cq(struct irdma_sc_ceq *ceq, * @ceq: ceq sc structure * @cq: cq sc structure */ -enum irdma_status_code irdma_sc_add_cq_ctx(struct irdma_sc_ceq *ceq, - struct irdma_sc_cq *cq) +int irdma_sc_add_cq_ctx(struct irdma_sc_ceq *ceq, struct irdma_sc_cq *cq) { unsigned long flags; @@ -3036,7 +3025,7 @@ enum irdma_status_code irdma_sc_add_cq_ctx(struct irdma_sc_ceq *ceq, if (ceq->reg_cq_size == ceq->elem_cnt) { spin_unlock_irqrestore(&ceq->req_cq_lock, flags); - return IRDMA_ERR_REG_CQ_FULL; + return -ENOMEM; } ceq->reg_cq[ceq->reg_cq_size++] = cq; @@ -3077,15 +3066,15 @@ exit: * * Initializes the object and context buffers for a control Queue Pair. */ -enum irdma_status_code irdma_sc_cqp_init(struct irdma_sc_cqp *cqp, - struct irdma_cqp_init_info *info) +int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp, + struct irdma_cqp_init_info *info) { u8 hw_sq_size; if (info->sq_size > IRDMA_CQP_SW_SQSIZE_2048 || info->sq_size < IRDMA_CQP_SW_SQSIZE_4 || ((info->sq_size & (info->sq_size - 1)))) - return IRDMA_ERR_INVALID_SIZE; + return -EINVAL; hw_sq_size = irdma_get_encoded_wqe_size(info->sq_size, IRDMA_QUEUE_TYPE_CQP); @@ -3135,13 +3124,12 @@ enum irdma_status_code irdma_sc_cqp_init(struct irdma_sc_cqp *cqp, * @maj_err: If error, major err number * @min_err: If error, minor err number */ -enum irdma_status_code irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err, - u16 *min_err) +int irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err, u16 *min_err) { u64 temp; u8 hw_rev; u32 cnt = 0, p1, p2, val = 0, err_code; - enum irdma_status_code ret_code; + int ret_code; hw_rev = cqp->dev->hw_attrs.uk_attrs.hw_rev; cqp->sdbuf.size = ALIGN(IRDMA_UPDATE_SD_BUFF_SIZE * cqp->sq_size, @@ -3150,7 +3138,7 @@ enum irdma_status_code irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_er cqp->sdbuf.size, &cqp->sdbuf.pa, GFP_KERNEL); if (!cqp->sdbuf.va) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; spin_lock_init(&cqp->dev->cqp_lock); @@ -3205,7 +3193,7 @@ enum irdma_status_code irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_er do { if (cnt++ > cqp->dev->hw_attrs.max_done_count) { - ret_code = IRDMA_ERR_TIMEOUT; + ret_code = -ETIMEDOUT; goto err; } udelay(cqp->dev->hw_attrs.max_sleep_count); @@ -3213,7 +3201,7 @@ enum irdma_status_code irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_er } while (!val); if (FLD_RS_32(cqp->dev, val, IRDMA_CCQPSTATUS_CCQP_ERR)) { - ret_code = IRDMA_ERR_DEVICE_NOT_SUPPORTED; + ret_code = -EOPNOTSUPP; goto err; } @@ -3254,7 +3242,7 @@ __le64 *irdma_sc_cqp_get_next_send_wqe_idx(struct irdma_sc_cqp *cqp, u64 scratch u32 *wqe_idx) { __le64 *wqe = NULL; - enum irdma_status_code ret_code; + int ret_code; if (IRDMA_RING_FULL_ERR(cqp->sq_ring)) { ibdev_dbg(to_ibdev(cqp->dev), @@ -3281,16 +3269,16 @@ __le64 *irdma_sc_cqp_get_next_send_wqe_idx(struct irdma_sc_cqp *cqp, u64 scratch * irdma_sc_cqp_destroy - destroy cqp during close * @cqp: struct for cqp hw */ -enum irdma_status_code irdma_sc_cqp_destroy(struct irdma_sc_cqp *cqp) +int irdma_sc_cqp_destroy(struct irdma_sc_cqp *cqp) { u32 cnt = 0, val; - enum irdma_status_code ret_code = 0; + int ret_code = 0; writel(0, cqp->dev->hw_regs[IRDMA_CCQPHIGH]); writel(0, cqp->dev->hw_regs[IRDMA_CCQPLOW]); do { if (cnt++ > cqp->dev->hw_attrs.max_done_count) { - ret_code = IRDMA_ERR_TIMEOUT; + ret_code = -ETIMEDOUT; break; } udelay(cqp->dev->hw_attrs.max_sleep_count); @@ -3335,8 +3323,8 @@ void irdma_sc_ccq_arm(struct irdma_sc_cq *ccq) * @ccq: ccq sc struct * @info: completion q entry to return */ -enum irdma_status_code irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq, - struct irdma_ccq_cqe_info *info) +int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq, + struct irdma_ccq_cqe_info *info) { u64 qp_ctx, temp, temp1; __le64 *cqe; @@ -3344,7 +3332,7 @@ enum irdma_status_code irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq, u32 wqe_idx; u32 error; u8 polarity; - enum irdma_status_code ret_code = 0; + int ret_code = 0; if (ccq->cq_uk.avoid_mem_cflct) cqe = IRDMA_GET_CURRENT_EXTENDED_CQ_ELEM(&ccq->cq_uk); @@ -3354,7 +3342,7 @@ enum irdma_status_code irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq, get_64bit_val(cqe, 24, &temp); polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, temp); if (polarity != ccq->cq_uk.polarity) - return IRDMA_ERR_Q_EMPTY; + return -ENOENT; get_64bit_val(cqe, 8, &qp_ctx); cqp = (struct irdma_sc_cqp *)(unsigned long)qp_ctx; @@ -3401,25 +3389,25 @@ enum irdma_status_code irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq, * @op_code: cqp opcode for completion * @compl_info: completion q entry to return */ -enum irdma_status_code irdma_sc_poll_for_cqp_op_done(struct irdma_sc_cqp *cqp, u8 op_code, - struct irdma_ccq_cqe_info *compl_info) +int irdma_sc_poll_for_cqp_op_done(struct irdma_sc_cqp *cqp, u8 op_code, + struct irdma_ccq_cqe_info *compl_info) { struct irdma_ccq_cqe_info info = {}; struct irdma_sc_cq *ccq; - enum irdma_status_code ret_code = 0; + int ret_code = 0; u32 cnt = 0; ccq = cqp->dev->ccq; while (1) { if (cnt++ > 100 * cqp->dev->hw_attrs.max_done_count) - return IRDMA_ERR_TIMEOUT; + return -ETIMEDOUT; if (irdma_sc_ccq_get_cqe_info(ccq, &info)) { udelay(cqp->dev->hw_attrs.max_sleep_count); continue; } if (info.error && info.op_code != IRDMA_CQP_OP_QUERY_STAG) { - ret_code = IRDMA_ERR_CQP_COMPL_ERROR; + ret_code = -EIO; break; } /* make sure op code matches*/ @@ -3443,17 +3431,16 @@ enum irdma_status_code irdma_sc_poll_for_cqp_op_done(struct irdma_sc_cqp *cqp, u * @info: info for the manage function table operation * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code -irdma_sc_manage_hmc_pm_func_table(struct irdma_sc_cqp *cqp, - struct irdma_hmc_fcn_info *info, - u64 scratch, bool post_sq) +static int irdma_sc_manage_hmc_pm_func_table(struct irdma_sc_cqp *cqp, + struct irdma_hmc_fcn_info *info, + u64 scratch, bool post_sq) { __le64 *wqe; u64 hdr; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 0, 0); set_64bit_val(wqe, 8, 0); @@ -3486,8 +3473,7 @@ irdma_sc_manage_hmc_pm_func_table(struct irdma_sc_cqp *cqp, * for fpm commit * @cqp: struct for cqp hw */ -static enum irdma_status_code -irdma_sc_commit_fpm_val_done(struct irdma_sc_cqp *cqp) +static int irdma_sc_commit_fpm_val_done(struct irdma_sc_cqp *cqp) { return irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_COMMIT_FPM_VAL, NULL); @@ -3502,19 +3488,19 @@ irdma_sc_commit_fpm_val_done(struct irdma_sc_cqp *cqp) * @post_sq: flag for cqp db to ring * @wait_type: poll ccq or cqp registers for cqp completion */ -static enum irdma_status_code -irdma_sc_commit_fpm_val(struct irdma_sc_cqp *cqp, u64 scratch, u8 hmc_fn_id, - struct irdma_dma_mem *commit_fpm_mem, bool post_sq, - u8 wait_type) +static int irdma_sc_commit_fpm_val(struct irdma_sc_cqp *cqp, u64 scratch, + u8 hmc_fn_id, + struct irdma_dma_mem *commit_fpm_mem, + bool post_sq, u8 wait_type) { __le64 *wqe; u64 hdr; u32 tail, val, error; - enum irdma_status_code ret_code = 0; + int ret_code = 0; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, hmc_fn_id); set_64bit_val(wqe, 32, commit_fpm_mem->pa); @@ -3548,8 +3534,7 @@ irdma_sc_commit_fpm_val(struct irdma_sc_cqp *cqp, u64 scratch, u8 hmc_fn_id, * query fpm * @cqp: struct for cqp hw */ -static enum irdma_status_code -irdma_sc_query_fpm_val_done(struct irdma_sc_cqp *cqp) +static int irdma_sc_query_fpm_val_done(struct irdma_sc_cqp *cqp) { return irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_QUERY_FPM_VAL, NULL); @@ -3564,19 +3549,19 @@ irdma_sc_query_fpm_val_done(struct irdma_sc_cqp *cqp) * @post_sq: flag for cqp db to ring * @wait_type: poll ccq or cqp registers for cqp completion */ -static enum irdma_status_code -irdma_sc_query_fpm_val(struct irdma_sc_cqp *cqp, u64 scratch, u8 hmc_fn_id, - struct irdma_dma_mem *query_fpm_mem, bool post_sq, - u8 wait_type) +static int irdma_sc_query_fpm_val(struct irdma_sc_cqp *cqp, u64 scratch, + u8 hmc_fn_id, + struct irdma_dma_mem *query_fpm_mem, + bool post_sq, u8 wait_type) { __le64 *wqe; u64 hdr; u32 tail, val, error; - enum irdma_status_code ret_code = 0; + int ret_code = 0; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, hmc_fn_id); set_64bit_val(wqe, 32, query_fpm_mem->pa); @@ -3608,21 +3593,21 @@ irdma_sc_query_fpm_val(struct irdma_sc_cqp *cqp, u64 scratch, u8 hmc_fn_id, * @ceq: ceq sc structure * @info: ceq initialization info */ -enum irdma_status_code irdma_sc_ceq_init(struct irdma_sc_ceq *ceq, - struct irdma_ceq_init_info *info) +int irdma_sc_ceq_init(struct irdma_sc_ceq *ceq, + struct irdma_ceq_init_info *info) { u32 pble_obj_cnt; if (info->elem_cnt < info->dev->hw_attrs.min_hw_ceq_size || info->elem_cnt > info->dev->hw_attrs.max_hw_ceq_size) - return IRDMA_ERR_INVALID_SIZE; + return -EINVAL; - if (info->ceq_id > (info->dev->hmc_fpm_misc.max_ceqs - 1)) - return IRDMA_ERR_INVALID_CEQ_ID; + if (info->ceq_id >= info->dev->hmc_fpm_misc.max_ceqs) + return -EINVAL; pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; if (info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt) - return IRDMA_ERR_INVALID_PBLE_INDEX; + return -EINVAL; ceq->size = sizeof(*ceq); ceq->ceqe_base = (struct irdma_ceqe *)info->ceqe_base; @@ -3655,8 +3640,8 @@ enum irdma_status_code irdma_sc_ceq_init(struct irdma_sc_ceq *ceq, * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code irdma_sc_ceq_create(struct irdma_sc_ceq *ceq, u64 scratch, - bool post_sq) +static int irdma_sc_ceq_create(struct irdma_sc_ceq *ceq, u64 scratch, + bool post_sq) { struct irdma_sc_cqp *cqp; __le64 *wqe; @@ -3665,7 +3650,7 @@ static enum irdma_status_code irdma_sc_ceq_create(struct irdma_sc_ceq *ceq, u64 cqp = ceq->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, ceq->elem_cnt); set_64bit_val(wqe, 32, (ceq->virtual_map ? 0 : ceq->ceq_elem_pa)); @@ -3697,8 +3682,7 @@ static enum irdma_status_code irdma_sc_ceq_create(struct irdma_sc_ceq *ceq, u64 * irdma_sc_cceq_create_done - poll for control ceq wqe to complete * @ceq: ceq sc structure */ -static enum irdma_status_code -irdma_sc_cceq_create_done(struct irdma_sc_ceq *ceq) +static int irdma_sc_cceq_create_done(struct irdma_sc_ceq *ceq) { struct irdma_sc_cqp *cqp; @@ -3711,7 +3695,7 @@ irdma_sc_cceq_create_done(struct irdma_sc_ceq *ceq) * irdma_sc_cceq_destroy_done - poll for destroy cceq to complete * @ceq: ceq sc structure */ -enum irdma_status_code irdma_sc_cceq_destroy_done(struct irdma_sc_ceq *ceq) +int irdma_sc_cceq_destroy_done(struct irdma_sc_ceq *ceq) { struct irdma_sc_cqp *cqp; @@ -3730,9 +3714,9 @@ enum irdma_status_code irdma_sc_cceq_destroy_done(struct irdma_sc_ceq *ceq) * @ceq: ceq sc structure * @scratch: u64 saved to be used during cqp completion */ -enum irdma_status_code irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratch) +int irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratch) { - enum irdma_status_code ret_code; + int ret_code; struct irdma_sc_dev *dev = ceq->dev; dev->ccq->vsi = ceq->vsi; @@ -3755,8 +3739,7 @@ enum irdma_status_code irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratc * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -enum irdma_status_code irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratch, - bool post_sq) +int irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratch, bool post_sq) { struct irdma_sc_cqp *cqp; __le64 *wqe; @@ -3765,7 +3748,7 @@ enum irdma_status_code irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratc cqp = ceq->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, ceq->elem_cnt); set_64bit_val(wqe, 48, ceq->first_pm_pbl_idx); @@ -3884,19 +3867,19 @@ void irdma_sc_cleanup_ceqes(struct irdma_sc_cq *cq, struct irdma_sc_ceq *ceq) * @aeq: aeq structure ptr * @info: aeq initialization info */ -enum irdma_status_code irdma_sc_aeq_init(struct irdma_sc_aeq *aeq, - struct irdma_aeq_init_info *info) +int irdma_sc_aeq_init(struct irdma_sc_aeq *aeq, + struct irdma_aeq_init_info *info) { u32 pble_obj_cnt; if (info->elem_cnt < info->dev->hw_attrs.min_hw_aeq_size || info->elem_cnt > info->dev->hw_attrs.max_hw_aeq_size) - return IRDMA_ERR_INVALID_SIZE; + return -EINVAL; pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; if (info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt) - return IRDMA_ERR_INVALID_PBLE_INDEX; + return -EINVAL; aeq->size = sizeof(*aeq); aeq->polarity = 1; @@ -3921,8 +3904,8 @@ enum irdma_status_code irdma_sc_aeq_init(struct irdma_sc_aeq *aeq, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code irdma_sc_aeq_create(struct irdma_sc_aeq *aeq, - u64 scratch, bool post_sq) +static int irdma_sc_aeq_create(struct irdma_sc_aeq *aeq, u64 scratch, + bool post_sq) { __le64 *wqe; struct irdma_sc_cqp *cqp; @@ -3931,7 +3914,7 @@ static enum irdma_status_code irdma_sc_aeq_create(struct irdma_sc_aeq *aeq, cqp = aeq->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, aeq->elem_cnt); set_64bit_val(wqe, 32, (aeq->virtual_map ? 0 : aeq->aeq_elem_pa)); @@ -3960,8 +3943,8 @@ static enum irdma_status_code irdma_sc_aeq_create(struct irdma_sc_aeq *aeq, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -static enum irdma_status_code irdma_sc_aeq_destroy(struct irdma_sc_aeq *aeq, - u64 scratch, bool post_sq) +static int irdma_sc_aeq_destroy(struct irdma_sc_aeq *aeq, u64 scratch, + bool post_sq) { __le64 *wqe; struct irdma_sc_cqp *cqp; @@ -3974,7 +3957,7 @@ static enum irdma_status_code irdma_sc_aeq_destroy(struct irdma_sc_aeq *aeq, cqp = dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, aeq->elem_cnt); set_64bit_val(wqe, 48, aeq->first_pm_pbl_idx); hdr = FIELD_PREP(IRDMA_CQPSQ_OPCODE, IRDMA_CQP_OP_DESTROY_AEQ) | @@ -3997,8 +3980,8 @@ static enum irdma_status_code irdma_sc_aeq_destroy(struct irdma_sc_aeq *aeq, * @aeq: aeq structure ptr * @info: aeqe info to be returned */ -enum irdma_status_code irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, - struct irdma_aeqe_info *info) +int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, + struct irdma_aeqe_info *info) { u64 temp, compl_ctx; __le64 *aeqe; @@ -4012,7 +3995,7 @@ enum irdma_status_code irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, polarity = (u8)FIELD_GET(IRDMA_AEQE_VALID, temp); if (aeq->polarity != polarity) - return IRDMA_ERR_Q_EMPTY; + return -ENOENT; print_hex_dump_debug("WQE: AEQ_ENTRY WQE", DUMP_PREFIX_OFFSET, 16, 8, aeqe, 16, false); @@ -4157,22 +4140,21 @@ void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count) * @cq: sc's cq ctruct * @info: info for control cq initialization */ -enum irdma_status_code irdma_sc_ccq_init(struct irdma_sc_cq *cq, - struct irdma_ccq_init_info *info) +int irdma_sc_ccq_init(struct irdma_sc_cq *cq, struct irdma_ccq_init_info *info) { u32 pble_obj_cnt; if (info->num_elem < info->dev->hw_attrs.uk_attrs.min_hw_cq_size || info->num_elem > info->dev->hw_attrs.uk_attrs.max_hw_cq_size) - return IRDMA_ERR_INVALID_SIZE; + return -EINVAL; - if (info->ceq_id > (info->dev->hmc_fpm_misc.max_ceqs - 1)) - return IRDMA_ERR_INVALID_CEQ_ID; + if (info->ceq_id >= info->dev->hmc_fpm_misc.max_ceqs) + return -EINVAL; pble_obj_cnt = info->dev->hmc_info->hmc_obj[IRDMA_HMC_IW_PBLE].cnt; if (info->virtual_map && info->first_pm_pbl_idx >= pble_obj_cnt) - return IRDMA_ERR_INVALID_PBLE_INDEX; + return -EINVAL; cq->cq_pa = info->cq_pa; cq->cq_uk.cq_base = info->cq_base; @@ -4209,7 +4191,7 @@ enum irdma_status_code irdma_sc_ccq_init(struct irdma_sc_cq *cq, * irdma_sc_ccq_create_done - poll cqp for ccq create * @ccq: ccq sc struct */ -static inline enum irdma_status_code irdma_sc_ccq_create_done(struct irdma_sc_cq *ccq) +static inline int irdma_sc_ccq_create_done(struct irdma_sc_cq *ccq) { struct irdma_sc_cqp *cqp; @@ -4225,10 +4207,10 @@ static inline enum irdma_status_code irdma_sc_ccq_create_done(struct irdma_sc_cq * @check_overflow: overlow flag for ccq * @post_sq: flag for cqp db to ring */ -enum irdma_status_code irdma_sc_ccq_create(struct irdma_sc_cq *ccq, u64 scratch, - bool check_overflow, bool post_sq) +int irdma_sc_ccq_create(struct irdma_sc_cq *ccq, u64 scratch, + bool check_overflow, bool post_sq) { - enum irdma_status_code ret_code; + int ret_code; ret_code = irdma_sc_cq_create(ccq, scratch, check_overflow, post_sq); if (ret_code) @@ -4250,19 +4232,18 @@ enum irdma_status_code irdma_sc_ccq_create(struct irdma_sc_cq *ccq, u64 scratch, * @scratch: u64 saved to be used during cqp completion * @post_sq: flag for cqp db to ring */ -enum irdma_status_code irdma_sc_ccq_destroy(struct irdma_sc_cq *ccq, u64 scratch, - bool post_sq) +int irdma_sc_ccq_destroy(struct irdma_sc_cq *ccq, u64 scratch, bool post_sq) { struct irdma_sc_cqp *cqp; __le64 *wqe; u64 hdr; - enum irdma_status_code ret_code = 0; + int ret_code = 0; u32 tail, val, error; cqp = ccq->dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 0, ccq->cq_uk.cq_size); set_64bit_val(wqe, 8, (uintptr_t)ccq >> 1); @@ -4301,13 +4282,12 @@ enum irdma_status_code irdma_sc_ccq_destroy(struct irdma_sc_cq *ccq, u64 scratch * @dev : ptr to irdma_dev struct * @hmc_fn_id: hmc function id */ -enum irdma_status_code irdma_sc_init_iw_hmc(struct irdma_sc_dev *dev, - u8 hmc_fn_id) +int irdma_sc_init_iw_hmc(struct irdma_sc_dev *dev, u8 hmc_fn_id) { struct irdma_hmc_info *hmc_info; struct irdma_hmc_fpm_misc *hmc_fpm_misc; struct irdma_dma_mem query_fpm_mem; - enum irdma_status_code ret_code = 0; + int ret_code = 0; u8 wait_type; hmc_info = dev->hmc_info; @@ -4338,14 +4318,13 @@ enum irdma_status_code irdma_sc_init_iw_hmc(struct irdma_sc_dev *dev, * @dev : ptr to irdma_dev struct * @hmc_fn_id: hmc function id */ -static enum irdma_status_code irdma_sc_cfg_iw_fpm(struct irdma_sc_dev *dev, - u8 hmc_fn_id) +static int irdma_sc_cfg_iw_fpm(struct irdma_sc_dev *dev, u8 hmc_fn_id) { struct irdma_hmc_info *hmc_info; struct irdma_hmc_obj_info *obj_info; __le64 *buf; struct irdma_dma_mem commit_fpm_mem; - enum irdma_status_code ret_code = 0; + int ret_code = 0; u8 wait_type; hmc_info = dev->hmc_info; @@ -4408,9 +4387,8 @@ static enum irdma_status_code irdma_sc_cfg_iw_fpm(struct irdma_sc_dev *dev, * @info: sd info for wqe * @scratch: u64 saved to be used during cqp completion */ -static enum irdma_status_code -cqp_sds_wqe_fill(struct irdma_sc_cqp *cqp, struct irdma_update_sds_info *info, - u64 scratch) +static int cqp_sds_wqe_fill(struct irdma_sc_cqp *cqp, + struct irdma_update_sds_info *info, u64 scratch) { u64 data; u64 hdr; @@ -4422,7 +4400,7 @@ cqp_sds_wqe_fill(struct irdma_sc_cqp *cqp, struct irdma_update_sds_info *info, wqe = irdma_sc_cqp_get_next_send_wqe_idx(cqp, scratch, &wqe_idx); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; wqe_entries = (info->cnt > 3) ? 3 : info->cnt; mem_entries = info->cnt - wqe_entries; @@ -4488,12 +4466,11 @@ cqp_sds_wqe_fill(struct irdma_sc_cqp *cqp, struct irdma_update_sds_info *info, * @info: sd info for sd's * @scratch: u64 saved to be used during cqp completion */ -static enum irdma_status_code -irdma_update_pe_sds(struct irdma_sc_dev *dev, - struct irdma_update_sds_info *info, u64 scratch) +static int irdma_update_pe_sds(struct irdma_sc_dev *dev, + struct irdma_update_sds_info *info, u64 scratch) { struct irdma_sc_cqp *cqp = dev->cqp; - enum irdma_status_code ret_code; + int ret_code; ret_code = cqp_sds_wqe_fill(cqp, info, scratch); if (!ret_code) @@ -4507,13 +4484,12 @@ irdma_update_pe_sds(struct irdma_sc_dev *dev, * @dev: sc device struct * @info: sd info for sd's */ -enum irdma_status_code -irdma_update_sds_noccq(struct irdma_sc_dev *dev, - struct irdma_update_sds_info *info) +int irdma_update_sds_noccq(struct irdma_sc_dev *dev, + struct irdma_update_sds_info *info) { u32 error, val, tail; struct irdma_sc_cqp *cqp = dev->cqp; - enum irdma_status_code ret_code; + int ret_code; ret_code = cqp_sds_wqe_fill(cqp, info, 0); if (ret_code) @@ -4534,10 +4510,9 @@ irdma_update_sds_noccq(struct irdma_sc_dev *dev, * @post_sq: flag for cqp db to ring * @poll_registers: flag to poll register for cqp completion */ -enum irdma_status_code -irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch, - u8 hmc_fn_id, bool post_sq, - bool poll_registers) +int irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch, + u8 hmc_fn_id, bool post_sq, + bool poll_registers) { u64 hdr; __le64 *wqe; @@ -4545,7 +4520,7 @@ irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch, wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, FIELD_PREP(IRDMA_SHMC_PAGE_ALLOCATED_HMC_FN_ID, hmc_fn_id)); @@ -4620,8 +4595,7 @@ static u32 irdma_est_sd(struct irdma_sc_dev *dev, * irdma_sc_query_rdma_features_done - poll cqp for query features done * @cqp: struct for cqp hw */ -static enum irdma_status_code -irdma_sc_query_rdma_features_done(struct irdma_sc_cqp *cqp) +static int irdma_sc_query_rdma_features_done(struct irdma_sc_cqp *cqp) { return irdma_sc_poll_for_cqp_op_done(cqp, IRDMA_CQP_OP_QUERY_RDMA_FEATURES, @@ -4634,16 +4608,15 @@ irdma_sc_query_rdma_features_done(struct irdma_sc_cqp *cqp) * @buf: buffer to hold query info * @scratch: u64 saved to be used during cqp completion */ -static enum irdma_status_code -irdma_sc_query_rdma_features(struct irdma_sc_cqp *cqp, - struct irdma_dma_mem *buf, u64 scratch) +static int irdma_sc_query_rdma_features(struct irdma_sc_cqp *cqp, + struct irdma_dma_mem *buf, u64 scratch) { __le64 *wqe; u64 temp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; temp = buf->pa; set_64bit_val(wqe, 32, temp); @@ -4667,9 +4640,9 @@ irdma_sc_query_rdma_features(struct irdma_sc_cqp *cqp, * irdma_get_rdma_features - get RDMA features * @dev: sc device struct */ -enum irdma_status_code irdma_get_rdma_features(struct irdma_sc_dev *dev) +int irdma_get_rdma_features(struct irdma_sc_dev *dev) { - enum irdma_status_code ret_code; + int ret_code; struct irdma_dma_mem feat_buf; u64 temp; u16 byte_idx, feat_type, feat_cnt, feat_idx; @@ -4679,7 +4652,7 @@ enum irdma_status_code irdma_get_rdma_features(struct irdma_sc_dev *dev) feat_buf.va = dma_alloc_coherent(dev->hw->device, feat_buf.size, &feat_buf.pa, GFP_KERNEL); if (!feat_buf.va) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; ret_code = irdma_sc_query_rdma_features(dev->cqp, &feat_buf, 0); if (!ret_code) @@ -4690,7 +4663,7 @@ enum irdma_status_code irdma_get_rdma_features(struct irdma_sc_dev *dev) get_64bit_val(feat_buf.va, 0, &temp); feat_cnt = (u16)FIELD_GET(IRDMA_FEATURE_CNT, temp); if (feat_cnt < 2) { - ret_code = IRDMA_ERR_INVALID_FEAT_CNT; + ret_code = -EINVAL; goto exit; } else if (feat_cnt > IRDMA_MAX_FEATURES) { ibdev_dbg(to_ibdev(dev), @@ -4704,7 +4677,7 @@ enum irdma_status_code irdma_get_rdma_features(struct irdma_sc_dev *dev) feat_buf.size, &feat_buf.pa, GFP_KERNEL); if (!feat_buf.va) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; ret_code = irdma_sc_query_rdma_features(dev->cqp, &feat_buf, 0); if (!ret_code) @@ -4715,7 +4688,7 @@ enum irdma_status_code irdma_get_rdma_features(struct irdma_sc_dev *dev) get_64bit_val(feat_buf.va, 0, &temp); feat_cnt = (u16)FIELD_GET(IRDMA_FEATURE_CNT, temp); if (feat_cnt < 2) { - ret_code = IRDMA_ERR_INVALID_FEAT_CNT; + ret_code = -EINVAL; goto exit; } } @@ -4794,7 +4767,7 @@ static void cfg_fpm_value_gen_2(struct irdma_sc_dev *dev, * @dev: sc device struct * @qp_count: desired qp count */ -enum irdma_status_code irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) +int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) { struct irdma_virt_mem virt_mem; u32 i, mem_size; @@ -4805,7 +4778,7 @@ enum irdma_status_code irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) u32 loop_count = 0; struct irdma_hmc_info *hmc_info; struct irdma_hmc_fpm_misc *hmc_fpm_misc; - enum irdma_status_code ret_code = 0; + int ret_code = 0; hmc_info = dev->hmc_info; hmc_fpm_misc = &dev->hmc_fpm_misc; @@ -4932,7 +4905,7 @@ enum irdma_status_code irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) ibdev_dbg(to_ibdev(dev), "HMC: cfg_fpm failed loop_cnt=%d, sd_needed=%d, max sd count %d\n", loop_count, sd_needed, hmc_info->sd_table.sd_cnt); - return IRDMA_ERR_CFG; + return -EINVAL; } if (loop_count > 1 && sd_needed < hmc_fpm_misc->max_sds) { @@ -4968,7 +4941,7 @@ enum irdma_status_code irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) if (!virt_mem.va) { ibdev_dbg(to_ibdev(dev), "HMC: failed to allocate memory for sd_entry buffer\n"); - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } hmc_info->sd_table.sd_entry = virt_mem.va; @@ -4980,10 +4953,10 @@ enum irdma_status_code irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count) * @dev: rdma device * @pcmdinfo: cqp command info */ -static enum irdma_status_code irdma_exec_cqp_cmd(struct irdma_sc_dev *dev, - struct cqp_cmds_info *pcmdinfo) +static int irdma_exec_cqp_cmd(struct irdma_sc_dev *dev, + struct cqp_cmds_info *pcmdinfo) { - enum irdma_status_code status; + int status; struct irdma_dma_mem val_mem; bool alloc = false; @@ -5245,7 +5218,7 @@ static enum irdma_status_code irdma_exec_cqp_cmd(struct irdma_sc_dev *dev, pcmdinfo->in.u.mc_modify.scratch); break; default: - status = IRDMA_NOT_SUPPORTED; + status = -EOPNOTSUPP; break; } @@ -5257,10 +5230,10 @@ static enum irdma_status_code irdma_exec_cqp_cmd(struct irdma_sc_dev *dev, * @dev: sc device struct * @pcmdinfo: cqp command info */ -enum irdma_status_code irdma_process_cqp_cmd(struct irdma_sc_dev *dev, - struct cqp_cmds_info *pcmdinfo) +int irdma_process_cqp_cmd(struct irdma_sc_dev *dev, + struct cqp_cmds_info *pcmdinfo) { - enum irdma_status_code status = 0; + int status = 0; unsigned long flags; spin_lock_irqsave(&dev->cqp_lock, flags); @@ -5276,9 +5249,9 @@ enum irdma_status_code irdma_process_cqp_cmd(struct irdma_sc_dev *dev, * irdma_process_bh - called from tasklet for cqp list * @dev: sc device struct */ -enum irdma_status_code irdma_process_bh(struct irdma_sc_dev *dev) +int irdma_process_bh(struct irdma_sc_dev *dev) { - enum irdma_status_code status = 0; + int status = 0; struct cqp_cmds_info *pcmdinfo; unsigned long flags; @@ -5366,12 +5339,11 @@ static inline void irdma_sc_init_hw(struct irdma_sc_dev *dev) * @dev: Device pointer * @info: Device init info */ -enum irdma_status_code irdma_sc_dev_init(enum irdma_vers ver, - struct irdma_sc_dev *dev, - struct irdma_device_init_info *info) +int irdma_sc_dev_init(enum irdma_vers ver, struct irdma_sc_dev *dev, + struct irdma_device_init_info *info) { u32 val; - enum irdma_status_code ret_code = 0; + int ret_code = 0; u8 db_size; INIT_LIST_HEAD(&dev->cqp_cmd_head); /* for CQP command backlog */ @@ -5415,7 +5387,7 @@ enum irdma_status_code irdma_sc_dev_init(enum irdma_vers ver, irdma_sc_init_hw(dev); if (irdma_wait_pe_ready(dev)) - return IRDMA_ERR_TIMEOUT; + return -ETIMEDOUT; val = readl(dev->hw_regs[IRDMA_GLPCI_LBARCTRL]); db_size = (u8)FIELD_GET(IRDMA_GLPCI_LBARCTRL_PE_DB_SIZE, val); @@ -5423,7 +5395,7 @@ enum irdma_status_code irdma_sc_dev_init(enum irdma_vers ver, ibdev_dbg(to_ibdev(dev), "DEV: RDMA PE doorbell is not enabled in CSR val 0x%x db_size=%d\n", val, db_size); - return IRDMA_ERR_PE_DOORBELL_NOT_ENA; + return -ENODEV; } dev->db_addr = dev->hw->hw_addr + (uintptr_t)dev->hw_regs[IRDMA_DB_ADDR_OFFSET]; diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h index cc3d9a365b35..e03e03082a5f 100644 --- a/drivers/infiniband/hw/irdma/defs.h +++ b/drivers/infiniband/hw/irdma/defs.h @@ -964,7 +964,7 @@ enum irdma_cqp_op_type { (_ring).head = ((_ring).head + 1) % size; \ (_retcode) = 0; \ } else { \ - (_retcode) = IRDMA_ERR_RING_FULL; \ + (_retcode) = -ENOMEM; \ } \ } #define IRDMA_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \ @@ -975,7 +975,7 @@ enum irdma_cqp_op_type { (_ring).head = ((_ring).head + (_count)) % size; \ (_retcode) = 0; \ } else { \ - (_retcode) = IRDMA_ERR_RING_FULL; \ + (_retcode) = -ENOMEM; \ } \ } #define IRDMA_SQ_RING_MOVE_HEAD(_ring, _retcode) \ @@ -986,7 +986,7 @@ enum irdma_cqp_op_type { (_ring).head = ((_ring).head + 1) % size; \ (_retcode) = 0; \ } else { \ - (_retcode) = IRDMA_ERR_RING_FULL; \ + (_retcode) = -ENOMEM; \ } \ } #define IRDMA_SQ_RING_MOVE_HEAD_BY_COUNT(_ring, _count, _retcode) \ @@ -997,7 +997,7 @@ enum irdma_cqp_op_type { (_ring).head = ((_ring).head + (_count)) % size; \ (_retcode) = 0; \ } else { \ - (_retcode) = IRDMA_ERR_RING_FULL; \ + (_retcode) = -ENOMEM; \ } \ } #define IRDMA_RING_MOVE_HEAD_BY_COUNT_NOCHECK(_ring, _count) \ diff --git a/drivers/infiniband/hw/irdma/hmc.c b/drivers/infiniband/hw/irdma/hmc.c index ecffcb93c05a..49307ce8c4da 100644 --- a/drivers/infiniband/hw/irdma/hmc.c +++ b/drivers/infiniband/hw/irdma/hmc.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB /* Copyright (c) 2015 - 2021 Intel Corporation */ #include "osdep.h" -#include "status.h" #include "hmc.h" #include "defs.h" #include "type.h" @@ -121,10 +120,8 @@ static inline void irdma_invalidate_pf_hmc_pd(struct irdma_sc_dev *dev, u32 sd_i * @type: paged or direct sd * @setsd: flag to set or clear sd */ -enum irdma_status_code irdma_hmc_sd_one(struct irdma_sc_dev *dev, u8 hmc_fn_id, - u64 pa, u32 sd_idx, - enum irdma_sd_entry_type type, - bool setsd) +int irdma_hmc_sd_one(struct irdma_sc_dev *dev, u8 hmc_fn_id, u64 pa, u32 sd_idx, + enum irdma_sd_entry_type type, bool setsd) { struct irdma_update_sds_info sdinfo; @@ -145,16 +142,15 @@ enum irdma_status_code irdma_hmc_sd_one(struct irdma_sc_dev *dev, u8 hmc_fn_id, * @sd_cnt: number of sd entries * @setsd: flag to set or clear sd */ -static enum irdma_status_code irdma_hmc_sd_grp(struct irdma_sc_dev *dev, - struct irdma_hmc_info *hmc_info, - u32 sd_index, u32 sd_cnt, - bool setsd) +static int irdma_hmc_sd_grp(struct irdma_sc_dev *dev, + struct irdma_hmc_info *hmc_info, u32 sd_index, + u32 sd_cnt, bool setsd) { struct irdma_hmc_sd_entry *sd_entry; struct irdma_update_sds_info sdinfo = {}; u64 pa; u32 i; - enum irdma_status_code ret_code = 0; + int ret_code = 0; sdinfo.hmc_fn_id = hmc_info->hmc_fn_id; for (i = sd_index; i < sd_index + sd_cnt; i++) { @@ -196,16 +192,15 @@ static enum irdma_status_code irdma_hmc_sd_grp(struct irdma_sc_dev *dev, * @dev: pointer to the device structure * @info: create obj info */ -static enum irdma_status_code -irdma_hmc_finish_add_sd_reg(struct irdma_sc_dev *dev, - struct irdma_hmc_create_obj_info *info) +static int irdma_hmc_finish_add_sd_reg(struct irdma_sc_dev *dev, + struct irdma_hmc_create_obj_info *info) { if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) - return IRDMA_ERR_INVALID_HMC_OBJ_INDEX; + return -EINVAL; if ((info->start_idx + info->count) > info->hmc_info->hmc_obj[info->rsrc_type].cnt) - return IRDMA_ERR_INVALID_HMC_OBJ_COUNT; + return -EINVAL; if (!info->add_sd_cnt) return 0; @@ -222,9 +217,8 @@ irdma_hmc_finish_add_sd_reg(struct irdma_sc_dev *dev, * This will allocate memory for PDs and backing pages and populate * the sd and pd entries. */ -enum irdma_status_code -irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev, - struct irdma_hmc_create_obj_info *info) +int irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev, + struct irdma_hmc_create_obj_info *info) { struct irdma_hmc_sd_entry *sd_entry; u32 sd_idx, sd_lmt; @@ -232,10 +226,10 @@ irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev, u32 pd_idx1 = 0, pd_lmt1 = 0; u32 i, j; bool pd_error = false; - enum irdma_status_code ret_code = 0; + int ret_code = 0; if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) - return IRDMA_ERR_INVALID_HMC_OBJ_INDEX; + return -EINVAL; if ((info->start_idx + info->count) > info->hmc_info->hmc_obj[info->rsrc_type].cnt) { @@ -243,7 +237,7 @@ irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev, "HMC: error type %u, start = %u, req cnt %u, cnt = %u\n", info->rsrc_type, info->start_idx, info->count, info->hmc_info->hmc_obj[info->rsrc_type].cnt); - return IRDMA_ERR_INVALID_HMC_OBJ_COUNT; + return -EINVAL; } irdma_find_sd_index_limit(info->hmc_info, info->rsrc_type, @@ -251,7 +245,7 @@ irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev, &sd_lmt); if (sd_idx >= info->hmc_info->sd_table.sd_cnt || sd_lmt > info->hmc_info->sd_table.sd_cnt) { - return IRDMA_ERR_INVALID_SD_INDEX; + return -EINVAL; } irdma_find_pd_index_limit(info->hmc_info, info->rsrc_type, @@ -312,7 +306,7 @@ exit_sd_error: irdma_prep_remove_pd_page(info->hmc_info, (j - 1)); break; default: - ret_code = IRDMA_ERR_INVALID_SD_TYPE; + ret_code = -EINVAL; break; } j--; @@ -327,12 +321,12 @@ exit_sd_error: * @info: dele obj info * @reset: true if called before reset */ -static enum irdma_status_code -irdma_finish_del_sd_reg(struct irdma_sc_dev *dev, - struct irdma_hmc_del_obj_info *info, bool reset) +static int irdma_finish_del_sd_reg(struct irdma_sc_dev *dev, + struct irdma_hmc_del_obj_info *info, + bool reset) { struct irdma_hmc_sd_entry *sd_entry; - enum irdma_status_code ret_code = 0; + int ret_code = 0; u32 i, sd_idx; struct irdma_dma_mem *mem; @@ -373,22 +367,21 @@ irdma_finish_del_sd_reg(struct irdma_sc_dev *dev, * caller should deallocate memory allocated previously for * book-keeping information about PDs and backing storage. */ -enum irdma_status_code irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev, - struct irdma_hmc_del_obj_info *info, - bool reset) +int irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev, + struct irdma_hmc_del_obj_info *info, bool reset) { struct irdma_hmc_pd_table *pd_table; u32 sd_idx, sd_lmt; u32 pd_idx, pd_lmt, rel_pd_idx; u32 i, j; - enum irdma_status_code ret_code = 0; + int ret_code = 0; if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) { ibdev_dbg(to_ibdev(dev), "HMC: error start_idx[%04d] >= [type %04d].cnt[%04d]\n", info->start_idx, info->rsrc_type, info->hmc_info->hmc_obj[info->rsrc_type].cnt); - return IRDMA_ERR_INVALID_HMC_OBJ_INDEX; + return -EINVAL; } if ((info->start_idx + info->count) > @@ -397,7 +390,7 @@ enum irdma_status_code irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev, "HMC: error start_idx[%04d] + count %04d >= [type %04d].cnt[%04d]\n", info->start_idx, info->count, info->rsrc_type, info->hmc_info->hmc_obj[info->rsrc_type].cnt); - return IRDMA_ERR_INVALID_HMC_OBJ_COUNT; + return -EINVAL; } irdma_find_pd_index_limit(info->hmc_info, info->rsrc_type, @@ -433,7 +426,7 @@ enum irdma_status_code irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev, if (sd_idx >= info->hmc_info->sd_table.sd_cnt || sd_lmt > info->hmc_info->sd_table.sd_cnt) { ibdev_dbg(to_ibdev(dev), "HMC: invalid sd_idx\n"); - return IRDMA_ERR_INVALID_SD_INDEX; + return -EINVAL; } for (i = sd_idx; i < sd_lmt; i++) { @@ -477,11 +470,9 @@ enum irdma_status_code irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev, * @type: what type of segment descriptor we're manipulating * @direct_mode_sz: size to alloc in direct mode */ -enum irdma_status_code irdma_add_sd_table_entry(struct irdma_hw *hw, - struct irdma_hmc_info *hmc_info, - u32 sd_index, - enum irdma_sd_entry_type type, - u64 direct_mode_sz) +int irdma_add_sd_table_entry(struct irdma_hw *hw, + struct irdma_hmc_info *hmc_info, u32 sd_index, + enum irdma_sd_entry_type type, u64 direct_mode_sz) { struct irdma_hmc_sd_entry *sd_entry; struct irdma_dma_mem dma_mem; @@ -499,7 +490,7 @@ enum irdma_status_code irdma_add_sd_table_entry(struct irdma_hw *hw, dma_mem.va = dma_alloc_coherent(hw->device, dma_mem.size, &dma_mem.pa, GFP_KERNEL); if (!dma_mem.va) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; if (type == IRDMA_SD_TYPE_PAGED) { struct irdma_virt_mem *vmem = &sd_entry->u.pd_table.pd_entry_virt_mem; @@ -510,7 +501,7 @@ enum irdma_status_code irdma_add_sd_table_entry(struct irdma_hw *hw, dma_free_coherent(hw->device, dma_mem.size, dma_mem.va, dma_mem.pa); dma_mem.va = NULL; - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } sd_entry->u.pd_table.pd_entry = vmem->va; @@ -549,10 +540,9 @@ enum irdma_status_code irdma_add_sd_table_entry(struct irdma_hw *hw, * aligned on 4K boundary and zeroed memory. * 2. It should be 4K in size. */ -enum irdma_status_code irdma_add_pd_table_entry(struct irdma_sc_dev *dev, - struct irdma_hmc_info *hmc_info, - u32 pd_index, - struct irdma_dma_mem *rsrc_pg) +int irdma_add_pd_table_entry(struct irdma_sc_dev *dev, + struct irdma_hmc_info *hmc_info, u32 pd_index, + struct irdma_dma_mem *rsrc_pg) { struct irdma_hmc_pd_table *pd_table; struct irdma_hmc_pd_entry *pd_entry; @@ -563,7 +553,7 @@ enum irdma_status_code irdma_add_pd_table_entry(struct irdma_sc_dev *dev, u64 page_desc; if (pd_index / IRDMA_HMC_PD_CNT_IN_SD >= hmc_info->sd_table.sd_cnt) - return IRDMA_ERR_INVALID_PAGE_DESC_INDEX; + return -EINVAL; sd_idx = (pd_index / IRDMA_HMC_PD_CNT_IN_SD); if (hmc_info->sd_table.sd_entry[sd_idx].entry_type != @@ -584,7 +574,7 @@ enum irdma_status_code irdma_add_pd_table_entry(struct irdma_sc_dev *dev, page->size, &page->pa, GFP_KERNEL); if (!page->va) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; pd_entry->rsrc_pg = false; } @@ -621,9 +611,8 @@ enum irdma_status_code irdma_add_pd_table_entry(struct irdma_sc_dev *dev, * 1. Caller can deallocate the memory used by backing storage after this * function returns. */ -enum irdma_status_code irdma_remove_pd_bp(struct irdma_sc_dev *dev, - struct irdma_hmc_info *hmc_info, - u32 idx) +int irdma_remove_pd_bp(struct irdma_sc_dev *dev, + struct irdma_hmc_info *hmc_info, u32 idx) { struct irdma_hmc_pd_entry *pd_entry; struct irdma_hmc_pd_table *pd_table; @@ -635,11 +624,11 @@ enum irdma_status_code irdma_remove_pd_bp(struct irdma_sc_dev *dev, sd_idx = idx / IRDMA_HMC_PD_CNT_IN_SD; rel_pd_idx = idx % IRDMA_HMC_PD_CNT_IN_SD; if (sd_idx >= hmc_info->sd_table.sd_cnt) - return IRDMA_ERR_INVALID_PAGE_DESC_INDEX; + return -EINVAL; sd_entry = &hmc_info->sd_table.sd_entry[sd_idx]; if (sd_entry->entry_type != IRDMA_SD_TYPE_PAGED) - return IRDMA_ERR_INVALID_SD_TYPE; + return -EINVAL; pd_table = &hmc_info->sd_table.sd_entry[sd_idx].u.pd_table; pd_entry = &pd_table->pd_entry[rel_pd_idx]; @@ -656,7 +645,7 @@ enum irdma_status_code irdma_remove_pd_bp(struct irdma_sc_dev *dev, if (!pd_entry->rsrc_pg) { mem = &pd_entry->bp.addr; if (!mem || !mem->va) - return IRDMA_ERR_PARAM; + return -EINVAL; dma_free_coherent(dev->hw->device, mem->size, mem->va, mem->pa); @@ -673,14 +662,13 @@ enum irdma_status_code irdma_remove_pd_bp(struct irdma_sc_dev *dev, * @hmc_info: pointer to the HMC configuration information structure * @idx: the page index */ -enum irdma_status_code irdma_prep_remove_sd_bp(struct irdma_hmc_info *hmc_info, - u32 idx) +int irdma_prep_remove_sd_bp(struct irdma_hmc_info *hmc_info, u32 idx) { struct irdma_hmc_sd_entry *sd_entry; sd_entry = &hmc_info->sd_table.sd_entry[idx]; if (--sd_entry->u.bp.use_cnt) - return IRDMA_ERR_NOT_READY; + return -EBUSY; hmc_info->sd_table.use_cnt--; sd_entry->valid = false; @@ -693,15 +681,14 @@ enum irdma_status_code irdma_prep_remove_sd_bp(struct irdma_hmc_info *hmc_info, * @hmc_info: pointer to the HMC configuration information structure * @idx: segment descriptor index to find the relevant page descriptor */ -enum irdma_status_code -irdma_prep_remove_pd_page(struct irdma_hmc_info *hmc_info, u32 idx) +int irdma_prep_remove_pd_page(struct irdma_hmc_info *hmc_info, u32 idx) { struct irdma_hmc_sd_entry *sd_entry; sd_entry = &hmc_info->sd_table.sd_entry[idx]; if (sd_entry->u.pd_table.use_cnt) - return IRDMA_ERR_NOT_READY; + return -EBUSY; sd_entry->valid = false; hmc_info->sd_table.use_cnt--; diff --git a/drivers/infiniband/hw/irdma/hmc.h b/drivers/infiniband/hw/irdma/hmc.h index e2139c788b1b..f5c5dacc7021 100644 --- a/drivers/infiniband/hw/irdma/hmc.h +++ b/drivers/infiniband/hw/irdma/hmc.h @@ -141,40 +141,29 @@ struct irdma_hmc_del_obj_info { bool privileged; }; -enum irdma_status_code irdma_copy_dma_mem(struct irdma_hw *hw, void *dest_buf, - struct irdma_dma_mem *src_mem, - u64 src_offset, u64 size); -enum irdma_status_code -irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev, - struct irdma_hmc_create_obj_info *info); -enum irdma_status_code irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev, - struct irdma_hmc_del_obj_info *info, - bool reset); -enum irdma_status_code irdma_hmc_sd_one(struct irdma_sc_dev *dev, u8 hmc_fn_id, - u64 pa, u32 sd_idx, - enum irdma_sd_entry_type type, - bool setsd); -enum irdma_status_code -irdma_update_sds_noccq(struct irdma_sc_dev *dev, - struct irdma_update_sds_info *info); +int irdma_copy_dma_mem(struct irdma_hw *hw, void *dest_buf, + struct irdma_dma_mem *src_mem, u64 src_offset, u64 size); +int irdma_sc_create_hmc_obj(struct irdma_sc_dev *dev, + struct irdma_hmc_create_obj_info *info); +int irdma_sc_del_hmc_obj(struct irdma_sc_dev *dev, + struct irdma_hmc_del_obj_info *info, bool reset); +int irdma_hmc_sd_one(struct irdma_sc_dev *dev, u8 hmc_fn_id, u64 pa, u32 sd_idx, + enum irdma_sd_entry_type type, + bool setsd); +int irdma_update_sds_noccq(struct irdma_sc_dev *dev, + struct irdma_update_sds_info *info); struct irdma_vfdev *irdma_vfdev_from_fpm(struct irdma_sc_dev *dev, u8 hmc_fn_id); struct irdma_hmc_info *irdma_vf_hmcinfo_from_fpm(struct irdma_sc_dev *dev, u8 hmc_fn_id); -enum irdma_status_code irdma_add_sd_table_entry(struct irdma_hw *hw, - struct irdma_hmc_info *hmc_info, - u32 sd_index, - enum irdma_sd_entry_type type, - u64 direct_mode_sz); -enum irdma_status_code irdma_add_pd_table_entry(struct irdma_sc_dev *dev, - struct irdma_hmc_info *hmc_info, - u32 pd_index, - struct irdma_dma_mem *rsrc_pg); -enum irdma_status_code irdma_remove_pd_bp(struct irdma_sc_dev *dev, - struct irdma_hmc_info *hmc_info, - u32 idx); -enum irdma_status_code irdma_prep_remove_sd_bp(struct irdma_hmc_info *hmc_info, - u32 idx); -enum irdma_status_code -irdma_prep_remove_pd_page(struct irdma_hmc_info *hmc_info, u32 idx); +int irdma_add_sd_table_entry(struct irdma_hw *hw, + struct irdma_hmc_info *hmc_info, u32 sd_index, + enum irdma_sd_entry_type type, u64 direct_mode_sz); +int irdma_add_pd_table_entry(struct irdma_sc_dev *dev, + struct irdma_hmc_info *hmc_info, u32 pd_index, + struct irdma_dma_mem *rsrc_pg); +int irdma_remove_pd_bp(struct irdma_sc_dev *dev, + struct irdma_hmc_info *hmc_info, u32 idx); +int irdma_prep_remove_sd_bp(struct irdma_hmc_info *hmc_info, u32 idx); +int irdma_prep_remove_pd_page(struct irdma_hmc_info *hmc_info, u32 idx); #endif /* IRDMA_HMC_H */ diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 89234d04cc65..3dc9b5801da1 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -75,12 +75,12 @@ static void irdma_puda_ce_handler(struct irdma_pci_f *rf, struct irdma_sc_cq *cq) { struct irdma_sc_dev *dev = &rf->sc_dev; - enum irdma_status_code status; u32 compl_error; + int status; do { status = irdma_puda_poll_cmpl(dev, cq, &compl_error); - if (status == IRDMA_ERR_Q_EMPTY) + if (status == -ENOENT) break; if (status) { ibdev_dbg(to_ibdev(dev), "ERR: puda status = %d\n", status); @@ -456,7 +456,7 @@ static void irdma_ceq_dpc(struct tasklet_struct *t) * Allocate iwdev msix table and copy the msix info to the table * Return 0 if successful, otherwise return error */ -static enum irdma_status_code irdma_save_msix_info(struct irdma_pci_f *rf) +static int irdma_save_msix_info(struct irdma_pci_f *rf) { struct irdma_qvlist_info *iw_qvlist; struct irdma_qv_info *iw_qvinfo; @@ -466,13 +466,13 @@ static enum irdma_status_code irdma_save_msix_info(struct irdma_pci_f *rf) size_t size; if (!rf->msix_count) - return IRDMA_ERR_NO_INTR; + return -EINVAL; size = sizeof(struct irdma_msix_vector) * rf->msix_count; size += struct_size(iw_qvlist, qv_info, rf->msix_count); rf->iw_msixtbl = kzalloc(size, GFP_KERNEL); if (!rf->iw_msixtbl) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; rf->iw_qvlist = (struct irdma_qvlist_info *) (&rf->iw_msixtbl[rf->msix_count]); @@ -564,9 +564,9 @@ static void irdma_destroy_irq(struct irdma_pci_f *rf, */ static void irdma_destroy_cqp(struct irdma_pci_f *rf, bool free_hwcqp) { - enum irdma_status_code status = 0; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_cqp *cqp = &rf->cqp; + int status = 0; if (rf->cqp_cmpl_wq) destroy_workqueue(rf->cqp_cmpl_wq); @@ -606,9 +606,9 @@ static void irdma_destroy_virt_aeq(struct irdma_pci_f *rf) */ static void irdma_destroy_aeq(struct irdma_pci_f *rf) { - enum irdma_status_code status = IRDMA_ERR_NOT_READY; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_aeq *aeq = &rf->aeq; + int status = -EBUSY; if (!rf->msix_shared) { rf->sc_dev.irq_ops->irdma_cfg_aeq(&rf->sc_dev, rf->iw_msixtbl->idx, false); @@ -642,8 +642,8 @@ exit: */ static void irdma_destroy_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq) { - enum irdma_status_code status; struct irdma_sc_dev *dev = &rf->sc_dev; + int status; if (rf->reset) goto exit; @@ -733,7 +733,7 @@ static void irdma_destroy_ccq(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_ccq *ccq = &rf->ccq; - enum irdma_status_code status = 0; + int status = 0; if (!rf->reset) status = irdma_sc_ccq_destroy(dev->ccq, 0, true); @@ -796,9 +796,8 @@ static void irdma_del_hmc_objects(struct irdma_sc_dev *dev, * @dev: hardware control device structure * @info: information for the hmc object to create */ -static enum irdma_status_code -irdma_create_hmc_obj_type(struct irdma_sc_dev *dev, - struct irdma_hmc_create_obj_info *info) +static int irdma_create_hmc_obj_type(struct irdma_sc_dev *dev, + struct irdma_hmc_create_obj_info *info) { return irdma_sc_create_hmc_obj(dev, info); } @@ -812,13 +811,12 @@ irdma_create_hmc_obj_type(struct irdma_sc_dev *dev, * Create the device hmc objects and allocate hmc pages * Return 0 if successful, otherwise clean up and return error */ -static enum irdma_status_code -irdma_create_hmc_objs(struct irdma_pci_f *rf, bool privileged, enum irdma_vers vers) +static int irdma_create_hmc_objs(struct irdma_pci_f *rf, bool privileged, + enum irdma_vers vers) { struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_hmc_create_obj_info info = {}; - enum irdma_status_code status = 0; - int i; + int i, status = 0; info.hmc_info = dev->hmc_info; info.privileged = privileged; @@ -868,9 +866,9 @@ irdma_create_hmc_objs(struct irdma_pci_f *rf, bool privileged, enum irdma_vers v * update the memptr to point to the new aligned memory * Return 0 if successful, otherwise return no memory error */ -static enum irdma_status_code -irdma_obj_aligned_mem(struct irdma_pci_f *rf, struct irdma_dma_mem *memptr, - u32 size, u32 mask) +static int irdma_obj_aligned_mem(struct irdma_pci_f *rf, + struct irdma_dma_mem *memptr, u32 size, + u32 mask) { unsigned long va, newva; unsigned long extra; @@ -884,7 +882,7 @@ irdma_obj_aligned_mem(struct irdma_pci_f *rf, struct irdma_dma_mem *memptr, memptr->pa = rf->obj_next.pa + extra; memptr->size = size; if (((u8 *)memptr->va + size) > ((u8 *)rf->obj_mem.va + rf->obj_mem.size)) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; rf->obj_next.va = (u8 *)memptr->va + size; rf->obj_next.pa = memptr->pa + size; @@ -899,25 +897,24 @@ irdma_obj_aligned_mem(struct irdma_pci_f *rf, struct irdma_dma_mem *memptr, * Return 0, if the cqp and all the resources associated with it * are successfully created, otherwise return error */ -static enum irdma_status_code irdma_create_cqp(struct irdma_pci_f *rf) +static int irdma_create_cqp(struct irdma_pci_f *rf) { - enum irdma_status_code status; u32 sqsize = IRDMA_CQP_SW_SQSIZE_2048; struct irdma_dma_mem mem; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_cqp_init_info cqp_init_info = {}; struct irdma_cqp *cqp = &rf->cqp; u16 maj_err, min_err; - int i; + int i, status; cqp->cqp_requests = kcalloc(sqsize, sizeof(*cqp->cqp_requests), GFP_KERNEL); if (!cqp->cqp_requests) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp->scratch_array = kcalloc(sqsize, sizeof(*cqp->scratch_array), GFP_KERNEL); if (!cqp->scratch_array) { kfree(cqp->cqp_requests); - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } dev->cqp = &cqp->sc_cqp; @@ -929,7 +926,7 @@ static enum irdma_status_code irdma_create_cqp(struct irdma_pci_f *rf) if (!cqp->sq.va) { kfree(cqp->scratch_array); kfree(cqp->cqp_requests); - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } status = irdma_obj_aligned_mem(rf, &mem, sizeof(struct irdma_cqp_ctx), @@ -999,12 +996,12 @@ exit: * Return 0, if the ccq and the resources associated with it * are successfully created, otherwise return error */ -static enum irdma_status_code irdma_create_ccq(struct irdma_pci_f *rf) +static int irdma_create_ccq(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; - enum irdma_status_code status; struct irdma_ccq_init_info info = {}; struct irdma_ccq *ccq = &rf->ccq; + int status; dev->ccq = &ccq->sc_cq; dev->ccq->dev = dev; @@ -1015,7 +1012,7 @@ static enum irdma_status_code irdma_create_ccq(struct irdma_pci_f *rf) ccq->mem_cq.va = dma_alloc_coherent(dev->hw->device, ccq->mem_cq.size, &ccq->mem_cq.pa, GFP_KERNEL); if (!ccq->mem_cq.va) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; status = irdma_obj_aligned_mem(rf, &ccq->shadow_area, ccq->shadow_area.size, @@ -1054,9 +1051,9 @@ exit: * Allocate a mac ip entry and add it to the hw table Return 0 * if successful, otherwise return error */ -static enum irdma_status_code irdma_alloc_set_mac(struct irdma_device *iwdev) +static int irdma_alloc_set_mac(struct irdma_device *iwdev) { - enum irdma_status_code status; + int status; status = irdma_alloc_local_mac_entry(iwdev->rf, &iwdev->mac_ip_table_idx); @@ -1082,9 +1079,8 @@ static enum irdma_status_code irdma_alloc_set_mac(struct irdma_device *iwdev) * Allocate interrupt resources and enable irq handling * Return 0 if successful, otherwise return error */ -static enum irdma_status_code -irdma_cfg_ceq_vector(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, - u32 ceq_id, struct irdma_msix_vector *msix_vec) +static int irdma_cfg_ceq_vector(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, + u32 ceq_id, struct irdma_msix_vector *msix_vec) { int status; @@ -1103,7 +1099,7 @@ irdma_cfg_ceq_vector(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, irq_update_affinity_hint(msix_vec->irq, &msix_vec->mask); if (status) { ibdev_dbg(&rf->iwdev->ibdev, "ERR: ceq irq config fail\n"); - return IRDMA_ERR_CFG; + return status; } msix_vec->ceq_id = ceq_id; @@ -1119,7 +1115,7 @@ irdma_cfg_ceq_vector(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, * Allocate interrupt resources and enable irq handling * Return 0 if successful, otherwise return error */ -static enum irdma_status_code irdma_cfg_aeq_vector(struct irdma_pci_f *rf) +static int irdma_cfg_aeq_vector(struct irdma_pci_f *rf) { struct irdma_msix_vector *msix_vec = rf->iw_msixtbl; u32 ret = 0; @@ -1131,7 +1127,7 @@ static enum irdma_status_code irdma_cfg_aeq_vector(struct irdma_pci_f *rf) } if (ret) { ibdev_dbg(&rf->iwdev->ibdev, "ERR: aeq irq config fail\n"); - return IRDMA_ERR_CFG; + return -EINVAL; } rf->sc_dev.irq_ops->irdma_cfg_aeq(&rf->sc_dev, msix_vec->idx, true); @@ -1149,12 +1145,10 @@ static enum irdma_status_code irdma_cfg_aeq_vector(struct irdma_pci_f *rf) * Return 0, if the ceq and the resources associated with it * are successfully created, otherwise return error */ -static enum irdma_status_code irdma_create_ceq(struct irdma_pci_f *rf, - struct irdma_ceq *iwceq, - u32 ceq_id, - struct irdma_sc_vsi *vsi) +static int irdma_create_ceq(struct irdma_pci_f *rf, struct irdma_ceq *iwceq, + u32 ceq_id, struct irdma_sc_vsi *vsi) { - enum irdma_status_code status; + int status; struct irdma_ceq_init_info info = {}; struct irdma_sc_dev *dev = &rf->sc_dev; u64 scratch; @@ -1169,7 +1163,7 @@ static enum irdma_status_code irdma_create_ceq(struct irdma_pci_f *rf, iwceq->mem.va = dma_alloc_coherent(dev->hw->device, iwceq->mem.size, &iwceq->mem.pa, GFP_KERNEL); if (!iwceq->mem.va) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; info.ceq_id = ceq_id; info.ceqe_base = iwceq->mem.va; @@ -1205,18 +1199,18 @@ static enum irdma_status_code irdma_create_ceq(struct irdma_pci_f *rf, * Create the ceq 0 and configure it's msix interrupt vector * Return 0, if successfully set up, otherwise return error */ -static enum irdma_status_code irdma_setup_ceq_0(struct irdma_pci_f *rf) +static int irdma_setup_ceq_0(struct irdma_pci_f *rf) { struct irdma_ceq *iwceq; struct irdma_msix_vector *msix_vec; u32 i; - enum irdma_status_code status = 0; + int status = 0; u32 num_ceqs; num_ceqs = min(rf->msix_count, rf->sc_dev.hmc_fpm_misc.max_ceqs); rf->ceqlist = kcalloc(num_ceqs, sizeof(*rf->ceqlist), GFP_KERNEL); if (!rf->ceqlist) { - status = IRDMA_ERR_NO_MEMORY; + status = -ENOMEM; goto exit; } @@ -1262,14 +1256,13 @@ exit: * Create the ceq's and configure their msix interrupt vectors * Return 0, if ceqs are successfully set up, otherwise return error */ -static enum irdma_status_code irdma_setup_ceqs(struct irdma_pci_f *rf, - struct irdma_sc_vsi *vsi) +static int irdma_setup_ceqs(struct irdma_pci_f *rf, struct irdma_sc_vsi *vsi) { u32 i; u32 ceq_id; struct irdma_ceq *iwceq; struct irdma_msix_vector *msix_vec; - enum irdma_status_code status; + int status; u32 num_ceqs; num_ceqs = min(rf->msix_count, rf->sc_dev.hmc_fpm_misc.max_ceqs); @@ -1303,22 +1296,21 @@ del_ceqs: return status; } -static enum irdma_status_code irdma_create_virt_aeq(struct irdma_pci_f *rf, - u32 size) +static int irdma_create_virt_aeq(struct irdma_pci_f *rf, u32 size) { - enum irdma_status_code status = IRDMA_ERR_NO_MEMORY; struct irdma_aeq *aeq = &rf->aeq; dma_addr_t *pg_arr; u32 pg_cnt; + int status; if (rf->rdma_ver < IRDMA_GEN_2) - return IRDMA_NOT_SUPPORTED; + return -EOPNOTSUPP; aeq->mem.size = sizeof(struct irdma_sc_aeqe) * size; aeq->mem.va = vzalloc(aeq->mem.size); if (!aeq->mem.va) - return status; + return -ENOMEM; pg_cnt = DIV_ROUND_UP(aeq->mem.size, PAGE_SIZE); status = irdma_get_pble(rf->pble_rsrc, &aeq->palloc, pg_cnt, true); @@ -1345,15 +1337,15 @@ static enum irdma_status_code irdma_create_virt_aeq(struct irdma_pci_f *rf, * Return 0, if the aeq and the resources associated with it * are successfully created, otherwise return error */ -static enum irdma_status_code irdma_create_aeq(struct irdma_pci_f *rf) +static int irdma_create_aeq(struct irdma_pci_f *rf) { - enum irdma_status_code status; struct irdma_aeq_init_info info = {}; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_aeq *aeq = &rf->aeq; struct irdma_hmc_info *hmc_info = rf->sc_dev.hmc_info; u32 aeq_size; u8 multiplier = (rf->protocol_used == IRDMA_IWARP_PROTOCOL_ONLY) ? 2 : 1; + int status; aeq_size = multiplier * hmc_info->hmc_obj[IRDMA_HMC_IW_QP].cnt + hmc_info->hmc_obj[IRDMA_HMC_IW_CQ].cnt; @@ -1412,10 +1404,10 @@ err: * Create the aeq and configure its msix interrupt vector * Return 0 if successful, otherwise return error */ -static enum irdma_status_code irdma_setup_aeq(struct irdma_pci_f *rf) +static int irdma_setup_aeq(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; - enum irdma_status_code status; + int status; status = irdma_create_aeq(rf); if (status) @@ -1439,10 +1431,10 @@ static enum irdma_status_code irdma_setup_aeq(struct irdma_pci_f *rf) * * Return 0 if successful, otherwise return error */ -static enum irdma_status_code irdma_initialize_ilq(struct irdma_device *iwdev) +static int irdma_initialize_ilq(struct irdma_device *iwdev) { struct irdma_puda_rsrc_info info = {}; - enum irdma_status_code status; + int status; info.type = IRDMA_PUDA_RSRC_TYPE_ILQ; info.cq_id = 1; @@ -1469,10 +1461,10 @@ static enum irdma_status_code irdma_initialize_ilq(struct irdma_device *iwdev) * * Return 0 if successful, otherwise return error */ -static enum irdma_status_code irdma_initialize_ieq(struct irdma_device *iwdev) +static int irdma_initialize_ieq(struct irdma_device *iwdev) { struct irdma_puda_rsrc_info info = {}; - enum irdma_status_code status; + int status; info.type = IRDMA_PUDA_RSRC_TYPE_IEQ; info.cq_id = 2; @@ -1515,9 +1507,9 @@ void irdma_reinitialize_ieq(struct irdma_sc_vsi *vsi) * the hmc objects and create the objects * Return 0 if successful, otherwise return error */ -static enum irdma_status_code irdma_hmc_setup(struct irdma_pci_f *rf) +static int irdma_hmc_setup(struct irdma_pci_f *rf) { - enum irdma_status_code status; + int status; u32 qpcnt; if (rf->rdma_ver == IRDMA_GEN_1) @@ -1570,9 +1562,9 @@ static void irdma_del_init_mem(struct irdma_pci_f *rf) * Return 0 if successful, otherwise clean up the resources * and return error */ -static enum irdma_status_code irdma_initialize_dev(struct irdma_pci_f *rf) +static int irdma_initialize_dev(struct irdma_pci_f *rf) { - enum irdma_status_code status; + int status; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_device_init_info info = {}; struct irdma_dma_mem mem; @@ -1584,7 +1576,7 @@ static enum irdma_status_code irdma_initialize_dev(struct irdma_pci_f *rf) rf->hmc_info_mem = kzalloc(size, GFP_KERNEL); if (!rf->hmc_info_mem) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; rf->pble_rsrc = (struct irdma_hmc_pble_rsrc *)rf->hmc_info_mem; dev->hmc_info = &rf->hw.hmc; @@ -1608,7 +1600,7 @@ static enum irdma_status_code irdma_initialize_dev(struct irdma_pci_f *rf) info.fpm_commit_buf = mem.va; info.bar0 = rf->hw.hw_addr; - info.hmc_fn_id = PCI_FUNC(rf->pcidev->devfn); + info.hmc_fn_id = rf->pf_id; info.hw = &rf->hw; status = irdma_sc_dev_init(rf->rdma_ver, &rf->sc_dev, &info); if (status) @@ -1667,9 +1659,9 @@ void irdma_rt_deinit_hw(struct irdma_device *iwdev) destroy_workqueue(iwdev->cleanup_wq); } -static enum irdma_status_code irdma_setup_init_state(struct irdma_pci_f *rf) +static int irdma_setup_init_state(struct irdma_pci_f *rf) { - enum irdma_status_code status; + int status; status = irdma_save_msix_info(rf); if (status) @@ -1680,7 +1672,7 @@ static enum irdma_status_code irdma_setup_init_state(struct irdma_pci_f *rf) rf->obj_mem.va = dma_alloc_coherent(rf->hw.device, rf->obj_mem.size, &rf->obj_mem.pa, GFP_KERNEL); if (!rf->obj_mem.va) { - status = IRDMA_ERR_NO_MEMORY; + status = -ENOMEM; goto clean_msixtbl; } @@ -1763,14 +1755,14 @@ void irdma_ctrl_deinit_hw(struct irdma_pci_f *rf) * Create device queues ILQ, IEQ, CEQs and PBLEs. Setup irdma * device resource objects. */ -enum irdma_status_code irdma_rt_init_hw(struct irdma_device *iwdev, - struct irdma_l2params *l2params) +int irdma_rt_init_hw(struct irdma_device *iwdev, + struct irdma_l2params *l2params) { struct irdma_pci_f *rf = iwdev->rf; struct irdma_sc_dev *dev = &rf->sc_dev; - enum irdma_status_code status; struct irdma_vsi_init_info vsi_info = {}; struct irdma_vsi_stats_info stats_info = {}; + int status; vsi_info.dev = dev; vsi_info.back_vsi = iwdev; @@ -1788,7 +1780,7 @@ enum irdma_status_code irdma_rt_init_hw(struct irdma_device *iwdev, stats_info.pestat = kzalloc(sizeof(*stats_info.pestat), GFP_KERNEL); if (!stats_info.pestat) { irdma_cleanup_cm_core(&iwdev->cm_core); - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } stats_info.fcn_id = dev->hmc_fn_id; status = irdma_vsi_stats_init(&iwdev->vsi, &stats_info); @@ -1850,7 +1842,7 @@ enum irdma_status_code irdma_rt_init_hw(struct irdma_device *iwdev, iwdev->cleanup_wq = alloc_workqueue("irdma-cleanup-wq", WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE); if (!iwdev->cleanup_wq) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; irdma_get_used_rsrc(iwdev); init_waitqueue_head(&iwdev->suspend_wq); @@ -1870,10 +1862,10 @@ enum irdma_status_code irdma_rt_init_hw(struct irdma_device *iwdev, * * Create admin queues, HMC obejcts and RF resource objects */ -enum irdma_status_code irdma_ctrl_init_hw(struct irdma_pci_f *rf) +int irdma_ctrl_init_hw(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; - enum irdma_status_code status; + int status; do { status = irdma_setup_init_state(rf); if (status) @@ -1915,7 +1907,7 @@ enum irdma_status_code irdma_ctrl_init_hw(struct irdma_pci_f *rf) rf->cqp_cmpl_wq = alloc_ordered_workqueue("cqp_cmpl_wq", WQ_HIGHPRI | WQ_UNBOUND); if (!rf->cqp_cmpl_wq) { - status = IRDMA_ERR_NO_MEMORY; + status = -ENOMEM; break; } INIT_WORK(&rf->cqp_cmpl_work, cqp_compl_worker); @@ -2202,11 +2194,11 @@ int irdma_add_local_mac_entry(struct irdma_pci_f *rf, const u8 *mac_addr, u16 id struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; cqp_info->post_sq = 1; @@ -2238,11 +2230,11 @@ int irdma_alloc_local_mac_entry(struct irdma_pci_f *rf, u16 *mac_tbl_idx) struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; - enum irdma_status_code status = 0; + int status = 0; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; cqp_info->cqp_cmd = IRDMA_OP_ALLOC_LOCAL_MAC_ENTRY; @@ -2264,18 +2256,17 @@ int irdma_alloc_local_mac_entry(struct irdma_pci_f *rf, u16 *mac_tbl_idx) * @accel_local_port: port for apbvt * @add_port: add ordelete port */ -static enum irdma_status_code -irdma_cqp_manage_apbvt_cmd(struct irdma_device *iwdev, u16 accel_local_port, - bool add_port) +static int irdma_cqp_manage_apbvt_cmd(struct irdma_device *iwdev, + u16 accel_local_port, bool add_port) { struct irdma_apbvt_info *info; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, add_port); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; info = &cqp_info->in.u.manage_apbvt_entry.info; @@ -2429,22 +2420,21 @@ static void irdma_send_syn_cqp_callback(struct irdma_cqp_request *cqp_request) * @cmnode: cmnode associated with connection * @wait: wait for completion */ -enum irdma_status_code -irdma_manage_qhash(struct irdma_device *iwdev, struct irdma_cm_info *cminfo, - enum irdma_quad_entry_type etype, - enum irdma_quad_hash_manage_type mtype, void *cmnode, - bool wait) +int irdma_manage_qhash(struct irdma_device *iwdev, struct irdma_cm_info *cminfo, + enum irdma_quad_entry_type etype, + enum irdma_quad_hash_manage_type mtype, void *cmnode, + bool wait) { struct irdma_qhash_table_info *info; - enum irdma_status_code status; struct irdma_cqp *iwcqp = &iwdev->rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_cm_node *cm_node = cmnode; + int status; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, wait); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; info = &cqp_info->in.u.manage_qhash_table_entry.info; @@ -2558,12 +2548,10 @@ static void irdma_hw_flush_wqes_callback(struct irdma_cqp_request *cqp_request) * @info: info for flush * @wait: flag wait for completion */ -enum irdma_status_code irdma_hw_flush_wqes(struct irdma_pci_f *rf, - struct irdma_sc_qp *qp, - struct irdma_qp_flush_info *info, - bool wait) +int irdma_hw_flush_wqes(struct irdma_pci_f *rf, struct irdma_sc_qp *qp, + struct irdma_qp_flush_info *info, bool wait) { - enum irdma_status_code status; + int status; struct irdma_qp_flush_info *hw_info; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; @@ -2571,7 +2559,7 @@ enum irdma_status_code irdma_hw_flush_wqes(struct irdma_pci_f *rf, cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, wait); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; if (!wait) @@ -2619,7 +2607,7 @@ enum irdma_status_code irdma_hw_flush_wqes(struct irdma_pci_f *rf, info->sq = true; new_req = irdma_alloc_and_get_cqp_request(&rf->cqp, true); if (!new_req) { - status = IRDMA_ERR_NO_MEMORY; + status = -ENOMEM; goto put_cqp; } cqp_info = &new_req->info; diff --git a/drivers/infiniband/hw/irdma/i40iw_hw.c b/drivers/infiniband/hw/irdma/i40iw_hw.c index 64148ad8a604..e46fc110004d 100644 --- a/drivers/infiniband/hw/irdma/i40iw_hw.c +++ b/drivers/infiniband/hw/irdma/i40iw_hw.c @@ -3,7 +3,6 @@ #include "osdep.h" #include "type.h" #include "i40iw_hw.h" -#include "status.h" #include "protos.h" static u32 i40iw_regs[IRDMA_MAX_REGS] = { diff --git a/drivers/infiniband/hw/irdma/i40iw_if.c b/drivers/infiniband/hw/irdma/i40iw_if.c index 43e962b97d6a..4053ead32416 100644 --- a/drivers/infiniband/hw/irdma/i40iw_if.c +++ b/drivers/infiniband/hw/irdma/i40iw_if.c @@ -77,6 +77,7 @@ static void i40iw_fill_device_info(struct irdma_device *iwdev, struct i40e_info rf->rdma_ver = IRDMA_GEN_1; rf->gen_ops.request_reset = i40iw_request_reset; rf->pcidev = cdev_info->pcidev; + rf->pf_id = cdev_info->fid; rf->hw.hw_addr = cdev_info->hw_addr; rf->cdev = cdev_info; rf->msix_count = cdev_info->msix_count; @@ -138,7 +139,7 @@ static int i40iw_open(struct i40e_info *cdev_info, struct i40e_client *client) if (last_qset == IRDMA_NO_QSET) last_qset = qset; else if ((qset != last_qset) && (qset != IRDMA_NO_QSET)) - iwdev->dcb = true; + iwdev->dcb_vlan_mode = true; } if (irdma_rt_init_hw(iwdev, &l2params)) { diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c index 9fab29039f1c..514453777e07 100644 --- a/drivers/infiniband/hw/irdma/main.c +++ b/drivers/infiniband/hw/irdma/main.c @@ -79,6 +79,10 @@ static void irdma_fill_qos_info(struct irdma_l2params *l2params, } for (i = 0; i < IIDC_MAX_USER_PRIORITY; i++) l2params->up2tc[i] = qos_info->up2tc[i]; + if (qos_info->pfc_mode == IIDC_DSCP_PFC_MODE) { + l2params->dscp_mode = true; + memcpy(l2params->dscp_map, qos_info->dscp_map, sizeof(l2params->dscp_map)); + } } static void irdma_iidc_event_handler(struct ice_pf *pf, struct iidc_event *event) @@ -108,8 +112,9 @@ static void irdma_iidc_event_handler(struct ice_pf *pf, struct iidc_event *event l2params.tc_changed = true; ibdev_dbg(&iwdev->ibdev, "CLNT: TC Change\n"); ice_get_qos_params(pf, &qos_info); - iwdev->dcb = qos_info.num_tc > 1; irdma_fill_qos_info(&l2params, &qos_info); + if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY) + iwdev->dcb_vlan_mode = qos_info.num_tc > 1 && !l2params.dscp_mode; irdma_change_l2params(&iwdev->vsi, &l2params); } else if (*event->type & BIT(IIDC_EVENT_CRIT_ERR)) { ibdev_warn(&iwdev->ibdev, "ICE OICR event notification: oicr = 0x%08x\n", @@ -157,8 +162,8 @@ static void irdma_request_reset(struct irdma_pci_f *rf) * @vsi: vsi structure * @tc_node: Traffic class node */ -static enum irdma_status_code irdma_lan_register_qset(struct irdma_sc_vsi *vsi, - struct irdma_ws_node *tc_node) +static int irdma_lan_register_qset(struct irdma_sc_vsi *vsi, + struct irdma_ws_node *tc_node) { struct irdma_device *iwdev = vsi->back_vsi; struct ice_pf *pf = iwdev->rf->cdev; @@ -171,7 +176,7 @@ static enum irdma_status_code irdma_lan_register_qset(struct irdma_sc_vsi *vsi, ret = ice_add_rdma_qset(pf, &qset); if (ret) { ibdev_dbg(&iwdev->ibdev, "WS: LAN alloc_res for rdma qset failed.\n"); - return IRDMA_ERR_REG_QSET; + return ret; } tc_node->l2_sched_node_id = qset.teid; @@ -226,6 +231,7 @@ static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf rf->hw.hw_addr = pf->hw.hw_addr; rf->pcidev = pf->pdev; rf->msix_count = pf->num_rdma_msix; + rf->pf_id = pf->hw.pf_id; rf->msix_entries = &pf->msix_entries[pf->rdma_base_vector]; rf->default_vsi.vsi_idx = vsi->vsi_num; rf->protocol_used = pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2 ? @@ -236,7 +242,7 @@ static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf rf->gen_ops.request_reset = irdma_request_reset; rf->limits_sel = 7; rf->iwdev = iwdev; - + mutex_init(&iwdev->ah_tbl_lock); iwdev->netdev = vsi->netdev; iwdev->vsi_num = vsi->vsi_num; iwdev->init_state = INITIAL_STATE; @@ -275,18 +281,19 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_ irdma_fill_device_info(iwdev, pf, vsi); rf = iwdev->rf; - if (irdma_ctrl_init_hw(rf)) { - err = -EIO; + err = irdma_ctrl_init_hw(rf); + if (err) goto err_ctrl_init; - } l2params.mtu = iwdev->netdev->mtu; ice_get_qos_params(pf, &qos_info); irdma_fill_qos_info(&l2params, &qos_info); - if (irdma_rt_init_hw(iwdev, &l2params)) { - err = -EIO; + if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY) + iwdev->dcb_vlan_mode = l2params.num_tc > 1 && !l2params.dscp_mode; + + err = irdma_rt_init_hw(iwdev, &l2params); + if (err) goto err_rt_init; - } err = irdma_ib_register_device(iwdev); if (err) diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index cb218cab79ac..5123f5feaa2f 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -40,7 +40,6 @@ #include #include #include -#include "status.h" #include "osdep.h" #include "defs.h" #include "hmc.h" @@ -242,8 +241,8 @@ struct irdma_qvlist_info { struct irdma_gen_ops { void (*request_reset)(struct irdma_pci_f *rf); - enum irdma_status_code (*register_qset)(struct irdma_sc_vsi *vsi, - struct irdma_ws_node *tc_node); + int (*register_qset)(struct irdma_sc_vsi *vsi, + struct irdma_ws_node *tc_node); void (*unregister_qset)(struct irdma_sc_vsi *vsi, struct irdma_ws_node *tc_node); }; @@ -257,6 +256,7 @@ struct irdma_pci_f { u8 *mem_rsrc; u8 rdma_ver; u8 rst_to; + u8 pf_id; enum irdma_protocol_used protocol_used; u32 sd_type; u32 msix_count; @@ -332,6 +332,8 @@ struct irdma_device { struct workqueue_struct *cleanup_wq; struct irdma_sc_vsi vsi; struct irdma_cm_core cm_core; + DECLARE_HASHTABLE(ah_hash_tbl, 8); + struct mutex ah_tbl_lock; /* protect AH hash table access */ u32 roce_cwnd; u32 roce_ackcreds; u32 vendor_id; @@ -345,7 +347,7 @@ struct irdma_device { u8 iw_status; bool roce_mode:1; bool roce_dcqcn_en:1; - bool dcb:1; + bool dcb_vlan_mode:1; bool iw_ooo:1; enum init_completion_state init_state; @@ -457,10 +459,10 @@ static inline void irdma_free_rsrc(struct irdma_pci_f *rf, spin_unlock_irqrestore(&rf->rsrc_lock, flags); } -enum irdma_status_code irdma_ctrl_init_hw(struct irdma_pci_f *rf); +int irdma_ctrl_init_hw(struct irdma_pci_f *rf); void irdma_ctrl_deinit_hw(struct irdma_pci_f *rf); -enum irdma_status_code irdma_rt_init_hw(struct irdma_device *iwdev, - struct irdma_l2params *l2params); +int irdma_rt_init_hw(struct irdma_device *iwdev, + struct irdma_l2params *l2params); void irdma_rt_deinit_hw(struct irdma_device *iwdev); void irdma_qp_add_ref(struct ib_qp *ibqp); void irdma_qp_rem_ref(struct ib_qp *ibqp); @@ -489,9 +491,8 @@ void irdma_cm_disconn(struct irdma_qp *qp); bool irdma_cqp_crit_err(struct irdma_sc_dev *dev, u8 cqp_cmd, u16 maj_err_code, u16 min_err_code); -enum irdma_status_code -irdma_handle_cqp_op(struct irdma_pci_f *rf, - struct irdma_cqp_request *cqp_request); +int irdma_handle_cqp_op(struct irdma_pci_f *rf, + struct irdma_cqp_request *cqp_request); int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); @@ -500,21 +501,17 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, void irdma_cq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_cq *cq); void irdma_cleanup_pending_cqp_op(struct irdma_pci_f *rf); -enum irdma_status_code irdma_hw_modify_qp(struct irdma_device *iwdev, - struct irdma_qp *iwqp, - struct irdma_modify_qp_info *info, - bool wait); -enum irdma_status_code irdma_qp_suspend_resume(struct irdma_sc_qp *qp, - bool suspend); -enum irdma_status_code -irdma_manage_qhash(struct irdma_device *iwdev, struct irdma_cm_info *cminfo, - enum irdma_quad_entry_type etype, - enum irdma_quad_hash_manage_type mtype, void *cmnode, - bool wait); +int irdma_hw_modify_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp, + struct irdma_modify_qp_info *info, bool wait); +int irdma_qp_suspend_resume(struct irdma_sc_qp *qp, bool suspend); +int irdma_manage_qhash(struct irdma_device *iwdev, struct irdma_cm_info *cminfo, + enum irdma_quad_entry_type etype, + enum irdma_quad_hash_manage_type mtype, void *cmnode, + bool wait); void irdma_receive_ilq(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *rbuf); void irdma_free_sqbuf(struct irdma_sc_vsi *vsi, void *bufp); void irdma_free_qp_rsrc(struct irdma_qp *iwqp); -enum irdma_status_code irdma_setup_cm_core(struct irdma_device *iwdev, u8 ver); +int irdma_setup_cm_core(struct irdma_device *iwdev, u8 ver); void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core); void irdma_next_iw_state(struct irdma_qp *iwqp, u8 state, u8 del_hash, u8 term, u8 term_len); @@ -523,10 +520,8 @@ int irdma_send_reset(struct irdma_cm_node *cm_node); struct irdma_cm_node *irdma_find_node(struct irdma_cm_core *cm_core, u16 rem_port, u32 *rem_addr, u16 loc_port, u32 *loc_addr, u16 vlan_id); -enum irdma_status_code irdma_hw_flush_wqes(struct irdma_pci_f *rf, - struct irdma_sc_qp *qp, - struct irdma_qp_flush_info *info, - bool wait); +int irdma_hw_flush_wqes(struct irdma_pci_f *rf, struct irdma_sc_qp *qp, + struct irdma_qp_flush_info *info, bool wait); void irdma_gen_ae(struct irdma_pci_f *rf, struct irdma_sc_qp *qp, struct irdma_gen_ae_info *info, bool wait); void irdma_copy_ip_ntohl(u32 *dst, __be32 *src); diff --git a/drivers/infiniband/hw/irdma/osdep.h b/drivers/infiniband/hw/irdma/osdep.h index 63d8bb3a6903..fc1ba2a3e6fb 100644 --- a/drivers/infiniband/hw/irdma/osdep.h +++ b/drivers/infiniband/hw/irdma/osdep.h @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -42,32 +43,28 @@ enum irdma_status_code irdma_vf_wait_vchnl_resp(struct irdma_sc_dev *dev); bool irdma_vf_clear_to_send(struct irdma_sc_dev *dev); void irdma_add_dev_ref(struct irdma_sc_dev *dev); void irdma_put_dev_ref(struct irdma_sc_dev *dev); -enum irdma_status_code irdma_ieq_check_mpacrc(struct shash_desc *desc, - void *addr, u32 len, u32 val); +int irdma_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 len, + u32 val); struct irdma_sc_qp *irdma_ieq_get_qp(struct irdma_sc_dev *dev, struct irdma_puda_buf *buf); void irdma_send_ieq_ack(struct irdma_sc_qp *qp); void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len, u32 seqnum); void irdma_free_hash_desc(struct shash_desc *hash_desc); -enum irdma_status_code irdma_init_hash_desc(struct shash_desc **hash_desc); -enum irdma_status_code -irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info, - struct irdma_puda_buf *buf); -enum irdma_status_code irdma_cqp_sds_cmd(struct irdma_sc_dev *dev, - struct irdma_update_sds_info *info); -enum irdma_status_code -irdma_cqp_manage_hmc_fcn_cmd(struct irdma_sc_dev *dev, - struct irdma_hmc_fcn_info *hmcfcninfo, - u16 *pmf_idx); -enum irdma_status_code -irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev, - struct irdma_dma_mem *val_mem, u8 hmc_fn_id); -enum irdma_status_code -irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev, - struct irdma_dma_mem *val_mem, u8 hmc_fn_id); -enum irdma_status_code irdma_alloc_query_fpm_buf(struct irdma_sc_dev *dev, - struct irdma_dma_mem *mem); +int irdma_init_hash_desc(struct shash_desc **hash_desc); +int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info, + struct irdma_puda_buf *buf); +int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev, + struct irdma_update_sds_info *info); +int irdma_cqp_manage_hmc_fcn_cmd(struct irdma_sc_dev *dev, + struct irdma_hmc_fcn_info *hmcfcninfo, + u16 *pmf_idx); +int irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev, + struct irdma_dma_mem *val_mem, u8 hmc_fn_id); +int irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev, + struct irdma_dma_mem *val_mem, u8 hmc_fn_id); +int irdma_alloc_query_fpm_buf(struct irdma_sc_dev *dev, + struct irdma_dma_mem *mem); void *irdma_remove_cqp_head(struct irdma_sc_dev *dev); void irdma_term_modify_qp(struct irdma_sc_qp *qp, u8 next_state, u8 term, u8 term_len); @@ -79,7 +76,7 @@ void irdma_hw_stats_stop_timer(struct irdma_sc_vsi *vsi); void wr32(struct irdma_hw *hw, u32 reg, u32 val); u32 rd32(struct irdma_hw *hw, u32 reg); u64 rd64(struct irdma_hw *hw, u32 reg); -enum irdma_status_code irdma_map_vm_page_list(struct irdma_hw *hw, void *va, - dma_addr_t *pg_dma, u32 pg_cnt); +int irdma_map_vm_page_list(struct irdma_hw *hw, void *va, dma_addr_t *pg_dma, + u32 pg_cnt); void irdma_unmap_vm_page_list(struct irdma_hw *hw, dma_addr_t *pg_dma, u32 pg_cnt); #endif /* IRDMA_OSDEP_H */ diff --git a/drivers/infiniband/hw/irdma/pble.c b/drivers/infiniband/hw/irdma/pble.c index fed49da770f3..cdc0b8a6ed48 100644 --- a/drivers/infiniband/hw/irdma/pble.c +++ b/drivers/infiniband/hw/irdma/pble.c @@ -1,15 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB /* Copyright (c) 2015 - 2021 Intel Corporation */ #include "osdep.h" -#include "status.h" #include "hmc.h" #include "defs.h" #include "type.h" #include "protos.h" #include "pble.h" -static enum irdma_status_code -add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc); +static int add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc); /** * irdma_destroy_pble_prm - destroy prm during module unload @@ -35,13 +33,12 @@ void irdma_destroy_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc) * @dev: irdma_sc_dev struct * @pble_rsrc: pble resources */ -enum irdma_status_code -irdma_hmc_init_pble(struct irdma_sc_dev *dev, - struct irdma_hmc_pble_rsrc *pble_rsrc) +int irdma_hmc_init_pble(struct irdma_sc_dev *dev, + struct irdma_hmc_pble_rsrc *pble_rsrc) { struct irdma_hmc_info *hmc_info; u32 fpm_idx = 0; - enum irdma_status_code status = 0; + int status = 0; hmc_info = dev->hmc_info; pble_rsrc->dev = dev; @@ -60,7 +57,7 @@ irdma_hmc_init_pble(struct irdma_sc_dev *dev, INIT_LIST_HEAD(&pble_rsrc->pinfo.clist); if (add_pble_prm(pble_rsrc)) { irdma_destroy_pble_prm(pble_rsrc); - status = IRDMA_ERR_NO_MEMORY; + status = -ENOMEM; } return status; @@ -84,12 +81,11 @@ static void get_sd_pd_idx(struct irdma_hmc_pble_rsrc *pble_rsrc, * @pble_rsrc: pble resource ptr * @info: page info for sd */ -static enum irdma_status_code -add_sd_direct(struct irdma_hmc_pble_rsrc *pble_rsrc, - struct irdma_add_page_info *info) +static int add_sd_direct(struct irdma_hmc_pble_rsrc *pble_rsrc, + struct irdma_add_page_info *info) { struct irdma_sc_dev *dev = pble_rsrc->dev; - enum irdma_status_code ret_code = 0; + int ret_code = 0; struct sd_pd_idx *idx = &info->idx; struct irdma_chunk *chunk = info->chunk; struct irdma_hmc_info *hmc_info = info->hmc_info; @@ -137,9 +133,8 @@ static u32 fpm_to_idx(struct irdma_hmc_pble_rsrc *pble_rsrc, u64 addr) * @pble_rsrc: pble resource management * @info: page info for sd */ -static enum irdma_status_code -add_bp_pages(struct irdma_hmc_pble_rsrc *pble_rsrc, - struct irdma_add_page_info *info) +static int add_bp_pages(struct irdma_hmc_pble_rsrc *pble_rsrc, + struct irdma_add_page_info *info) { struct irdma_sc_dev *dev = pble_rsrc->dev; u8 *addr; @@ -148,13 +143,13 @@ add_bp_pages(struct irdma_hmc_pble_rsrc *pble_rsrc, struct irdma_hmc_sd_entry *sd_entry = info->sd_entry; struct irdma_hmc_info *hmc_info = info->hmc_info; struct irdma_chunk *chunk = info->chunk; - enum irdma_status_code status = 0; + int status = 0; u32 rel_pd_idx = info->idx.rel_pd_idx; u32 pd_idx = info->idx.pd_idx; u32 i; if (irdma_pble_get_paged_mem(chunk, info->pages)) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; status = irdma_add_sd_table_entry(dev->hw, hmc_info, info->idx.sd_idx, IRDMA_SD_TYPE_PAGED, @@ -207,8 +202,7 @@ static enum irdma_sd_entry_type irdma_get_type(struct irdma_sc_dev *dev, * add_pble_prm - add a sd entry for pble resoure * @pble_rsrc: pble resource management */ -static enum irdma_status_code -add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc) +static int add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc) { struct irdma_sc_dev *dev = pble_rsrc->dev; struct irdma_hmc_sd_entry *sd_entry; @@ -216,22 +210,22 @@ add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc) struct irdma_chunk *chunk; struct irdma_add_page_info info; struct sd_pd_idx *idx = &info.idx; - enum irdma_status_code ret_code = 0; + int ret_code = 0; enum irdma_sd_entry_type sd_entry_type; u64 sd_reg_val = 0; struct irdma_virt_mem chunkmem; u32 pages; if (pble_rsrc->unallocated_pble < PBLE_PER_PAGE) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; if (pble_rsrc->next_fpm_addr & 0xfff) - return IRDMA_ERR_INVALID_PAGE_DESC_INDEX; + return -EINVAL; chunkmem.size = sizeof(*chunk); chunkmem.va = kzalloc(chunkmem.size, GFP_KERNEL); if (!chunkmem.va) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; chunk = chunkmem.va; chunk->chunkmem = chunkmem; @@ -337,9 +331,8 @@ static void free_lvl2(struct irdma_hmc_pble_rsrc *pble_rsrc, * @pble_rsrc: pble resource management * @palloc: level 2 pble allocation */ -static enum irdma_status_code -get_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, - struct irdma_pble_alloc *palloc) +static int get_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, + struct irdma_pble_alloc *palloc) { u32 lf4k, lflast, total, i; u32 pblcnt = PBLE_PER_PAGE; @@ -347,7 +340,7 @@ get_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, struct irdma_pble_level2 *lvl2 = &palloc->level2; struct irdma_pble_info *root = &lvl2->root; struct irdma_pble_info *leaf; - enum irdma_status_code ret_code; + int ret_code; u64 fpm_addr; /* number of full 512 (4K) leafs) */ @@ -359,7 +352,7 @@ get_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, lvl2->leafmem.size = (sizeof(*leaf) * total); lvl2->leafmem.va = kzalloc(lvl2->leafmem.size, GFP_KERNEL); if (!lvl2->leafmem.va) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; lvl2->leaf = lvl2->leafmem.va; leaf = lvl2->leaf; @@ -368,7 +361,7 @@ get_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, if (ret_code) { kfree(lvl2->leafmem.va); lvl2->leaf = NULL; - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } root->idx = fpm_to_idx(pble_rsrc, fpm_addr); @@ -397,7 +390,7 @@ get_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, error: free_lvl2(pble_rsrc, palloc); - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } /** @@ -405,11 +398,10 @@ error: * @pble_rsrc: pble resource management * @palloc: level 1 pble allocation */ -static enum irdma_status_code -get_lvl1_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, - struct irdma_pble_alloc *palloc) +static int get_lvl1_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, + struct irdma_pble_alloc *palloc) { - enum irdma_status_code ret_code; + int ret_code; u64 fpm_addr; struct irdma_pble_info *lvl1 = &palloc->level1; @@ -417,7 +409,7 @@ get_lvl1_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, palloc->total_cnt << 3, &lvl1->addr, &fpm_addr); if (ret_code) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; palloc->level = PBLE_LEVEL_1; lvl1->idx = fpm_to_idx(pble_rsrc, fpm_addr); @@ -433,11 +425,10 @@ get_lvl1_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, * @palloc: contains all inforamtion regarding pble (idx + pble addr) * @level1_only: flag for a level 1 PBLE */ -static enum irdma_status_code -get_lvl1_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, - struct irdma_pble_alloc *palloc, bool level1_only) +static int get_lvl1_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, + struct irdma_pble_alloc *palloc, bool level1_only) { - enum irdma_status_code status = 0; + int status = 0; status = get_lvl1_pble(pble_rsrc, palloc); if (!status || level1_only || palloc->total_cnt <= PBLE_PER_PAGE) @@ -455,11 +446,11 @@ get_lvl1_lvl2_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, * @pble_cnt: #of pbles requested * @level1_only: true if only pble level 1 to acquire */ -enum irdma_status_code irdma_get_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, - struct irdma_pble_alloc *palloc, - u32 pble_cnt, bool level1_only) +int irdma_get_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, + struct irdma_pble_alloc *palloc, u32 pble_cnt, + bool level1_only) { - enum irdma_status_code status = 0; + int status = 0; int max_sds = 0; int i; diff --git a/drivers/infiniband/hw/irdma/pble.h b/drivers/infiniband/hw/irdma/pble.h index d0d4f2b77d34..29d295463559 100644 --- a/drivers/infiniband/hw/irdma/pble.h +++ b/drivers/infiniband/hw/irdma/pble.h @@ -108,20 +108,18 @@ struct irdma_hmc_pble_rsrc { }; void irdma_destroy_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc); -enum irdma_status_code -irdma_hmc_init_pble(struct irdma_sc_dev *dev, - struct irdma_hmc_pble_rsrc *pble_rsrc); +int irdma_hmc_init_pble(struct irdma_sc_dev *dev, + struct irdma_hmc_pble_rsrc *pble_rsrc); void irdma_free_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, struct irdma_pble_alloc *palloc); -enum irdma_status_code irdma_get_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, - struct irdma_pble_alloc *palloc, - u32 pble_cnt, bool level1_only); -enum irdma_status_code irdma_prm_add_pble_mem(struct irdma_pble_prm *pprm, - struct irdma_chunk *pchunk); -enum irdma_status_code -irdma_prm_get_pbles(struct irdma_pble_prm *pprm, - struct irdma_pble_chunkinfo *chunkinfo, u64 mem_size, - u64 **vaddr, u64 *fpm_addr); +int irdma_get_pble(struct irdma_hmc_pble_rsrc *pble_rsrc, + struct irdma_pble_alloc *palloc, u32 pble_cnt, + bool level1_only); +int irdma_prm_add_pble_mem(struct irdma_pble_prm *pprm, + struct irdma_chunk *pchunk); +int irdma_prm_get_pbles(struct irdma_pble_prm *pprm, + struct irdma_pble_chunkinfo *chunkinfo, u64 mem_size, + u64 **vaddr, u64 *fpm_addr); void irdma_prm_return_pbles(struct irdma_pble_prm *pprm, struct irdma_pble_chunkinfo *chunkinfo); void irdma_pble_acquire_lock(struct irdma_hmc_pble_rsrc *pble_rsrc, @@ -129,7 +127,6 @@ void irdma_pble_acquire_lock(struct irdma_hmc_pble_rsrc *pble_rsrc, void irdma_pble_release_lock(struct irdma_hmc_pble_rsrc *pble_rsrc, unsigned long *flags); void irdma_pble_free_paged_mem(struct irdma_chunk *chunk); -enum irdma_status_code irdma_pble_get_paged_mem(struct irdma_chunk *chunk, - u32 pg_cnt); +int irdma_pble_get_paged_mem(struct irdma_chunk *chunk, u32 pg_cnt); void irdma_prm_rem_bitmapmem(struct irdma_hw *hw, struct irdma_chunk *chunk); #endif /* IRDMA_PBLE_H */ diff --git a/drivers/infiniband/hw/irdma/protos.h b/drivers/infiniband/hw/irdma/protos.h index a17c0ffb0cc8..9b6e919ae2a9 100644 --- a/drivers/infiniband/hw/irdma/protos.h +++ b/drivers/infiniband/hw/irdma/protos.h @@ -12,58 +12,51 @@ #define CQP_TIMEOUT_THRESHOLD 500 /* init operations */ -enum irdma_status_code irdma_sc_dev_init(enum irdma_vers ver, - struct irdma_sc_dev *dev, - struct irdma_device_init_info *info); +int irdma_sc_dev_init(enum irdma_vers ver, struct irdma_sc_dev *dev, + struct irdma_device_init_info *info); void irdma_sc_rt_init(struct irdma_sc_dev *dev); void irdma_sc_cqp_post_sq(struct irdma_sc_cqp *cqp); __le64 *irdma_sc_cqp_get_next_send_wqe(struct irdma_sc_cqp *cqp, u64 scratch); -enum irdma_status_code -irdma_sc_mr_fast_register(struct irdma_sc_qp *qp, - struct irdma_fast_reg_stag_info *info, bool post_sq); +int irdma_sc_mr_fast_register(struct irdma_sc_qp *qp, + struct irdma_fast_reg_stag_info *info, + bool post_sq); /* HMC/FPM functions */ -enum irdma_status_code irdma_sc_init_iw_hmc(struct irdma_sc_dev *dev, - u8 hmc_fn_id); +int irdma_sc_init_iw_hmc(struct irdma_sc_dev *dev, u8 hmc_fn_id); /* stats misc */ -enum irdma_status_code -irdma_cqp_gather_stats_cmd(struct irdma_sc_dev *dev, - struct irdma_vsi_pestat *pestat, bool wait); +int irdma_cqp_gather_stats_cmd(struct irdma_sc_dev *dev, + struct irdma_vsi_pestat *pestat, bool wait); void irdma_cqp_gather_stats_gen1(struct irdma_sc_dev *dev, struct irdma_vsi_pestat *pestat); void irdma_hw_stats_read_all(struct irdma_vsi_pestat *stats, struct irdma_dev_hw_stats *stats_values, u64 *hw_stats_regs_32, u64 *hw_stats_regs_64, u8 hw_rev); -enum irdma_status_code -irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev, u8 cmd, - struct irdma_ws_node_info *node_info); -enum irdma_status_code irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev, - struct irdma_sc_ceq *sc_ceq, u8 op); -enum irdma_status_code irdma_cqp_aeq_cmd(struct irdma_sc_dev *dev, - struct irdma_sc_aeq *sc_aeq, u8 op); -enum irdma_status_code -irdma_cqp_stats_inst_cmd(struct irdma_sc_vsi *vsi, u8 cmd, - struct irdma_stats_inst_info *stats_info); +int irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev, u8 cmd, + struct irdma_ws_node_info *node_info); +int irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_ceq *sc_ceq, + u8 op); +int irdma_cqp_aeq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_aeq *sc_aeq, + u8 op); +int irdma_cqp_stats_inst_cmd(struct irdma_sc_vsi *vsi, u8 cmd, + struct irdma_stats_inst_info *stats_info); u16 irdma_alloc_ws_node_id(struct irdma_sc_dev *dev); void irdma_free_ws_node_id(struct irdma_sc_dev *dev, u16 node_id); void irdma_update_stats(struct irdma_dev_hw_stats *hw_stats, struct irdma_gather_stats *gather_stats, struct irdma_gather_stats *last_gather_stats); /* vsi functions */ -enum irdma_status_code irdma_vsi_stats_init(struct irdma_sc_vsi *vsi, - struct irdma_vsi_stats_info *info); +int irdma_vsi_stats_init(struct irdma_sc_vsi *vsi, + struct irdma_vsi_stats_info *info); void irdma_vsi_stats_free(struct irdma_sc_vsi *vsi); void irdma_sc_vsi_init(struct irdma_sc_vsi *vsi, struct irdma_vsi_init_info *info); -enum irdma_status_code irdma_sc_add_cq_ctx(struct irdma_sc_ceq *ceq, - struct irdma_sc_cq *cq); +int irdma_sc_add_cq_ctx(struct irdma_sc_ceq *ceq, struct irdma_sc_cq *cq); void irdma_sc_remove_cq_ctx(struct irdma_sc_ceq *ceq, struct irdma_sc_cq *cq); /* misc L2 param change functions */ void irdma_change_l2params(struct irdma_sc_vsi *vsi, struct irdma_l2params *l2params); void irdma_sc_suspend_resume_qps(struct irdma_sc_vsi *vsi, u8 suspend); -enum irdma_status_code irdma_cqp_qp_suspend_resume(struct irdma_sc_qp *qp, - u8 cmd); +int irdma_cqp_qp_suspend_resume(struct irdma_sc_qp *qp, u8 cmd); void irdma_qp_add_qos(struct irdma_sc_qp *qp); void irdma_qp_rem_qos(struct irdma_sc_qp *qp); struct irdma_sc_qp *irdma_get_qp_from_list(struct list_head *head, @@ -81,31 +74,26 @@ void irdma_terminate_received(struct irdma_sc_qp *qp, /* misc */ u8 irdma_get_encoded_wqe_size(u32 wqsize, enum irdma_queue_type queue_type); void irdma_modify_qp_to_err(struct irdma_sc_qp *sc_qp); -enum irdma_status_code -irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch, - u8 hmc_fn_id, bool post_sq, - bool poll_registers); -enum irdma_status_code irdma_cfg_fpm_val(struct irdma_sc_dev *dev, - u32 qp_count); -enum irdma_status_code irdma_get_rdma_features(struct irdma_sc_dev *dev); +int irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch, + u8 hmc_fn_id, bool post_sq, + bool poll_registers); +int irdma_cfg_fpm_val(struct irdma_sc_dev *dev, u32 qp_count); +int irdma_get_rdma_features(struct irdma_sc_dev *dev); void free_sd_mem(struct irdma_sc_dev *dev); -enum irdma_status_code irdma_process_cqp_cmd(struct irdma_sc_dev *dev, - struct cqp_cmds_info *pcmdinfo); -enum irdma_status_code irdma_process_bh(struct irdma_sc_dev *dev); -enum irdma_status_code irdma_cqp_sds_cmd(struct irdma_sc_dev *dev, - struct irdma_update_sds_info *info); -enum irdma_status_code -irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev, - struct irdma_dma_mem *val_mem, u8 hmc_fn_id); -enum irdma_status_code -irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev, - struct irdma_dma_mem *val_mem, u8 hmc_fn_id); -enum irdma_status_code irdma_alloc_query_fpm_buf(struct irdma_sc_dev *dev, - struct irdma_dma_mem *mem); -enum irdma_status_code -irdma_cqp_manage_hmc_fcn_cmd(struct irdma_sc_dev *dev, - struct irdma_hmc_fcn_info *hmcfcninfo, - u16 *pmf_idx); +int irdma_process_cqp_cmd(struct irdma_sc_dev *dev, + struct cqp_cmds_info *pcmdinfo); +int irdma_process_bh(struct irdma_sc_dev *dev); +int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev, + struct irdma_update_sds_info *info); +int irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev, + struct irdma_dma_mem *val_mem, u8 hmc_fn_id); +int irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev, + struct irdma_dma_mem *val_mem, u8 hmc_fn_id); +int irdma_alloc_query_fpm_buf(struct irdma_sc_dev *dev, + struct irdma_dma_mem *mem); +int irdma_cqp_manage_hmc_fcn_cmd(struct irdma_sc_dev *dev, + struct irdma_hmc_fcn_info *hmcfcninfo, + u16 *pmf_idx); void irdma_add_dev_ref(struct irdma_sc_dev *dev); void irdma_put_dev_ref(struct irdma_sc_dev *dev); void *irdma_remove_cqp_head(struct irdma_sc_dev *dev); diff --git a/drivers/infiniband/hw/irdma/puda.c b/drivers/infiniband/hw/irdma/puda.c index 58e7d875643b..397f3d070f90 100644 --- a/drivers/infiniband/hw/irdma/puda.c +++ b/drivers/infiniband/hw/irdma/puda.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB /* Copyright (c) 2015 - 2021 Intel Corporation */ #include "osdep.h" -#include "status.h" #include "hmc.h" #include "defs.h" #include "type.h" @@ -114,8 +113,7 @@ static void irdma_puda_post_recvbuf(struct irdma_puda_rsrc *rsrc, u32 wqe_idx, * @rsrc: resource to use for buffer * @initial: flag if during init time */ -static enum irdma_status_code -irdma_puda_replenish_rq(struct irdma_puda_rsrc *rsrc, bool initial) +static int irdma_puda_replenish_rq(struct irdma_puda_rsrc *rsrc, bool initial) { u32 i; u32 invalid_cnt = rsrc->rxq_invalid_cnt; @@ -124,7 +122,7 @@ irdma_puda_replenish_rq(struct irdma_puda_rsrc *rsrc, bool initial) for (i = 0; i < invalid_cnt; i++) { buf = irdma_puda_get_bufpool(rsrc); if (!buf) - return IRDMA_ERR_list_empty; + return -ENOBUFS; irdma_puda_post_recvbuf(rsrc, rsrc->rx_wqe_idx, buf, initial); rsrc->rx_wqe_idx = ((rsrc->rx_wqe_idx + 1) % rsrc->rq_size); rsrc->rxq_invalid_cnt--; @@ -194,7 +192,7 @@ static __le64 *irdma_puda_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx) { __le64 *wqe = NULL; - enum irdma_status_code ret_code = 0; + int ret_code = 0; *wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring); if (!*wqe_idx) @@ -213,8 +211,8 @@ static __le64 *irdma_puda_get_next_send_wqe(struct irdma_qp_uk *qp, * @cq: cq for poll * @info: info return for successful completion */ -static enum irdma_status_code -irdma_puda_poll_info(struct irdma_sc_cq *cq, struct irdma_puda_cmpl_info *info) +static int irdma_puda_poll_info(struct irdma_sc_cq *cq, + struct irdma_puda_cmpl_info *info) { struct irdma_cq_uk *cq_uk = &cq->cq_uk; u64 qword0, qword2, qword3, qword6; @@ -233,7 +231,7 @@ irdma_puda_poll_info(struct irdma_sc_cq *cq, struct irdma_puda_cmpl_info *info) get_64bit_val(cqe, 24, &qword3); valid_bit = (bool)FIELD_GET(IRDMA_CQ_VALID, qword3); if (valid_bit != cq_uk->polarity) - return IRDMA_ERR_Q_EMPTY; + return -ENOENT; if (cq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) ext_valid = (bool)FIELD_GET(IRDMA_CQ_EXTCQE, qword3); @@ -246,7 +244,7 @@ irdma_puda_poll_info(struct irdma_sc_cq *cq, struct irdma_puda_cmpl_info *info) if (!peek_head) polarity ^= 1; if (polarity != cq_uk->polarity) - return IRDMA_ERR_Q_EMPTY; + return -ENOENT; IRDMA_RING_MOVE_HEAD_NOCHECK(cq_uk->cq_ring); if (!IRDMA_RING_CURRENT_HEAD(cq_uk->cq_ring)) @@ -267,7 +265,7 @@ irdma_puda_poll_info(struct irdma_sc_cq *cq, struct irdma_puda_cmpl_info *info) major_err = (u32)(FIELD_GET(IRDMA_CQ_MAJERR, qword3)); minor_err = (u32)(FIELD_GET(IRDMA_CQ_MINERR, qword3)); info->compl_error = major_err << 16 | minor_err; - return IRDMA_ERR_CQ_COMPL_ERROR; + return -EIO; } get_64bit_val(cqe, 0, &qword0); @@ -319,14 +317,13 @@ irdma_puda_poll_info(struct irdma_sc_cq *cq, struct irdma_puda_cmpl_info *info) * @cq: cq getting interrupt * @compl_err: return any completion err */ -enum irdma_status_code irdma_puda_poll_cmpl(struct irdma_sc_dev *dev, - struct irdma_sc_cq *cq, - u32 *compl_err) +int irdma_puda_poll_cmpl(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq, + u32 *compl_err) { struct irdma_qp_uk *qp; struct irdma_cq_uk *cq_uk = &cq->cq_uk; struct irdma_puda_cmpl_info info = {}; - enum irdma_status_code ret = 0; + int ret = 0; struct irdma_puda_buf *buf; struct irdma_puda_rsrc *rsrc; u8 cq_type = cq->cq_type; @@ -337,24 +334,24 @@ enum irdma_status_code irdma_puda_poll_cmpl(struct irdma_sc_dev *dev, cq->vsi->ieq; } else { ibdev_dbg(to_ibdev(dev), "PUDA: qp_type error\n"); - return IRDMA_ERR_BAD_PTR; + return -EINVAL; } ret = irdma_puda_poll_info(cq, &info); *compl_err = info.compl_error; - if (ret == IRDMA_ERR_Q_EMPTY) + if (ret == -ENOENT) return ret; if (ret) goto done; qp = info.qp; if (!qp || !rsrc) { - ret = IRDMA_ERR_BAD_PTR; + ret = -EFAULT; goto done; } if (qp->qp_id != rsrc->qp_id) { - ret = IRDMA_ERR_BAD_PTR; + ret = -EFAULT; goto done; } @@ -422,8 +419,7 @@ done: * @qp: puda qp for send * @info: buffer information for transmit */ -enum irdma_status_code irdma_puda_send(struct irdma_sc_qp *qp, - struct irdma_puda_send_info *info) +int irdma_puda_send(struct irdma_sc_qp *qp, struct irdma_puda_send_info *info) { __le64 *wqe; u32 iplen, l4len; @@ -443,7 +439,7 @@ enum irdma_status_code irdma_puda_send(struct irdma_sc_qp *qp, wqe = irdma_puda_get_next_send_wqe(&qp->qp_uk, &wqe_idx); if (!wqe) - return IRDMA_ERR_QP_TOOMANY_WRS_POSTED; + return -ENOMEM; qp->qp_uk.sq_wrtrk_array[wqe_idx].wrid = (uintptr_t)info->scratch; /* Third line of WQE descriptor */ @@ -503,7 +499,7 @@ void irdma_puda_send_buf(struct irdma_puda_rsrc *rsrc, struct irdma_puda_buf *buf) { struct irdma_puda_send_info info; - enum irdma_status_code ret = 0; + int ret = 0; unsigned long flags; spin_lock_irqsave(&rsrc->bufpool_lock, flags); @@ -603,19 +599,18 @@ static void irdma_puda_qp_setctx(struct irdma_puda_rsrc *rsrc) * @dev: Device * @qp: Resource qp */ -static enum irdma_status_code irdma_puda_qp_wqe(struct irdma_sc_dev *dev, - struct irdma_sc_qp *qp) +static int irdma_puda_qp_wqe(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp) { struct irdma_sc_cqp *cqp; __le64 *wqe; u64 hdr; struct irdma_ccq_cqe_info compl_info; - enum irdma_status_code status = 0; + int status = 0; cqp = dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, 0); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 16, qp->hw_host_ctx_pa); set_64bit_val(wqe, 40, qp->shadow_area_pa); @@ -643,11 +638,11 @@ static enum irdma_status_code irdma_puda_qp_wqe(struct irdma_sc_dev *dev, * irdma_puda_qp_create - create qp for resource * @rsrc: resource to use for buffer */ -static enum irdma_status_code irdma_puda_qp_create(struct irdma_puda_rsrc *rsrc) +static int irdma_puda_qp_create(struct irdma_puda_rsrc *rsrc) { struct irdma_sc_qp *qp = &rsrc->qp; struct irdma_qp_uk *ukqp = &qp->qp_uk; - enum irdma_status_code ret = 0; + int ret = 0; u32 sq_size, rq_size; struct irdma_dma_mem *mem; @@ -659,7 +654,7 @@ static enum irdma_status_code irdma_puda_qp_create(struct irdma_puda_rsrc *rsrc) rsrc->qpmem.size, &rsrc->qpmem.pa, GFP_KERNEL); if (!rsrc->qpmem.va) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; mem = &rsrc->qpmem; memset(mem->va, 0, rsrc->qpmem.size); @@ -722,19 +717,18 @@ static enum irdma_status_code irdma_puda_qp_create(struct irdma_puda_rsrc *rsrc) * @dev: Device * @cq: resource for cq */ -static enum irdma_status_code irdma_puda_cq_wqe(struct irdma_sc_dev *dev, - struct irdma_sc_cq *cq) +static int irdma_puda_cq_wqe(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq) { __le64 *wqe; struct irdma_sc_cqp *cqp; u64 hdr; struct irdma_ccq_cqe_info compl_info; - enum irdma_status_code status = 0; + int status = 0; cqp = dev->cqp; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, 0); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 0, cq->cq_uk.cq_size); set_64bit_val(wqe, 8, (uintptr_t)cq >> 1); @@ -775,11 +769,11 @@ static enum irdma_status_code irdma_puda_cq_wqe(struct irdma_sc_dev *dev, * irdma_puda_cq_create - create cq for resource * @rsrc: resource for which cq to create */ -static enum irdma_status_code irdma_puda_cq_create(struct irdma_puda_rsrc *rsrc) +static int irdma_puda_cq_create(struct irdma_puda_rsrc *rsrc) { struct irdma_sc_dev *dev = rsrc->dev; struct irdma_sc_cq *cq = &rsrc->cq; - enum irdma_status_code ret = 0; + int ret = 0; u32 cqsize; struct irdma_dma_mem *mem; struct irdma_cq_init_info info = {}; @@ -792,7 +786,7 @@ static enum irdma_status_code irdma_puda_cq_create(struct irdma_puda_rsrc *rsrc) rsrc->cqmem.va = dma_alloc_coherent(dev->hw->device, rsrc->cqmem.size, &rsrc->cqmem.pa, GFP_KERNEL); if (!rsrc->cqmem.va) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; mem = &rsrc->cqmem; info.dev = dev; @@ -833,7 +827,7 @@ error: */ static void irdma_puda_free_qp(struct irdma_puda_rsrc *rsrc) { - enum irdma_status_code ret; + int ret; struct irdma_ccq_cqe_info compl_info; struct irdma_sc_dev *dev = rsrc->dev; @@ -865,7 +859,7 @@ static void irdma_puda_free_qp(struct irdma_puda_rsrc *rsrc) */ static void irdma_puda_free_cq(struct irdma_puda_rsrc *rsrc) { - enum irdma_status_code ret; + int ret; struct irdma_ccq_cqe_info compl_info; struct irdma_sc_dev *dev = rsrc->dev; @@ -967,8 +961,7 @@ void irdma_puda_dele_rsrc(struct irdma_sc_vsi *vsi, enum puda_rsrc_type type, * @rsrc: resource for buffer allocation * @count: number of buffers to create */ -static enum irdma_status_code irdma_puda_allocbufs(struct irdma_puda_rsrc *rsrc, - u32 count) +static int irdma_puda_allocbufs(struct irdma_puda_rsrc *rsrc, u32 count) { u32 i; struct irdma_puda_buf *buf; @@ -978,7 +971,7 @@ static enum irdma_status_code irdma_puda_allocbufs(struct irdma_puda_rsrc *rsrc, buf = irdma_puda_alloc_buf(rsrc->dev, rsrc->buf_size); if (!buf) { rsrc->stats_buf_alloc_fail++; - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } irdma_puda_ret_bufpool(rsrc, buf); rsrc->alloc_buf_count++; @@ -1001,11 +994,11 @@ static enum irdma_status_code irdma_puda_allocbufs(struct irdma_puda_rsrc *rsrc, * @vsi: sc VSI struct * @info: resource information */ -enum irdma_status_code irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi, - struct irdma_puda_rsrc_info *info) +int irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi, + struct irdma_puda_rsrc_info *info) { struct irdma_sc_dev *dev = vsi->dev; - enum irdma_status_code ret = 0; + int ret = 0; struct irdma_puda_rsrc *rsrc; u32 pudasize; u32 sqwridsize, rqwridsize; @@ -1023,12 +1016,12 @@ enum irdma_status_code irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi, vmem = &vsi->ieq_mem; break; default: - return IRDMA_NOT_SUPPORTED; + return -EOPNOTSUPP; } vmem->size = pudasize + sqwridsize + rqwridsize; vmem->va = kzalloc(vmem->size, GFP_KERNEL); if (!vmem->va) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; rsrc = vmem->va; spin_lock_init(&rsrc->bufpool_lock); @@ -1046,7 +1039,7 @@ enum irdma_status_code irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi, rsrc->xmit_complete = irdma_ieq_tx_compl; break; default: - return IRDMA_NOT_SUPPORTED; + return -EOPNOTSUPP; } rsrc->type = info->type; @@ -1323,12 +1316,12 @@ static void irdma_ieq_compl_pfpdu(struct irdma_puda_rsrc *ieq, * @buf: first receive buffer * @fpdu_len: total length of fpdu */ -static enum irdma_status_code -irdma_ieq_create_pbufl(struct irdma_pfpdu *pfpdu, struct list_head *rxlist, - struct list_head *pbufl, struct irdma_puda_buf *buf, - u16 fpdu_len) +static int irdma_ieq_create_pbufl(struct irdma_pfpdu *pfpdu, + struct list_head *rxlist, + struct list_head *pbufl, + struct irdma_puda_buf *buf, u16 fpdu_len) { - enum irdma_status_code status = 0; + int status = 0; struct irdma_puda_buf *nextbuf; u32 nextseqnum; u16 plen = fpdu_len - buf->datalen; @@ -1338,13 +1331,13 @@ irdma_ieq_create_pbufl(struct irdma_pfpdu *pfpdu, struct list_head *rxlist, do { nextbuf = irdma_puda_get_listbuf(rxlist); if (!nextbuf) { - status = IRDMA_ERR_list_empty; + status = -ENOBUFS; break; } list_add_tail(&nextbuf->list, pbufl); if (nextbuf->seqnum != nextseqnum) { pfpdu->bad_seq_num++; - status = IRDMA_ERR_SEQ_NUM; + status = -ERANGE; break; } if (nextbuf->datalen >= plen) { @@ -1366,11 +1359,11 @@ irdma_ieq_create_pbufl(struct irdma_pfpdu *pfpdu, struct list_head *rxlist, * @buf: receive buffer * @fpdu_len: fpdu len in the buffer */ -static enum irdma_status_code -irdma_ieq_handle_partial(struct irdma_puda_rsrc *ieq, struct irdma_pfpdu *pfpdu, - struct irdma_puda_buf *buf, u16 fpdu_len) +static int irdma_ieq_handle_partial(struct irdma_puda_rsrc *ieq, + struct irdma_pfpdu *pfpdu, + struct irdma_puda_buf *buf, u16 fpdu_len) { - enum irdma_status_code status = 0; + int status = 0; u8 *crcptr; u32 mpacrc; u32 seqnum = buf->seqnum; @@ -1390,7 +1383,7 @@ irdma_ieq_handle_partial(struct irdma_puda_rsrc *ieq, struct irdma_pfpdu *pfpdu, txbuf = irdma_puda_get_bufpool(ieq); if (!txbuf) { pfpdu->no_tx_bufs++; - status = IRDMA_ERR_NO_TXBUFS; + status = -ENOBUFS; goto error; } @@ -1434,9 +1427,9 @@ error: * @pfpdu: partial management per user qp * @buf: receive buffer */ -static enum irdma_status_code irdma_ieq_process_buf(struct irdma_puda_rsrc *ieq, - struct irdma_pfpdu *pfpdu, - struct irdma_puda_buf *buf) +static int irdma_ieq_process_buf(struct irdma_puda_rsrc *ieq, + struct irdma_pfpdu *pfpdu, + struct irdma_puda_buf *buf) { u16 fpdu_len = 0; u16 datalen = buf->datalen; @@ -1450,7 +1443,7 @@ static enum irdma_status_code irdma_ieq_process_buf(struct irdma_puda_rsrc *ieq, bool partial = false; struct irdma_puda_buf *txbuf; struct list_head *rxlist = &pfpdu->rxlist; - enum irdma_status_code ret = 0; + int ret = 0; ioffset = (u16)(buf->data - (u8 *)buf->mem.va); while (datalen) { @@ -1459,7 +1452,7 @@ static enum irdma_status_code irdma_ieq_process_buf(struct irdma_puda_rsrc *ieq, ibdev_dbg(to_ibdev(ieq->dev), "IEQ: error bad fpdu len\n"); list_add(&buf->list, rxlist); - return IRDMA_ERR_MPA_CRC; + return -EINVAL; } if (datalen < fpdu_len) { @@ -1475,7 +1468,7 @@ static enum irdma_status_code irdma_ieq_process_buf(struct irdma_puda_rsrc *ieq, list_add(&buf->list, rxlist); ibdev_dbg(to_ibdev(ieq->dev), "ERR: IRDMA_ERR_MPA_CRC\n"); - return IRDMA_ERR_MPA_CRC; + return -EINVAL; } full++; pfpdu->fpdu_processed++; @@ -1490,7 +1483,7 @@ static enum irdma_status_code irdma_ieq_process_buf(struct irdma_puda_rsrc *ieq, if (!txbuf) { pfpdu->no_tx_bufs++; list_add(&buf->list, rxlist); - return IRDMA_ERR_NO_TXBUFS; + return -ENOBUFS; } /* modify txbuf's buffer header */ irdma_ieq_setup_tx_buf(buf, txbuf); @@ -1539,7 +1532,7 @@ void irdma_ieq_process_fpdus(struct irdma_sc_qp *qp, struct irdma_pfpdu *pfpdu = &qp->pfpdu; struct list_head *rxlist = &pfpdu->rxlist; struct irdma_puda_buf *buf; - enum irdma_status_code status; + int status; do { if (list_empty(rxlist)) @@ -1557,7 +1550,7 @@ void irdma_ieq_process_fpdus(struct irdma_sc_qp *qp, } /* keep processing buffers from the head of the list */ status = irdma_ieq_process_buf(ieq, pfpdu, buf); - if (status == IRDMA_ERR_MPA_CRC) { + if (status == -EINVAL) { pfpdu->mpa_crc_err = true; while (!list_empty(rxlist)) { buf = irdma_puda_get_listbuf(rxlist); @@ -1576,8 +1569,7 @@ void irdma_ieq_process_fpdus(struct irdma_sc_qp *qp, * @qp: qp pointer * @buf: buf received on IEQ used to create AH */ -static enum irdma_status_code irdma_ieq_create_ah(struct irdma_sc_qp *qp, - struct irdma_puda_buf *buf) +static int irdma_ieq_create_ah(struct irdma_sc_qp *qp, struct irdma_puda_buf *buf) { struct irdma_ah_info ah_info = {}; diff --git a/drivers/infiniband/hw/irdma/puda.h b/drivers/infiniband/hw/irdma/puda.h index db3a51170020..5f5124db6ddf 100644 --- a/drivers/infiniband/hw/irdma/puda.h +++ b/drivers/infiniband/hw/irdma/puda.h @@ -151,42 +151,33 @@ void irdma_puda_ret_bufpool(struct irdma_puda_rsrc *rsrc, struct irdma_puda_buf *buf); void irdma_puda_send_buf(struct irdma_puda_rsrc *rsrc, struct irdma_puda_buf *buf); -enum irdma_status_code irdma_puda_send(struct irdma_sc_qp *qp, - struct irdma_puda_send_info *info); -enum irdma_status_code -irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi, - struct irdma_puda_rsrc_info *info); +int irdma_puda_send(struct irdma_sc_qp *qp, struct irdma_puda_send_info *info); +int irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi, + struct irdma_puda_rsrc_info *info); void irdma_puda_dele_rsrc(struct irdma_sc_vsi *vsi, enum puda_rsrc_type type, bool reset); -enum irdma_status_code irdma_puda_poll_cmpl(struct irdma_sc_dev *dev, - struct irdma_sc_cq *cq, - u32 *compl_err); +int irdma_puda_poll_cmpl(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq, + u32 *compl_err); struct irdma_sc_qp *irdma_ieq_get_qp(struct irdma_sc_dev *dev, struct irdma_puda_buf *buf); -enum irdma_status_code -irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info, - struct irdma_puda_buf *buf); -enum irdma_status_code irdma_ieq_check_mpacrc(struct shash_desc *desc, - void *addr, u32 len, u32 val); -enum irdma_status_code irdma_init_hash_desc(struct shash_desc **desc); +int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info, + struct irdma_puda_buf *buf); +int irdma_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 len, u32 val); +int irdma_init_hash_desc(struct shash_desc **desc); void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp); void irdma_free_hash_desc(struct shash_desc *desc); -void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len, - u32 seqnum); -enum irdma_status_code irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev, - struct irdma_sc_qp *qp); -enum irdma_status_code irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev, - struct irdma_sc_cq *cq); -enum irdma_status_code irdma_cqp_qp_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp); +void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len, u32 seqnum); +int irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp); +int irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq); +int irdma_cqp_qp_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp); void irdma_cqp_cq_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq); void irdma_puda_ieq_get_ah_info(struct irdma_sc_qp *qp, struct irdma_ah_info *ah_info); -enum irdma_status_code irdma_puda_create_ah(struct irdma_sc_dev *dev, - struct irdma_ah_info *ah_info, - bool wait, enum puda_rsrc_type type, - void *cb_param, - struct irdma_sc_ah **ah); +int irdma_puda_create_ah(struct irdma_sc_dev *dev, + struct irdma_ah_info *ah_info, bool wait, + enum puda_rsrc_type type, void *cb_param, + struct irdma_sc_ah **ah); void irdma_puda_free_ah(struct irdma_sc_dev *dev, struct irdma_sc_ah *ah); void irdma_ieq_process_fpdus(struct irdma_sc_qp *qp, struct irdma_puda_rsrc *ieq); diff --git a/drivers/infiniband/hw/irdma/status.h b/drivers/infiniband/hw/irdma/status.h deleted file mode 100644 index 22ea3888253a..000000000000 --- a/drivers/infiniband/hw/irdma/status.h +++ /dev/null @@ -1,71 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB */ -/* Copyright (c) 2015 - 2020 Intel Corporation */ -#ifndef IRDMA_STATUS_H -#define IRDMA_STATUS_H - -/* Error Codes */ -enum irdma_status_code { - IRDMA_SUCCESS = 0, - IRDMA_ERR_NVM = -1, - IRDMA_ERR_NVM_CHECKSUM = -2, - IRDMA_ERR_CFG = -4, - IRDMA_ERR_PARAM = -5, - IRDMA_ERR_DEVICE_NOT_SUPPORTED = -6, - IRDMA_ERR_RESET_FAILED = -7, - IRDMA_ERR_SWFW_SYNC = -8, - IRDMA_ERR_NO_MEMORY = -9, - IRDMA_ERR_BAD_PTR = -10, - IRDMA_ERR_INVALID_PD_ID = -11, - IRDMA_ERR_INVALID_QP_ID = -12, - IRDMA_ERR_INVALID_CQ_ID = -13, - IRDMA_ERR_INVALID_CEQ_ID = -14, - IRDMA_ERR_INVALID_AEQ_ID = -15, - IRDMA_ERR_INVALID_SIZE = -16, - IRDMA_ERR_INVALID_ARP_INDEX = -17, - IRDMA_ERR_INVALID_FPM_FUNC_ID = -18, - IRDMA_ERR_QP_INVALID_MSG_SIZE = -19, - IRDMA_ERR_QP_TOOMANY_WRS_POSTED = -20, - IRDMA_ERR_INVALID_FRAG_COUNT = -21, - IRDMA_ERR_Q_EMPTY = -22, - IRDMA_ERR_INVALID_ALIGNMENT = -23, - IRDMA_ERR_FLUSHED_Q = -24, - IRDMA_ERR_INVALID_PUSH_PAGE_INDEX = -25, - IRDMA_ERR_INVALID_INLINE_DATA_SIZE = -26, - IRDMA_ERR_TIMEOUT = -27, - IRDMA_ERR_OPCODE_MISMATCH = -28, - IRDMA_ERR_CQP_COMPL_ERROR = -29, - IRDMA_ERR_INVALID_VF_ID = -30, - IRDMA_ERR_INVALID_HMCFN_ID = -31, - IRDMA_ERR_BACKING_PAGE_ERROR = -32, - IRDMA_ERR_NO_PBLCHUNKS_AVAILABLE = -33, - IRDMA_ERR_INVALID_PBLE_INDEX = -34, - IRDMA_ERR_INVALID_SD_INDEX = -35, - IRDMA_ERR_INVALID_PAGE_DESC_INDEX = -36, - IRDMA_ERR_INVALID_SD_TYPE = -37, - IRDMA_ERR_MEMCPY_FAILED = -38, - IRDMA_ERR_INVALID_HMC_OBJ_INDEX = -39, - IRDMA_ERR_INVALID_HMC_OBJ_COUNT = -40, - IRDMA_ERR_BUF_TOO_SHORT = -43, - IRDMA_ERR_BAD_IWARP_CQE = -44, - IRDMA_ERR_NVM_BLANK_MODE = -45, - IRDMA_ERR_NOT_IMPL = -46, - IRDMA_ERR_PE_DOORBELL_NOT_ENA = -47, - IRDMA_ERR_NOT_READY = -48, - IRDMA_NOT_SUPPORTED = -49, - IRDMA_ERR_FIRMWARE_API_VER = -50, - IRDMA_ERR_RING_FULL = -51, - IRDMA_ERR_MPA_CRC = -61, - IRDMA_ERR_NO_TXBUFS = -62, - IRDMA_ERR_SEQ_NUM = -63, - IRDMA_ERR_list_empty = -64, - IRDMA_ERR_INVALID_MAC_ADDR = -65, - IRDMA_ERR_BAD_STAG = -66, - IRDMA_ERR_CQ_COMPL_ERROR = -67, - IRDMA_ERR_Q_DESTROYED = -68, - IRDMA_ERR_INVALID_FEAT_CNT = -69, - IRDMA_ERR_REG_CQ_FULL = -70, - IRDMA_ERR_VF_MSG_ERROR = -71, - IRDMA_ERR_NO_INTR = -72, - IRDMA_ERR_REG_QSET = -73, -}; -#endif /* IRDMA_STATUS_H */ diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index 9483bb3e10ea..9e7b8ecb137a 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -2,7 +2,6 @@ /* Copyright (c) 2015 - 2021 Intel Corporation */ #ifndef IRDMA_TYPE_H #define IRDMA_TYPE_H -#include "status.h" #include "osdep.h" #include "irdma.h" #include "user.h" @@ -402,8 +401,8 @@ struct irdma_sc_cqp { u64 host_ctx_pa; void *back_cqp; struct irdma_sc_dev *dev; - enum irdma_status_code (*process_cqp_sds)(struct irdma_sc_dev *dev, - struct irdma_update_sds_info *info); + int (*process_cqp_sds)(struct irdma_sc_dev *dev, + struct irdma_update_sds_info *info); struct irdma_dma_mem sdbuf; struct irdma_ring sq_ring; struct irdma_cqp_quanta *sq_base; @@ -605,12 +604,14 @@ struct irdma_sc_vsi { struct irdma_qos qos[IRDMA_MAX_USER_PRIORITY]; struct irdma_vsi_pestat *pestat; atomic_t qp_suspend_reqs; - enum irdma_status_code (*register_qset)(struct irdma_sc_vsi *vsi, - struct irdma_ws_node *tc_node); + int (*register_qset)(struct irdma_sc_vsi *vsi, + struct irdma_ws_node *tc_node); void (*unregister_qset)(struct irdma_sc_vsi *vsi, struct irdma_ws_node *tc_node); u8 qos_rel_bw; u8 qos_prio_type; + u8 dscp_map[IIDC_MAX_DSCP_MAPPING]; + bool dscp_mode:1; }; struct irdma_sc_dev { @@ -655,7 +656,7 @@ struct irdma_sc_dev { bool vchnl_up:1; bool ceq_valid:1; u8 pci_rev; - enum irdma_status_code (*ws_add)(struct irdma_sc_vsi *vsi, u8 user_pri); + int (*ws_add)(struct irdma_sc_vsi *vsi, u8 user_pri); void (*ws_remove)(struct irdma_sc_vsi *vsi, u8 user_pri); void (*ws_reset)(struct irdma_sc_vsi *vsi); }; @@ -735,11 +736,13 @@ struct irdma_l2params { u16 qs_handle_list[IRDMA_MAX_USER_PRIORITY]; u16 mtu; u8 up2tc[IRDMA_MAX_USER_PRIORITY]; + u8 dscp_map[IIDC_MAX_DSCP_MAPPING]; u8 num_tc; u8 vsi_rel_bw; u8 vsi_prio_type; bool mtu_changed:1; bool tc_changed:1; + bool dscp_mode:1; }; struct irdma_vsi_init_info { @@ -750,8 +753,8 @@ struct irdma_vsi_init_info { u16 pf_data_vsi_num; enum irdma_vm_vf_type vm_vf_type; u16 vm_id; - enum irdma_status_code (*register_qset)(struct irdma_sc_vsi *vsi, - struct irdma_ws_node *tc_node); + int (*register_qset)(struct irdma_sc_vsi *vsi, + struct irdma_ws_node *tc_node); void (*unregister_qset)(struct irdma_sc_vsi *vsi, struct irdma_ws_node *tc_node); }; @@ -1198,29 +1201,27 @@ struct irdma_irq_ops { }; void irdma_sc_ccq_arm(struct irdma_sc_cq *ccq); -enum irdma_status_code irdma_sc_ccq_create(struct irdma_sc_cq *ccq, u64 scratch, - bool check_overflow, bool post_sq); -enum irdma_status_code irdma_sc_ccq_destroy(struct irdma_sc_cq *ccq, u64 scratch, - bool post_sq); -enum irdma_status_code irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq, - struct irdma_ccq_cqe_info *info); -enum irdma_status_code irdma_sc_ccq_init(struct irdma_sc_cq *ccq, - struct irdma_ccq_init_info *info); +int irdma_sc_ccq_create(struct irdma_sc_cq *ccq, u64 scratch, + bool check_overflow, bool post_sq); +int irdma_sc_ccq_destroy(struct irdma_sc_cq *ccq, u64 scratch, bool post_sq); +int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq, + struct irdma_ccq_cqe_info *info); +int irdma_sc_ccq_init(struct irdma_sc_cq *ccq, + struct irdma_ccq_init_info *info); -enum irdma_status_code irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratch); -enum irdma_status_code irdma_sc_cceq_destroy_done(struct irdma_sc_ceq *ceq); +int irdma_sc_cceq_create(struct irdma_sc_ceq *ceq, u64 scratch); +int irdma_sc_cceq_destroy_done(struct irdma_sc_ceq *ceq); -enum irdma_status_code irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratch, - bool post_sq); -enum irdma_status_code irdma_sc_ceq_init(struct irdma_sc_ceq *ceq, - struct irdma_ceq_init_info *info); +int irdma_sc_ceq_destroy(struct irdma_sc_ceq *ceq, u64 scratch, bool post_sq); +int irdma_sc_ceq_init(struct irdma_sc_ceq *ceq, + struct irdma_ceq_init_info *info); void irdma_sc_cleanup_ceqes(struct irdma_sc_cq *cq, struct irdma_sc_ceq *ceq); void *irdma_sc_process_ceq(struct irdma_sc_dev *dev, struct irdma_sc_ceq *ceq); -enum irdma_status_code irdma_sc_aeq_init(struct irdma_sc_aeq *aeq, - struct irdma_aeq_init_info *info); -enum irdma_status_code irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, - struct irdma_aeqe_info *info); +int irdma_sc_aeq_init(struct irdma_sc_aeq *aeq, + struct irdma_aeq_init_info *info); +int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, + struct irdma_aeqe_info *info); void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count); void irdma_sc_pd_init(struct irdma_sc_dev *dev, struct irdma_sc_pd *pd, u32 pd_id, @@ -1228,31 +1229,27 @@ void irdma_sc_pd_init(struct irdma_sc_dev *dev, struct irdma_sc_pd *pd, u32 pd_i void irdma_cfg_aeq(struct irdma_sc_dev *dev, u32 idx, bool enable); void irdma_check_cqp_progress(struct irdma_cqp_timeout *cqp_timeout, struct irdma_sc_dev *dev); -enum irdma_status_code irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err, - u16 *min_err); -enum irdma_status_code irdma_sc_cqp_destroy(struct irdma_sc_cqp *cqp); -enum irdma_status_code irdma_sc_cqp_init(struct irdma_sc_cqp *cqp, - struct irdma_cqp_init_info *info); +int irdma_sc_cqp_create(struct irdma_sc_cqp *cqp, u16 *maj_err, u16 *min_err); +int irdma_sc_cqp_destroy(struct irdma_sc_cqp *cqp); +int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp, + struct irdma_cqp_init_info *info); void irdma_sc_cqp_post_sq(struct irdma_sc_cqp *cqp); -enum irdma_status_code irdma_sc_poll_for_cqp_op_done(struct irdma_sc_cqp *cqp, u8 opcode, - struct irdma_ccq_cqe_info *cmpl_info); -enum irdma_status_code irdma_sc_fast_register(struct irdma_sc_qp *qp, - struct irdma_fast_reg_stag_info *info, - bool post_sq); -enum irdma_status_code irdma_sc_qp_create(struct irdma_sc_qp *qp, - struct irdma_create_qp_info *info, - u64 scratch, bool post_sq); -enum irdma_status_code irdma_sc_qp_destroy(struct irdma_sc_qp *qp, - u64 scratch, bool remove_hash_idx, - bool ignore_mw_bnd, bool post_sq); -enum irdma_status_code irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp, - struct irdma_qp_flush_info *info, - u64 scratch, bool post_sq); -enum irdma_status_code irdma_sc_qp_init(struct irdma_sc_qp *qp, - struct irdma_qp_init_info *info); -enum irdma_status_code irdma_sc_qp_modify(struct irdma_sc_qp *qp, - struct irdma_modify_qp_info *info, - u64 scratch, bool post_sq); +int irdma_sc_poll_for_cqp_op_done(struct irdma_sc_cqp *cqp, u8 opcode, + struct irdma_ccq_cqe_info *cmpl_info); +int irdma_sc_fast_register(struct irdma_sc_qp *qp, + struct irdma_fast_reg_stag_info *info, bool post_sq); +int irdma_sc_qp_create(struct irdma_sc_qp *qp, + struct irdma_create_qp_info *info, u64 scratch, + bool post_sq); +int irdma_sc_qp_destroy(struct irdma_sc_qp *qp, u64 scratch, + bool remove_hash_idx, bool ignore_mw_bnd, bool post_sq); +int irdma_sc_qp_flush_wqes(struct irdma_sc_qp *qp, + struct irdma_qp_flush_info *info, u64 scratch, + bool post_sq); +int irdma_sc_qp_init(struct irdma_sc_qp *qp, struct irdma_qp_init_info *info); +int irdma_sc_qp_modify(struct irdma_sc_qp *qp, + struct irdma_modify_qp_info *info, u64 scratch, + bool post_sq); void irdma_sc_send_lsmm(struct irdma_sc_qp *qp, void *lsmm_buf, u32 size, irdma_stag stag); @@ -1261,14 +1258,12 @@ void irdma_sc_qp_setctx(struct irdma_sc_qp *qp, __le64 *qp_ctx, struct irdma_qp_host_ctx_info *info); void irdma_sc_qp_setctx_roce(struct irdma_sc_qp *qp, __le64 *qp_ctx, struct irdma_qp_host_ctx_info *info); -enum irdma_status_code irdma_sc_cq_destroy(struct irdma_sc_cq *cq, u64 scratch, - bool post_sq); -enum irdma_status_code irdma_sc_cq_init(struct irdma_sc_cq *cq, - struct irdma_cq_init_info *info); +int irdma_sc_cq_destroy(struct irdma_sc_cq *cq, u64 scratch, bool post_sq); +int irdma_sc_cq_init(struct irdma_sc_cq *cq, struct irdma_cq_init_info *info); void irdma_sc_cq_resize(struct irdma_sc_cq *cq, struct irdma_modify_cq_info *info); -enum irdma_status_code irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, - u64 scratch, u8 hmc_fn_id, - bool post_sq, bool poll_registers); +int irdma_sc_static_hmc_pages_allocated(struct irdma_sc_cqp *cqp, u64 scratch, + u8 hmc_fn_id, bool post_sq, + bool poll_registers); void sc_vsi_update_stats(struct irdma_sc_vsi *vsi); struct cqp_info { diff --git a/drivers/infiniband/hw/irdma/uda.c b/drivers/infiniband/hw/irdma/uda.c index 7a9988ddbd01..284cec2a74de 100644 --- a/drivers/infiniband/hw/irdma/uda.c +++ b/drivers/infiniband/hw/irdma/uda.c @@ -3,7 +3,6 @@ #include #include "osdep.h" -#include "status.h" #include "hmc.h" #include "defs.h" #include "type.h" @@ -18,16 +17,15 @@ * @op: Operation * @scratch: u64 saved to be used during cqp completion */ -enum irdma_status_code irdma_sc_access_ah(struct irdma_sc_cqp *cqp, - struct irdma_ah_info *info, - u32 op, u64 scratch) +int irdma_sc_access_ah(struct irdma_sc_cqp *cqp, struct irdma_ah_info *info, + u32 op, u64 scratch) { __le64 *wqe; u64 qw1, qw2; wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) - return IRDMA_ERR_RING_FULL; + return -ENOMEM; set_64bit_val(wqe, 0, ether_addr_to_u64(info->mac_addr) << 16); qw1 = FIELD_PREP(IRDMA_UDA_CQPSQ_MAV_PDINDEXLO, info->pd_idx) | @@ -86,8 +84,7 @@ enum irdma_status_code irdma_sc_access_ah(struct irdma_sc_cqp *cqp, * irdma_create_mg_ctx() - create a mcg context * @info: multicast group context info */ -static enum irdma_status_code -irdma_create_mg_ctx(struct irdma_mcast_grp_info *info) +static void irdma_create_mg_ctx(struct irdma_mcast_grp_info *info) { struct irdma_mcast_grp_ctx_entry_info *entry_info = NULL; u8 idx = 0; /* index in the array */ @@ -106,8 +103,6 @@ irdma_create_mg_ctx(struct irdma_mcast_grp_info *info) ctx_idx++; } } - - return 0; } /** @@ -117,27 +112,24 @@ irdma_create_mg_ctx(struct irdma_mcast_grp_info *info) * @op: operation to perform * @scratch: u64 saved to be used during cqp completion */ -enum irdma_status_code irdma_access_mcast_grp(struct irdma_sc_cqp *cqp, - struct irdma_mcast_grp_info *info, - u32 op, u64 scratch) +int irdma_access_mcast_grp(struct irdma_sc_cqp *cqp, + struct irdma_mcast_grp_info *info, u32 op, + u64 scratch) { __le64 *wqe; - enum irdma_status_code ret_code = 0; if (info->mg_id >= IRDMA_UDA_MAX_FSI_MGS) { ibdev_dbg(to_ibdev(cqp->dev), "WQE: mg_id out of range\n"); - return IRDMA_ERR_PARAM; + return -EINVAL; } wqe = irdma_sc_cqp_get_next_send_wqe(cqp, scratch); if (!wqe) { ibdev_dbg(to_ibdev(cqp->dev), "WQE: ring full\n"); - return IRDMA_ERR_RING_FULL; + return -ENOMEM; } - ret_code = irdma_create_mg_ctx(info); - if (ret_code) - return ret_code; + irdma_create_mg_ctx(info); set_64bit_val(wqe, 32, info->dma_mem_mc.pa); set_64bit_val(wqe, 16, @@ -198,8 +190,8 @@ static bool irdma_compare_mgs(struct irdma_mcast_grp_ctx_entry_info *entry1, * @ctx: Multcast group context * @mg: Multcast group info */ -enum irdma_status_code irdma_sc_add_mcast_grp(struct irdma_mcast_grp_info *ctx, - struct irdma_mcast_grp_ctx_entry_info *mg) +int irdma_sc_add_mcast_grp(struct irdma_mcast_grp_info *ctx, + struct irdma_mcast_grp_ctx_entry_info *mg) { u32 idx; bool free_entry_found = false; @@ -228,7 +220,7 @@ enum irdma_status_code irdma_sc_add_mcast_grp(struct irdma_mcast_grp_info *ctx, return 0; } - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } /** @@ -239,8 +231,8 @@ enum irdma_status_code irdma_sc_add_mcast_grp(struct irdma_mcast_grp_info *ctx, * Finds and removes a specific mulicast group from context, all * parameters must match to remove a multicast group. */ -enum irdma_status_code irdma_sc_del_mcast_grp(struct irdma_mcast_grp_info *ctx, - struct irdma_mcast_grp_ctx_entry_info *mg) +int irdma_sc_del_mcast_grp(struct irdma_mcast_grp_info *ctx, + struct irdma_mcast_grp_ctx_entry_info *mg) { u32 idx; @@ -269,5 +261,5 @@ enum irdma_status_code irdma_sc_del_mcast_grp(struct irdma_mcast_grp_info *ctx, } } - return IRDMA_ERR_PARAM; + return -EINVAL; } diff --git a/drivers/infiniband/hw/irdma/uda.h b/drivers/infiniband/hw/irdma/uda.h index a4ad0367dc96..fe4820ff0cca 100644 --- a/drivers/infiniband/hw/irdma/uda.h +++ b/drivers/infiniband/hw/irdma/uda.h @@ -32,56 +32,54 @@ struct irdma_sc_ah { struct irdma_ah_info ah_info; }; -enum irdma_status_code irdma_sc_add_mcast_grp(struct irdma_mcast_grp_info *ctx, - struct irdma_mcast_grp_ctx_entry_info *mg); -enum irdma_status_code irdma_sc_del_mcast_grp(struct irdma_mcast_grp_info *ctx, - struct irdma_mcast_grp_ctx_entry_info *mg); -enum irdma_status_code irdma_sc_access_ah(struct irdma_sc_cqp *cqp, struct irdma_ah_info *info, - u32 op, u64 scratch); -enum irdma_status_code irdma_access_mcast_grp(struct irdma_sc_cqp *cqp, - struct irdma_mcast_grp_info *info, - u32 op, u64 scratch); +int irdma_sc_add_mcast_grp(struct irdma_mcast_grp_info *ctx, + struct irdma_mcast_grp_ctx_entry_info *mg); +int irdma_sc_del_mcast_grp(struct irdma_mcast_grp_info *ctx, + struct irdma_mcast_grp_ctx_entry_info *mg); +int irdma_sc_access_ah(struct irdma_sc_cqp *cqp, struct irdma_ah_info *info, + u32 op, u64 scratch); +int irdma_access_mcast_grp(struct irdma_sc_cqp *cqp, + struct irdma_mcast_grp_info *info, u32 op, + u64 scratch); static inline void irdma_sc_init_ah(struct irdma_sc_dev *dev, struct irdma_sc_ah *ah) { ah->dev = dev; } -static inline enum irdma_status_code irdma_sc_create_ah(struct irdma_sc_cqp *cqp, - struct irdma_ah_info *info, - u64 scratch) +static inline int irdma_sc_create_ah(struct irdma_sc_cqp *cqp, + struct irdma_ah_info *info, u64 scratch) { return irdma_sc_access_ah(cqp, info, IRDMA_CQP_OP_CREATE_ADDR_HANDLE, scratch); } -static inline enum irdma_status_code irdma_sc_destroy_ah(struct irdma_sc_cqp *cqp, - struct irdma_ah_info *info, - u64 scratch) +static inline int irdma_sc_destroy_ah(struct irdma_sc_cqp *cqp, + struct irdma_ah_info *info, u64 scratch) { return irdma_sc_access_ah(cqp, info, IRDMA_CQP_OP_DESTROY_ADDR_HANDLE, scratch); } -static inline enum irdma_status_code irdma_sc_create_mcast_grp(struct irdma_sc_cqp *cqp, - struct irdma_mcast_grp_info *info, - u64 scratch) +static inline int irdma_sc_create_mcast_grp(struct irdma_sc_cqp *cqp, + struct irdma_mcast_grp_info *info, + u64 scratch) { return irdma_access_mcast_grp(cqp, info, IRDMA_CQP_OP_CREATE_MCAST_GRP, scratch); } -static inline enum irdma_status_code irdma_sc_modify_mcast_grp(struct irdma_sc_cqp *cqp, - struct irdma_mcast_grp_info *info, - u64 scratch) +static inline int irdma_sc_modify_mcast_grp(struct irdma_sc_cqp *cqp, + struct irdma_mcast_grp_info *info, + u64 scratch) { return irdma_access_mcast_grp(cqp, info, IRDMA_CQP_OP_MODIFY_MCAST_GRP, scratch); } -static inline enum irdma_status_code irdma_sc_destroy_mcast_grp(struct irdma_sc_cqp *cqp, - struct irdma_mcast_grp_info *info, - u64 scratch) +static inline int irdma_sc_destroy_mcast_grp(struct irdma_sc_cqp *cqp, + struct irdma_mcast_grp_info *info, + u64 scratch) { return irdma_access_mcast_grp(cqp, info, IRDMA_CQP_OP_DESTROY_MCAST_GRP, scratch); diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index 57a9444e9ea7..daeab5daed5b 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB /* Copyright (c) 2015 - 2021 Intel Corporation */ #include "osdep.h" -#include "status.h" #include "defs.h" #include "user.h" #include "irdma.h" @@ -56,7 +55,7 @@ static void irdma_set_fragment_gen_1(__le64 *wqe, u32 offset, * irdma_nop_1 - insert a NOP wqe * @qp: hw qp ptr */ -static enum irdma_status_code irdma_nop_1(struct irdma_qp_uk *qp) +static int irdma_nop_1(struct irdma_qp_uk *qp) { u64 hdr; __le64 *wqe; @@ -64,7 +63,7 @@ static enum irdma_status_code irdma_nop_1(struct irdma_qp_uk *qp) bool signaled = false; if (!qp->sq_ring.head) - return IRDMA_ERR_PARAM; + return -EINVAL; wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring); wqe = qp->sq_base[wqe_idx].elem; @@ -245,7 +244,7 @@ __le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx, __le64 *irdma_qp_get_next_recv_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx) { __le64 *wqe; - enum irdma_status_code ret_code; + int ret_code; if (IRDMA_RING_FULL_ERR(qp->rq_ring)) return NULL; @@ -268,16 +267,15 @@ __le64 *irdma_qp_get_next_recv_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx) * @info: post sq information * @post_sq: flag to post sq */ -enum irdma_status_code irdma_uk_rdma_write(struct irdma_qp_uk *qp, - struct irdma_post_sq_info *info, - bool post_sq) +int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, + bool post_sq) { u64 hdr; __le64 *wqe; struct irdma_rdma_write *op_info; u32 i, wqe_idx; u32 total_size = 0, byte_off; - enum irdma_status_code ret_code; + int ret_code; u32 frag_cnt, addl_frag_cnt; bool read_fence = false; u16 quanta; @@ -286,7 +284,7 @@ enum irdma_status_code irdma_uk_rdma_write(struct irdma_qp_uk *qp, op_info = &info->op.rdma_write; if (op_info->num_lo_sges > qp->max_sq_frag_cnt) - return IRDMA_ERR_INVALID_FRAG_COUNT; + return -EINVAL; for (i = 0; i < op_info->num_lo_sges; i++) total_size += op_info->lo_sg_list[i].length; @@ -305,7 +303,7 @@ enum irdma_status_code irdma_uk_rdma_write(struct irdma_qp_uk *qp, wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info); if (!wqe) - return IRDMA_ERR_QP_TOOMANY_WRS_POSTED; + return -ENOMEM; irdma_clr_wqes(qp, wqe_idx); @@ -370,12 +368,11 @@ enum irdma_status_code irdma_uk_rdma_write(struct irdma_qp_uk *qp, * @inv_stag: flag for inv_stag * @post_sq: flag to post sq */ -enum irdma_status_code irdma_uk_rdma_read(struct irdma_qp_uk *qp, - struct irdma_post_sq_info *info, - bool inv_stag, bool post_sq) +int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, + bool inv_stag, bool post_sq) { struct irdma_rdma_read *op_info; - enum irdma_status_code ret_code; + int ret_code; u32 i, byte_off, total_size = 0; bool local_fence = false; u32 addl_frag_cnt; @@ -388,7 +385,7 @@ enum irdma_status_code irdma_uk_rdma_read(struct irdma_qp_uk *qp, op_info = &info->op.rdma_read; if (qp->max_sq_frag_cnt < op_info->num_lo_sges) - return IRDMA_ERR_INVALID_FRAG_COUNT; + return -EINVAL; for (i = 0; i < op_info->num_lo_sges; i++) total_size += op_info->lo_sg_list[i].length; @@ -400,7 +397,7 @@ enum irdma_status_code irdma_uk_rdma_read(struct irdma_qp_uk *qp, wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info); if (!wqe) - return IRDMA_ERR_QP_TOOMANY_WRS_POSTED; + return -ENOMEM; irdma_clr_wqes(qp, wqe_idx); @@ -457,15 +454,14 @@ enum irdma_status_code irdma_uk_rdma_read(struct irdma_qp_uk *qp, * @info: post sq information * @post_sq: flag to post sq */ -enum irdma_status_code irdma_uk_send(struct irdma_qp_uk *qp, - struct irdma_post_sq_info *info, - bool post_sq) +int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, + bool post_sq) { __le64 *wqe; struct irdma_post_send *op_info; u64 hdr; u32 i, wqe_idx, total_size = 0, byte_off; - enum irdma_status_code ret_code; + int ret_code; u32 frag_cnt, addl_frag_cnt; bool read_fence = false; u16 quanta; @@ -474,7 +470,7 @@ enum irdma_status_code irdma_uk_send(struct irdma_qp_uk *qp, op_info = &info->op.send; if (qp->max_sq_frag_cnt < op_info->num_sges) - return IRDMA_ERR_INVALID_FRAG_COUNT; + return -EINVAL; for (i = 0; i < op_info->num_sges; i++) total_size += op_info->sg_list[i].length; @@ -490,7 +486,7 @@ enum irdma_status_code irdma_uk_send(struct irdma_qp_uk *qp, wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info); if (!wqe) - return IRDMA_ERR_QP_TOOMANY_WRS_POSTED; + return -ENOMEM; irdma_clr_wqes(qp, wqe_idx); @@ -678,9 +674,8 @@ static u16 irdma_inline_data_size_to_quanta(u32 data_size) * @info: post sq information * @post_sq: flag to post sq */ -enum irdma_status_code -irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, - bool post_sq) +int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, + struct irdma_post_sq_info *info, bool post_sq) { __le64 *wqe; struct irdma_inline_rdma_write *op_info; @@ -693,13 +688,13 @@ irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *in op_info = &info->op.inline_rdma_write; if (op_info->len > qp->max_inline_data) - return IRDMA_ERR_INVALID_INLINE_DATA_SIZE; + return -EINVAL; quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len); wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len, info); if (!wqe) - return IRDMA_ERR_QP_TOOMANY_WRS_POSTED; + return -ENOMEM; irdma_clr_wqes(qp, wqe_idx); @@ -745,9 +740,8 @@ irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *in * @info: post sq information * @post_sq: flag to post sq */ -enum irdma_status_code irdma_uk_inline_send(struct irdma_qp_uk *qp, - struct irdma_post_sq_info *info, - bool post_sq) +int irdma_uk_inline_send(struct irdma_qp_uk *qp, + struct irdma_post_sq_info *info, bool post_sq) { __le64 *wqe; struct irdma_post_inline_send *op_info; @@ -760,13 +754,13 @@ enum irdma_status_code irdma_uk_inline_send(struct irdma_qp_uk *qp, op_info = &info->op.inline_send; if (op_info->len > qp->max_inline_data) - return IRDMA_ERR_INVALID_INLINE_DATA_SIZE; + return -EINVAL; quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len); wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len, info); if (!wqe) - return IRDMA_ERR_QP_TOOMANY_WRS_POSTED; + return -ENOMEM; irdma_clr_wqes(qp, wqe_idx); @@ -817,9 +811,9 @@ enum irdma_status_code irdma_uk_inline_send(struct irdma_qp_uk *qp, * @info: post sq information * @post_sq: flag to post sq */ -enum irdma_status_code -irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, - struct irdma_post_sq_info *info, bool post_sq) +int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, + struct irdma_post_sq_info *info, + bool post_sq) { __le64 *wqe; struct irdma_inv_local_stag *op_info; @@ -835,7 +829,7 @@ irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, IRDMA_QP_WQE_MIN_QUANTA, 0, info); if (!wqe) - return IRDMA_ERR_QP_TOOMANY_WRS_POSTED; + return -ENOMEM; irdma_clr_wqes(qp, wqe_idx); @@ -871,8 +865,8 @@ irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, * @qp: hw qp ptr * @info: post rq information */ -enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp, - struct irdma_post_rq_info *info) +int irdma_uk_post_receive(struct irdma_qp_uk *qp, + struct irdma_post_rq_info *info) { u32 wqe_idx, i, byte_off; u32 addl_frag_cnt; @@ -880,11 +874,11 @@ enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp, u64 hdr; if (qp->max_rq_frag_cnt < info->num_sges) - return IRDMA_ERR_INVALID_FRAG_COUNT; + return -EINVAL; wqe = irdma_qp_get_next_recv_wqe(qp, &wqe_idx); if (!wqe) - return IRDMA_ERR_QP_TOOMANY_WRS_POSTED; + return -ENOMEM; qp->rq_wrid_array[wqe_idx] = info->wr_id; addl_frag_cnt = info->num_sges > 1 ? (info->num_sges - 1) : 0; @@ -1000,15 +994,15 @@ void irdma_uk_cq_request_notification(struct irdma_cq_uk *cq, * @cq: hw cq * @info: cq poll information returned */ -enum irdma_status_code -irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info) +int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, + struct irdma_cq_poll_info *info) { u64 comp_ctx, qword0, qword2, qword3; __le64 *cqe; struct irdma_qp_uk *qp; struct irdma_ring *pring = NULL; u32 wqe_idx, q_type; - enum irdma_status_code ret_code; + int ret_code; bool move_cq_head = true; u8 polarity; bool ext_valid; @@ -1022,7 +1016,7 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info) get_64bit_val(cqe, 24, &qword3); polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3); if (polarity != cq->polarity) - return IRDMA_ERR_Q_EMPTY; + return -ENOENT; /* Ensure CQE contents are read after valid bit is checked */ dma_rmb(); @@ -1045,7 +1039,7 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info) polarity ^= 1; } if (polarity != cq->polarity) - return IRDMA_ERR_Q_EMPTY; + return -ENOENT; /* Ensure ext CQE contents are read after ext valid bit is checked */ dma_rmb(); @@ -1112,7 +1106,7 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info) info->solicited_event = (bool)FIELD_GET(IRDMACQ_SOEVENT, qword3); qp = (struct irdma_qp_uk *)(unsigned long)comp_ctx; if (!qp || qp->destroy_pending) { - ret_code = IRDMA_ERR_Q_DESTROYED; + ret_code = -EFAULT; goto exit; } wqe_idx = (u32)FIELD_GET(IRDMA_CQ_WQEIDX, qword3); @@ -1126,7 +1120,7 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info) if (info->comp_status == IRDMA_COMPL_STATUS_FLUSHED || info->comp_status == IRDMA_COMPL_STATUS_UNKNOWN) { if (!IRDMA_RING_MORE_WORK(qp->rq_ring)) { - ret_code = IRDMA_ERR_Q_EMPTY; + ret_code = -ENOENT; goto exit; } @@ -1186,7 +1180,7 @@ irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, struct irdma_cq_poll_info *info) wqe_idx + qp->sq_wrtrk_array[wqe_idx].quanta); } else { if (!IRDMA_RING_MORE_WORK(qp->sq_ring)) { - ret_code = IRDMA_ERR_Q_EMPTY; + ret_code = -ENOENT; goto exit; } @@ -1303,15 +1297,15 @@ void irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge, * @sqdepth: depth of SQ * */ -enum irdma_status_code irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, - u32 sq_size, u8 shift, u32 *sqdepth) +int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, + u32 *sqdepth) { *sqdepth = irdma_qp_round_up((sq_size << shift) + IRDMA_SQ_RSVD); if (*sqdepth < (IRDMA_QP_SW_MIN_WQSIZE << shift)) *sqdepth = IRDMA_QP_SW_MIN_WQSIZE << shift; else if (*sqdepth > uk_attrs->max_hw_wq_quanta) - return IRDMA_ERR_INVALID_SIZE; + return -EINVAL; return 0; } @@ -1323,15 +1317,15 @@ enum irdma_status_code irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, * @shift: shift which determines size of WQE * @rqdepth: depth of RQ */ -enum irdma_status_code irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, - u32 rq_size, u8 shift, u32 *rqdepth) +int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, + u32 *rqdepth) { *rqdepth = irdma_qp_round_up((rq_size << shift) + IRDMA_RQ_RSVD); if (*rqdepth < (IRDMA_QP_SW_MIN_WQSIZE << shift)) *rqdepth = IRDMA_QP_SW_MIN_WQSIZE << shift; else if (*rqdepth > uk_attrs->max_hw_rq_quanta) - return IRDMA_ERR_INVALID_SIZE; + return -EINVAL; return 0; } @@ -1381,17 +1375,16 @@ static void irdma_setup_connection_wqes(struct irdma_qp_uk *qp, * allowed. Then size of wqe * the number of wqes should be the * amount of memory allocated for sq and rq. */ -enum irdma_status_code irdma_uk_qp_init(struct irdma_qp_uk *qp, - struct irdma_qp_uk_init_info *info) +int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info) { - enum irdma_status_code ret_code = 0; + int ret_code = 0; u32 sq_ring_size; u8 sqshift, rqshift; qp->uk_attrs = info->uk_attrs; if (info->max_sq_frag_cnt > qp->uk_attrs->max_hw_wq_frags || info->max_rq_frag_cnt > qp->uk_attrs->max_hw_wq_frags) - return IRDMA_ERR_INVALID_FRAG_COUNT; + return -EINVAL; irdma_get_wqe_shift(qp->uk_attrs, info->max_rq_frag_cnt, 0, &rqshift); if (qp->uk_attrs->hw_rev == IRDMA_GEN_1) { @@ -1502,8 +1495,7 @@ void irdma_uk_clean_cq(void *q, struct irdma_cq_uk *cq) * @signaled: signaled for completion * @post_sq: ring doorbell */ -enum irdma_status_code irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, - bool signaled, bool post_sq) +int irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled, bool post_sq) { __le64 *wqe; u64 hdr; @@ -1515,7 +1507,7 @@ enum irdma_status_code irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, IRDMA_QP_WQE_MIN_QUANTA, 0, &info); if (!wqe) - return IRDMA_ERR_QP_TOOMANY_WRS_POSTED; + return -ENOMEM; irdma_clr_wqes(qp, wqe_idx); @@ -1541,7 +1533,7 @@ enum irdma_status_code irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, * @frag_cnt: number of fragments * @quanta: quanta for frag_cnt */ -enum irdma_status_code irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta) +int irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta) { switch (frag_cnt) { case 0: @@ -1577,7 +1569,7 @@ enum irdma_status_code irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta) *quanta = 8; break; default: - return IRDMA_ERR_INVALID_FRAG_COUNT; + return -EINVAL; } return 0; @@ -1588,7 +1580,7 @@ enum irdma_status_code irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta) * @frag_cnt: number of fragments * @wqe_size: size in bytes given frag_cnt */ -enum irdma_status_code irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size) +int irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size) { switch (frag_cnt) { case 0: @@ -1615,7 +1607,7 @@ enum irdma_status_code irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size) *wqe_size = 256; break; default: - return IRDMA_ERR_INVALID_FRAG_COUNT; + return -EINVAL; } return 0; diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h index 3c811fb88404..ddd0ebbdd7d5 100644 --- a/drivers/infiniband/hw/irdma/user.h +++ b/drivers/infiniband/hw/irdma/user.h @@ -270,29 +270,24 @@ struct irdma_cq_poll_info { bool imm_valid:1; }; -enum irdma_status_code irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, - struct irdma_post_sq_info *info, - bool post_sq); -enum irdma_status_code irdma_uk_inline_send(struct irdma_qp_uk *qp, - struct irdma_post_sq_info *info, - bool post_sq); - -enum irdma_status_code irdma_uk_post_nop(struct irdma_qp_uk *qp, u64 wr_id, - bool signaled, bool post_sq); -enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp, - struct irdma_post_rq_info *info); +int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, + struct irdma_post_sq_info *info, bool post_sq); +int irdma_uk_inline_send(struct irdma_qp_uk *qp, + struct irdma_post_sq_info *info, bool post_sq); +int irdma_uk_post_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled, + bool post_sq); +int irdma_uk_post_receive(struct irdma_qp_uk *qp, + struct irdma_post_rq_info *info); void irdma_uk_qp_post_wr(struct irdma_qp_uk *qp); -enum irdma_status_code irdma_uk_rdma_read(struct irdma_qp_uk *qp, - struct irdma_post_sq_info *info, - bool inv_stag, bool post_sq); -enum irdma_status_code irdma_uk_rdma_write(struct irdma_qp_uk *qp, - struct irdma_post_sq_info *info, - bool post_sq); -enum irdma_status_code irdma_uk_send(struct irdma_qp_uk *qp, - struct irdma_post_sq_info *info, bool post_sq); -enum irdma_status_code irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, - struct irdma_post_sq_info *info, - bool post_sq); +int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, + bool inv_stag, bool post_sq); +int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, + bool post_sq); +int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, + bool post_sq); +int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, + struct irdma_post_sq_info *info, + bool post_sq); struct irdma_wqe_uk_ops { void (*iw_copy_inline_data)(u8 *dest, u8 *src, u32 len, u8 polarity); @@ -303,16 +298,16 @@ struct irdma_wqe_uk_ops { struct irdma_bind_window *op_info); }; -enum irdma_status_code irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, - struct irdma_cq_poll_info *info); +int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, + struct irdma_cq_poll_info *info); void irdma_uk_cq_request_notification(struct irdma_cq_uk *cq, enum irdma_cmpl_notify cq_notify); void irdma_uk_cq_resize(struct irdma_cq_uk *cq, void *cq_base, int size); void irdma_uk_cq_set_resized_cnt(struct irdma_cq_uk *qp, u16 cnt); void irdma_uk_cq_init(struct irdma_cq_uk *cq, struct irdma_cq_uk_init_info *info); -enum irdma_status_code irdma_uk_qp_init(struct irdma_qp_uk *qp, - struct irdma_qp_uk_init_info *info); +int irdma_uk_qp_init(struct irdma_qp_uk *qp, + struct irdma_qp_uk_init_info *info); struct irdma_sq_uk_wr_trk_info { u64 wrid; u32 wr_len; @@ -413,16 +408,15 @@ __le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx, struct irdma_post_sq_info *info); __le64 *irdma_qp_get_next_recv_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx); void irdma_uk_clean_cq(void *q, struct irdma_cq_uk *cq); -enum irdma_status_code irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, - bool signaled, bool post_sq); -enum irdma_status_code irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta); -enum irdma_status_code irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size); +int irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled, bool post_sq); +int irdma_fragcnt_to_quanta_sq(u32 frag_cnt, u16 *quanta); +int irdma_fragcnt_to_wqesize_rq(u32 frag_cnt, u16 *wqe_size); void irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge, u32 inline_data, u8 *shift); -enum irdma_status_code irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, - u32 sq_size, u8 shift, u32 *wqdepth); -enum irdma_status_code irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, - u32 rq_size, u8 shift, u32 *wqdepth); +int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, + u32 *wqdepth); +int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, + u32 *wqdepth); void irdma_qp_push_wqe(struct irdma_qp_uk *qp, __le64 *wqe, u16 quanta, u32 wqe_idx, bool post_sq); void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx); diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 398736d8c78a..346c2c5dabdf 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -150,31 +150,35 @@ int irdma_inetaddr_event(struct notifier_block *notifier, unsigned long event, void *ptr) { struct in_ifaddr *ifa = ptr; - struct net_device *netdev = ifa->ifa_dev->dev; + struct net_device *real_dev, *netdev = ifa->ifa_dev->dev; struct irdma_device *iwdev; struct ib_device *ibdev; u32 local_ipaddr; - ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_IRDMA); + real_dev = rdma_vlan_dev_real_dev(netdev); + if (!real_dev) + real_dev = netdev; + + ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA); if (!ibdev) return NOTIFY_DONE; iwdev = to_iwdev(ibdev); local_ipaddr = ntohl(ifa->ifa_address); ibdev_dbg(&iwdev->ibdev, - "DEV: netdev %p event %lu local_ip=%pI4 MAC=%pM\n", netdev, - event, &local_ipaddr, netdev->dev_addr); + "DEV: netdev %p event %lu local_ip=%pI4 MAC=%pM\n", real_dev, + event, &local_ipaddr, real_dev->dev_addr); switch (event) { case NETDEV_DOWN: - irdma_manage_arp_cache(iwdev->rf, netdev->dev_addr, + irdma_manage_arp_cache(iwdev->rf, real_dev->dev_addr, &local_ipaddr, true, IRDMA_ARP_DELETE); - irdma_if_notify(iwdev, netdev, &local_ipaddr, true, false); + irdma_if_notify(iwdev, real_dev, &local_ipaddr, true, false); irdma_gid_change_event(&iwdev->ibdev); break; case NETDEV_UP: case NETDEV_CHANGEADDR: - irdma_add_arp(iwdev->rf, &local_ipaddr, true, netdev->dev_addr); - irdma_if_notify(iwdev, netdev, &local_ipaddr, true, true); + irdma_add_arp(iwdev->rf, &local_ipaddr, true, real_dev->dev_addr); + irdma_if_notify(iwdev, real_dev, &local_ipaddr, true, true); irdma_gid_change_event(&iwdev->ibdev); break; default: @@ -196,32 +200,36 @@ int irdma_inet6addr_event(struct notifier_block *notifier, unsigned long event, void *ptr) { struct inet6_ifaddr *ifa = ptr; - struct net_device *netdev = ifa->idev->dev; + struct net_device *real_dev, *netdev = ifa->idev->dev; struct irdma_device *iwdev; struct ib_device *ibdev; u32 local_ipaddr6[4]; - ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_IRDMA); + real_dev = rdma_vlan_dev_real_dev(netdev); + if (!real_dev) + real_dev = netdev; + + ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA); if (!ibdev) return NOTIFY_DONE; iwdev = to_iwdev(ibdev); irdma_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32); ibdev_dbg(&iwdev->ibdev, - "DEV: netdev %p event %lu local_ip=%pI6 MAC=%pM\n", netdev, - event, local_ipaddr6, netdev->dev_addr); + "DEV: netdev %p event %lu local_ip=%pI6 MAC=%pM\n", real_dev, + event, local_ipaddr6, real_dev->dev_addr); switch (event) { case NETDEV_DOWN: - irdma_manage_arp_cache(iwdev->rf, netdev->dev_addr, + irdma_manage_arp_cache(iwdev->rf, real_dev->dev_addr, local_ipaddr6, false, IRDMA_ARP_DELETE); - irdma_if_notify(iwdev, netdev, local_ipaddr6, false, false); + irdma_if_notify(iwdev, real_dev, local_ipaddr6, false, false); irdma_gid_change_event(&iwdev->ibdev); break; case NETDEV_UP: case NETDEV_CHANGEADDR: irdma_add_arp(iwdev->rf, local_ipaddr6, false, - netdev->dev_addr); - irdma_if_notify(iwdev, netdev, local_ipaddr6, false, true); + real_dev->dev_addr); + irdma_if_notify(iwdev, real_dev, local_ipaddr6, false, true); irdma_gid_change_event(&iwdev->ibdev); break; default: @@ -243,14 +251,18 @@ int irdma_net_event(struct notifier_block *notifier, unsigned long event, void *ptr) { struct neighbour *neigh = ptr; + struct net_device *real_dev, *netdev = (struct net_device *)neigh->dev; struct irdma_device *iwdev; struct ib_device *ibdev; __be32 *p; u32 local_ipaddr[4] = {}; bool ipv4 = true; - ibdev = ib_device_get_by_netdev((struct net_device *)neigh->dev, - RDMA_DRIVER_IRDMA); + real_dev = rdma_vlan_dev_real_dev(netdev); + if (!real_dev) + real_dev = netdev; + + ibdev = ib_device_get_by_netdev(real_dev, RDMA_DRIVER_IRDMA); if (!ibdev) return NOTIFY_DONE; @@ -551,12 +563,12 @@ void irdma_cleanup_pending_cqp_op(struct irdma_pci_f *rf) * @rf: RDMA PCI function * @cqp_request: cqp request to wait */ -static enum irdma_status_code irdma_wait_event(struct irdma_pci_f *rf, - struct irdma_cqp_request *cqp_request) +static int irdma_wait_event(struct irdma_pci_f *rf, + struct irdma_cqp_request *cqp_request) { struct irdma_cqp_timeout cqp_timeout = {}; bool cqp_error = false; - enum irdma_status_code err_code = 0; + int err_code = 0; cqp_timeout.compl_cqp_cmds = rf->sc_dev.cqp_cmd_stats[IRDMA_OP_CMPL_CMDS]; do { @@ -575,12 +587,12 @@ static enum irdma_status_code irdma_wait_event(struct irdma_pci_f *rf, rf->reset = true; rf->gen_ops.request_reset(rf); } - return IRDMA_ERR_TIMEOUT; + return -ETIMEDOUT; } while (1); cqp_error = cqp_request->compl_info.error; if (cqp_error) { - err_code = IRDMA_ERR_CQP_COMPL_ERROR; + err_code = -EIO; if (cqp_request->compl_info.maj_err_code == 0xFFFF && cqp_request->compl_info.min_err_code == 0x8029) { if (!rf->reset) { @@ -680,16 +692,16 @@ bool irdma_cqp_crit_err(struct irdma_sc_dev *dev, u8 cqp_cmd, * @rf: RDMA PCI function * @cqp_request: cqp request to process */ -enum irdma_status_code irdma_handle_cqp_op(struct irdma_pci_f *rf, - struct irdma_cqp_request *cqp_request) +int irdma_handle_cqp_op(struct irdma_pci_f *rf, + struct irdma_cqp_request *cqp_request) { struct irdma_sc_dev *dev = &rf->sc_dev; struct cqp_cmds_info *info = &cqp_request->info; - enum irdma_status_code status; + int status; bool put_cqp_request = true; if (rf->reset) - return IRDMA_ERR_NOT_READY; + return -EBUSY; irdma_get_cqp_request(cqp_request); status = irdma_process_cqp_cmd(dev, info); @@ -791,17 +803,17 @@ void *irdma_remove_cqp_head(struct irdma_sc_dev *dev) * @sdinfo: information for sd cqp * */ -enum irdma_status_code irdma_cqp_sds_cmd(struct irdma_sc_dev *dev, - struct irdma_update_sds_info *sdinfo) +int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev, + struct irdma_update_sds_info *sdinfo) { struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_pci_f *rf = dev_to_rf(dev); - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; memcpy(&cqp_info->in.u.update_pe_sds.info, sdinfo, @@ -822,19 +834,18 @@ enum irdma_status_code irdma_cqp_sds_cmd(struct irdma_sc_dev *dev, * @qp: hardware control qp * @op: suspend or resume */ -enum irdma_status_code irdma_cqp_qp_suspend_resume(struct irdma_sc_qp *qp, - u8 op) +int irdma_cqp_qp_suspend_resume(struct irdma_sc_qp *qp, u8 op) { struct irdma_sc_dev *dev = qp->dev; struct irdma_cqp_request *cqp_request; struct irdma_sc_cqp *cqp = dev->cqp; struct cqp_cmds_info *cqp_info; struct irdma_pci_f *rf = dev_to_rf(dev); - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, false); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; cqp_info->cqp_cmd = op; @@ -940,18 +951,17 @@ void irdma_terminate_del_timer(struct irdma_sc_qp *qp) * @val_mem: buffer for fpm * @hmc_fn_id: function id for fpm */ -enum irdma_status_code -irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev, - struct irdma_dma_mem *val_mem, u8 hmc_fn_id) +int irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev, + struct irdma_dma_mem *val_mem, u8 hmc_fn_id) { struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_pci_f *rf = dev_to_rf(dev); - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; cqp_request->param = NULL; @@ -975,18 +985,17 @@ irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev, * @val_mem: buffer with fpm values * @hmc_fn_id: function id for fpm */ -enum irdma_status_code -irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev, - struct irdma_dma_mem *val_mem, u8 hmc_fn_id) +int irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev, + struct irdma_dma_mem *val_mem, u8 hmc_fn_id) { struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_pci_f *rf = dev_to_rf(dev); - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; cqp_request->param = NULL; @@ -1009,18 +1018,17 @@ irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev, * @dev: device pointer * @cq: pointer to created cq */ -enum irdma_status_code irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev, - struct irdma_sc_cq *cq) +int irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq) { struct irdma_pci_f *rf = dev_to_rf(dev); struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; cqp_info->cqp_cmd = IRDMA_OP_CQ_CREATE; @@ -1039,19 +1047,18 @@ enum irdma_status_code irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev, * @dev: device pointer * @qp: pointer to created qp */ -enum irdma_status_code irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev, - struct irdma_sc_qp *qp) +int irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp) { struct irdma_pci_f *rf = dev_to_rf(dev); struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_create_qp_info *qp_info; - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; qp_info = &cqp_request->info.in.u.qp_create.info; @@ -1079,7 +1086,7 @@ static void irdma_dealloc_push_page(struct irdma_pci_f *rf, { struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; - enum irdma_status_code status; + int status; if (qp->push_idx == IRDMA_INVALID_PUSH_PAGE_INDEX) return; @@ -1179,12 +1186,10 @@ static void irdma_hw_modify_qp_callback(struct irdma_cqp_request *cqp_request) * @info: info for modify qp * @wait: flag to wait or not for modify qp completion */ -enum irdma_status_code irdma_hw_modify_qp(struct irdma_device *iwdev, - struct irdma_qp *iwqp, - struct irdma_modify_qp_info *info, - bool wait) +int irdma_hw_modify_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp, + struct irdma_modify_qp_info *info, bool wait) { - enum irdma_status_code status; + int status; struct irdma_pci_f *rf = iwdev->rf; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; @@ -1192,7 +1197,7 @@ enum irdma_status_code irdma_hw_modify_qp(struct irdma_device *iwdev, cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, wait); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; if (!wait) { cqp_request->callback_fcn = irdma_hw_modify_qp_callback; @@ -1230,7 +1235,7 @@ enum irdma_status_code irdma_hw_modify_qp(struct irdma_device *iwdev, cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, wait); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; m_info = &cqp_info->in.u.qp_modify.info; @@ -1271,17 +1276,17 @@ void irdma_cqp_cq_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq) * @dev: device pointer * @qp: pointer to qp */ -enum irdma_status_code irdma_cqp_qp_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp) +int irdma_cqp_qp_destroy_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp) { struct irdma_pci_f *rf = dev_to_rf(dev); struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, true); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; memset(cqp_info, 0, sizeof(*cqp_info)); @@ -1317,20 +1322,20 @@ void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp) * irdma_init_hash_desc - initialize hash for crc calculation * @desc: cryption type */ -enum irdma_status_code irdma_init_hash_desc(struct shash_desc **desc) +int irdma_init_hash_desc(struct shash_desc **desc) { struct crypto_shash *tfm; struct shash_desc *tdesc; tfm = crypto_alloc_shash("crc32c", 0, 0); if (IS_ERR(tfm)) - return IRDMA_ERR_MPA_CRC; + return -EINVAL; tdesc = kzalloc(sizeof(*tdesc) + crypto_shash_descsize(tfm), GFP_KERNEL); if (!tdesc) { crypto_free_shash(tfm); - return IRDMA_ERR_MPA_CRC; + return -EINVAL; } tdesc->tfm = tfm; @@ -1358,19 +1363,19 @@ void irdma_free_hash_desc(struct shash_desc *desc) * @len: length of buffer * @val: value to be compared */ -enum irdma_status_code irdma_ieq_check_mpacrc(struct shash_desc *desc, - void *addr, u32 len, u32 val) +int irdma_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 len, + u32 val) { u32 crc = 0; int ret; - enum irdma_status_code ret_code = 0; + int ret_code = 0; crypto_shash_init(desc); ret = crypto_shash_update(desc, addr, len); if (!ret) crypto_shash_final(desc, (u8 *)&crc); if (crc != val) - ret_code = IRDMA_ERR_MPA_CRC; + ret_code = -EINVAL; return ret_code; } @@ -1524,9 +1529,8 @@ void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len, * @info: to get information * @buf: puda buffer */ -static enum irdma_status_code -irdma_gen1_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info, - struct irdma_puda_buf *buf) +static int irdma_gen1_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info, + struct irdma_puda_buf *buf) { struct iphdr *iph; struct ipv6hdr *ip6h; @@ -1563,7 +1567,7 @@ irdma_gen1_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info, ibdev_dbg(to_ibdev(buf->vsi->dev), "ERR: payload_len = 0x%x totallen expected0x%x\n", info->payload_len, buf->totallen); - return IRDMA_ERR_INVALID_SIZE; + return -EINVAL; } buf->tcphlen = tcph->doff << 2; @@ -1580,9 +1584,8 @@ irdma_gen1_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info, * @info: to get information * @buf: puda buffer */ -enum irdma_status_code -irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info, - struct irdma_puda_buf *buf) +int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info, + struct irdma_puda_buf *buf) { struct tcphdr *tcph; u32 pkt_len; @@ -1861,20 +1864,19 @@ static void irdma_process_cqp_stats(struct irdma_cqp_request *cqp_request) * @pestat: pointer to stats info * @wait: flag to wait or not wait for stats */ -enum irdma_status_code -irdma_cqp_gather_stats_cmd(struct irdma_sc_dev *dev, - struct irdma_vsi_pestat *pestat, bool wait) +int irdma_cqp_gather_stats_cmd(struct irdma_sc_dev *dev, + struct irdma_vsi_pestat *pestat, bool wait) { struct irdma_pci_f *rf = dev_to_rf(dev); struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, wait); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; memset(cqp_info, 0, sizeof(*cqp_info)); @@ -1900,22 +1902,21 @@ irdma_cqp_gather_stats_cmd(struct irdma_sc_dev *dev, * @cmd: command to allocate or free * @stats_info: pointer to allocate stats info */ -enum irdma_status_code -irdma_cqp_stats_inst_cmd(struct irdma_sc_vsi *vsi, u8 cmd, - struct irdma_stats_inst_info *stats_info) +int irdma_cqp_stats_inst_cmd(struct irdma_sc_vsi *vsi, u8 cmd, + struct irdma_stats_inst_info *stats_info) { struct irdma_pci_f *rf = dev_to_rf(vsi->dev); struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; - enum irdma_status_code status; + int status; bool wait = false; if (cmd == IRDMA_OP_STATS_ALLOCATE) wait = true; cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, wait); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; memset(cqp_info, 0, sizeof(*cqp_info)); @@ -1938,17 +1939,17 @@ irdma_cqp_stats_inst_cmd(struct irdma_sc_vsi *vsi, u8 cmd, * @sc_ceq: pointer to ceq structure * @op: Create or Destroy */ -enum irdma_status_code irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev, - struct irdma_sc_ceq *sc_ceq, u8 op) +int irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_ceq *sc_ceq, + u8 op) { struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_pci_f *rf = dev_to_rf(dev); - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; cqp_info->post_sq = 1; @@ -1968,17 +1969,17 @@ enum irdma_status_code irdma_cqp_ceq_cmd(struct irdma_sc_dev *dev, * @sc_aeq: pointer to aeq structure * @op: Create or Destroy */ -enum irdma_status_code irdma_cqp_aeq_cmd(struct irdma_sc_dev *dev, - struct irdma_sc_aeq *sc_aeq, u8 op) +int irdma_cqp_aeq_cmd(struct irdma_sc_dev *dev, struct irdma_sc_aeq *sc_aeq, + u8 op) { struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_pci_f *rf = dev_to_rf(dev); - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; cqp_info->post_sq = 1; @@ -1998,16 +1999,15 @@ enum irdma_status_code irdma_cqp_aeq_cmd(struct irdma_sc_dev *dev, * @cmd: Add, modify or delete * @node_info: pointer to ws node info */ -enum irdma_status_code -irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev, u8 cmd, - struct irdma_ws_node_info *node_info) +int irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev, u8 cmd, + struct irdma_ws_node_info *node_info) { struct irdma_pci_f *rf = dev_to_rf(dev); struct irdma_cqp *iwcqp = &rf->cqp; struct irdma_sc_cqp *cqp = &iwcqp->sc_cqp; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; - enum irdma_status_code status; + int status; bool poll; if (!rf->sc_dev.ceq_valid) @@ -2017,7 +2017,7 @@ irdma_cqp_ws_node_cmd(struct irdma_sc_dev *dev, u8 cmd, cqp_request = irdma_alloc_and_get_cqp_request(iwcqp, !poll); if (!cqp_request) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; cqp_info = &cqp_request->info; memset(cqp_info, 0, sizeof(*cqp_info)); @@ -2066,7 +2066,7 @@ int irdma_ah_cqp_op(struct irdma_pci_f *rf, struct irdma_sc_ah *sc_ah, u8 cmd, { struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; - enum irdma_status_code status; + int status; if (cmd != IRDMA_OP_AH_CREATE && cmd != IRDMA_OP_AH_DESTROY) return -EINVAL; @@ -2148,11 +2148,10 @@ static void irdma_ilq_ah_cb(struct irdma_cqp_request *cqp_request) * @ah_ret: Returned pointer to address handle if created * */ -enum irdma_status_code irdma_puda_create_ah(struct irdma_sc_dev *dev, - struct irdma_ah_info *ah_info, - bool wait, enum puda_rsrc_type type, - void *cb_param, - struct irdma_sc_ah **ah_ret) +int irdma_puda_create_ah(struct irdma_sc_dev *dev, + struct irdma_ah_info *ah_info, bool wait, + enum puda_rsrc_type type, void *cb_param, + struct irdma_sc_ah **ah_ret) { struct irdma_sc_ah *ah; struct irdma_pci_f *rf = dev_to_rf(dev); @@ -2161,7 +2160,7 @@ enum irdma_status_code irdma_puda_create_ah(struct irdma_sc_dev *dev, ah = kzalloc(sizeof(*ah), GFP_ATOMIC); *ah_ret = ah; if (!ah) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; err = irdma_alloc_rsrc(rf, rf->allocated_ahs, rf->max_ah, &ah_info->ah_idx, &rf->next_ah); @@ -2187,7 +2186,7 @@ error: err_free: kfree(ah); *ah_ret = NULL; - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } /** @@ -2229,19 +2228,19 @@ void irdma_gsi_ud_qp_ah_cb(struct irdma_cqp_request *cqp_request) * @pprm: pble resource manager * @pchunk: chunk of memory to add */ -enum irdma_status_code irdma_prm_add_pble_mem(struct irdma_pble_prm *pprm, - struct irdma_chunk *pchunk) +int irdma_prm_add_pble_mem(struct irdma_pble_prm *pprm, + struct irdma_chunk *pchunk) { u64 sizeofbitmap; if (pchunk->size & 0xfff) - return IRDMA_ERR_PARAM; + return -EINVAL; sizeofbitmap = (u64)pchunk->size >> pprm->pble_shift; pchunk->bitmapbuf = bitmap_zalloc(sizeofbitmap, GFP_KERNEL); if (!pchunk->bitmapbuf) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; pchunk->sizeofbitmap = sizeofbitmap; /* each pble is 8 bytes hence shift by 3 */ @@ -2259,10 +2258,9 @@ enum irdma_status_code irdma_prm_add_pble_mem(struct irdma_pble_prm *pprm, * @vaddr: returns virtual address of pble memory * @fpm_addr: returns fpm address of pble memory */ -enum irdma_status_code -irdma_prm_get_pbles(struct irdma_pble_prm *pprm, - struct irdma_pble_chunkinfo *chunkinfo, u64 mem_size, - u64 **vaddr, u64 *fpm_addr) +int irdma_prm_get_pbles(struct irdma_pble_prm *pprm, + struct irdma_pble_chunkinfo *chunkinfo, u64 mem_size, + u64 **vaddr, u64 *fpm_addr) { u64 bits_needed; u64 bit_idx = PBLE_INVALID_IDX; @@ -2290,7 +2288,7 @@ irdma_prm_get_pbles(struct irdma_pble_prm *pprm, if (!pchunk || bit_idx >= pchunk->sizeofbitmap) { spin_unlock_irqrestore(&pprm->prm_lock, flags); - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } bitmap_set(pchunk->bitmapbuf, bit_idx, bits_needed); @@ -2325,8 +2323,8 @@ void irdma_prm_return_pbles(struct irdma_pble_prm *pprm, spin_unlock_irqrestore(&pprm->prm_lock, flags); } -enum irdma_status_code irdma_map_vm_page_list(struct irdma_hw *hw, void *va, - dma_addr_t *pg_dma, u32 pg_cnt) +int irdma_map_vm_page_list(struct irdma_hw *hw, void *va, dma_addr_t *pg_dma, + u32 pg_cnt) { struct page *vm_page; int i; @@ -2350,7 +2348,7 @@ enum irdma_status_code irdma_map_vm_page_list(struct irdma_hw *hw, void *va, err: irdma_unmap_vm_page_list(hw, pg_dma, i); - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } void irdma_unmap_vm_page_list(struct irdma_hw *hw, dma_addr_t *pg_dma, u32 pg_cnt) @@ -2386,15 +2384,14 @@ done: * @chunk: chunk to add for paged memory * @pg_cnt: number of pages needed */ -enum irdma_status_code irdma_pble_get_paged_mem(struct irdma_chunk *chunk, - u32 pg_cnt) +int irdma_pble_get_paged_mem(struct irdma_chunk *chunk, u32 pg_cnt) { u32 size; void *va; chunk->dmainfo.dmaaddrs = kzalloc(pg_cnt << 3, GFP_KERNEL); if (!chunk->dmainfo.dmaaddrs) - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; size = PAGE_SIZE * pg_cnt; va = vmalloc(size); @@ -2416,7 +2413,7 @@ err: kfree(chunk->dmainfo.dmaaddrs); chunk->dmainfo.dmaaddrs = NULL; - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } /** diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 460e757d3fe6..46f475394af5 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -256,7 +256,7 @@ static void irdma_alloc_push_page(struct irdma_qp *iwqp) struct cqp_cmds_info *cqp_info; struct irdma_device *iwdev = iwqp->iwdev; struct irdma_sc_qp *qp = &iwqp->sc_qp; - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true); if (!cqp_request) @@ -592,7 +592,7 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev, u32 sqdepth, rqdepth; u8 sqshift, rqshift; u32 size; - enum irdma_status_code status; + int status; struct irdma_qp_uk_init_info *ukinfo = &info->qp_uk_init_info; struct irdma_uk_attrs *uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs; @@ -603,7 +603,7 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev, status = irdma_get_sqdepth(uk_attrs, ukinfo->sq_size, sqshift, &sqdepth); if (status) - return -ENOMEM; + return status; if (uk_attrs->hw_rev == IRDMA_GEN_1) rqshift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; @@ -614,7 +614,7 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev, status = irdma_get_rqdepth(uk_attrs, ukinfo->rq_size, rqshift, &rqdepth); if (status) - return -ENOMEM; + return status; iwqp->kqp.sq_wrid_mem = kcalloc(sqdepth, sizeof(*iwqp->kqp.sq_wrid_mem), GFP_KERNEL); @@ -668,7 +668,7 @@ static int irdma_cqp_create_qp_cmd(struct irdma_qp *iwqp) struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_create_qp_info *qp_info; - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); if (!cqp_request) @@ -688,7 +688,7 @@ static int irdma_cqp_create_qp_cmd(struct irdma_qp *iwqp) status = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); - return status ? -ENOMEM : 0; + return status; } static void irdma_roce_fill_and_set_qpctx_info(struct irdma_qp *iwqp, @@ -806,7 +806,6 @@ static int irdma_create_qp(struct ib_qp *ibqp, struct irdma_create_qp_req req; struct irdma_create_qp_resp uresp = {}; u32 qp_num = 0; - enum irdma_status_code ret; int err_code; int sq_size; int rq_size; @@ -936,9 +935,8 @@ static int irdma_create_qp(struct ib_qp *ibqp, if (dev->hw_attrs.uk_attrs.hw_rev > IRDMA_GEN_1) init_info.qp_uk_init_info.qp_caps |= IRDMA_PUSH_MODE; - ret = irdma_sc_qp_init(qp, &init_info); - if (ret) { - err_code = -EPROTO; + err_code = irdma_sc_qp_init(qp, &init_info); + if (err_code) { ibdev_dbg(&iwdev->ibdev, "VERBS: qp_init fail\n"); goto error; } @@ -1189,7 +1187,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (ret) return ret; - if (vlan_id >= VLAN_N_VID && iwdev->dcb) + if (vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode) vlan_id = 0; if (vlan_id < VLAN_N_VID) { udp_info->insert_vlan_tag = true; @@ -1202,7 +1200,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, av->attrs = attr->ah_attr; rdma_gid2ip((struct sockaddr *)&av->sgid_addr, &sgid_attr->gid); rdma_gid2ip((struct sockaddr *)&av->dgid_addr, &attr->ah_attr.grh.dgid); - if (av->sgid_addr.saddr.sa_family == AF_INET6) { + if (av->net_type == RDMA_NETWORK_IPV6) { __be32 *daddr = av->dgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32; __be32 *saddr = @@ -1218,7 +1216,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, &local_ip[0], false, NULL, IRDMA_ARP_RESOLVE); - } else { + } else if (av->net_type == RDMA_NETWORK_IPV4) { __be32 saddr = av->sgid_addr.saddr_in.sin_addr.s_addr; __be32 daddr = av->dgid_addr.saddr_in.sin_addr.s_addr; @@ -1792,7 +1790,6 @@ static int irdma_resize_cq(struct ib_cq *ibcq, int entries, struct irdma_device *iwdev; struct irdma_pci_f *rf; struct irdma_cq_buf *cq_buf = NULL; - enum irdma_status_code status = 0; unsigned long flags; int ret; @@ -1885,12 +1882,10 @@ static int irdma_resize_cq(struct ib_cq *ibcq, int entries, cqp_info->in.u.cq_modify.cq = &iwcq->sc_cq; cqp_info->in.u.cq_modify.scratch = (uintptr_t)cqp_request; cqp_info->post_sq = 1; - status = irdma_handle_cqp_op(rf, cqp_request); + ret = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); - if (status) { - ret = -EPROTO; + if (ret) goto error; - } spin_lock_irqsave(&iwcq->lock, flags); if (cq_buf) { @@ -1945,7 +1940,6 @@ static int irdma_create_cq(struct ib_cq *ibcq, struct irdma_sc_cq *cq; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_cq_init_info info = {}; - enum irdma_status_code status; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; struct irdma_cq_uk_init_info *ukinfo = &info.cq_uk_init_info; @@ -2095,12 +2089,10 @@ static int irdma_create_cq(struct ib_cq *ibcq, cqp_info->in.u.cq_create.cq = cq; cqp_info->in.u.cq_create.check_overflow = true; cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request; - status = irdma_handle_cqp_op(rf, cqp_request); + err_code = irdma_handle_cqp_op(rf, cqp_request); irdma_put_cqp_request(&rf->cqp, cqp_request); - if (status) { - err_code = -ENOMEM; + if (err_code) goto cq_free_rsrc; - } if (udata) { struct irdma_create_cq_resp resp = {}; @@ -2309,14 +2301,14 @@ static int irdma_setup_pbles(struct irdma_pci_f *rf, struct irdma_mr *iwmr, struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc; struct irdma_pble_info *pinfo; u64 *pbl; - enum irdma_status_code status; + int status; enum irdma_pble_level level = PBLE_LEVEL_1; if (use_pbles) { status = irdma_get_pble(rf->pble_rsrc, palloc, iwmr->page_cnt, false); if (status) - return -ENOMEM; + return status; iwpbl->pbl_allocated = true; level = palloc->level; @@ -2434,7 +2426,7 @@ static int irdma_hw_alloc_mw(struct irdma_device *iwdev, struct irdma_mr *iwmr) struct irdma_pd *iwpd = to_iwpd(iwmr->ibmr.pd); struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true); if (!cqp_request) @@ -2457,7 +2449,7 @@ static int irdma_hw_alloc_mw(struct irdma_device *iwdev, struct irdma_mr *iwmr) status = irdma_handle_cqp_op(iwdev->rf, cqp_request); irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); - return status ? -ENOMEM : 0; + return status; } /** @@ -2509,7 +2501,7 @@ static int irdma_dealloc_mw(struct ib_mw *ibmw) cqp_info = &cqp_request->info; info = &cqp_info->in.u.dealloc_stag.info; memset(info, 0, sizeof(*info)); - info->pd_id = iwpd->sc_pd.pd_id & 0x00007fff; + info->pd_id = iwpd->sc_pd.pd_id; info->stag_idx = ibmw->rkey >> IRDMA_CQPSQ_STAG_IDX_S; info->mr = false; cqp_info->cqp_cmd = IRDMA_OP_DEALLOC_STAG; @@ -2533,8 +2525,7 @@ static int irdma_hw_alloc_stag(struct irdma_device *iwdev, { struct irdma_allocate_stag_info *info; struct irdma_pd *iwpd = to_iwpd(iwmr->ibmr.pd); - enum irdma_status_code status; - int err = 0; + int status; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; @@ -2556,10 +2547,8 @@ static int irdma_hw_alloc_stag(struct irdma_device *iwdev, cqp_info->in.u.alloc_stag.scratch = (uintptr_t)cqp_request; status = irdma_handle_cqp_op(iwdev->rf, cqp_request); irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); - if (status) - err = -ENOMEM; - return err; + return status; } /** @@ -2575,9 +2564,8 @@ static struct ib_mr *irdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, struct irdma_pble_alloc *palloc; struct irdma_pbl *iwpbl; struct irdma_mr *iwmr; - enum irdma_status_code status; u32 stag; - int err_code = -ENOMEM; + int err_code; iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL); if (!iwmr) @@ -2599,9 +2587,9 @@ static struct ib_mr *irdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, iwmr->type = IRDMA_MEMREG_TYPE_MEM; palloc = &iwpbl->pble_alloc; iwmr->page_cnt = max_num_sg; - status = irdma_get_pble(iwdev->rf->pble_rsrc, palloc, iwmr->page_cnt, - true); - if (status) + err_code = irdma_get_pble(iwdev->rf->pble_rsrc, palloc, iwmr->page_cnt, + true); + if (err_code) goto err_get_pble; err_code = irdma_hw_alloc_stag(iwdev, iwmr); @@ -2672,10 +2660,9 @@ static int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr, struct irdma_reg_ns_stag_info *stag_info; struct irdma_pd *iwpd = to_iwpd(iwmr->ibmr.pd); struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc; - enum irdma_status_code status; - int err = 0; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; + int ret; cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true); if (!cqp_request) @@ -2712,12 +2699,10 @@ static int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr, cqp_info->post_sq = 1; cqp_info->in.u.mr_reg_non_shared.dev = &iwdev->rf->sc_dev; cqp_info->in.u.mr_reg_non_shared.scratch = (uintptr_t)cqp_request; - status = irdma_handle_cqp_op(iwdev->rf, cqp_request); + ret = irdma_handle_cqp_op(iwdev->rf, cqp_request); irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); - if (status) - err = -ENOMEM; - return err; + return ret; } /** @@ -2897,7 +2882,6 @@ struct ib_mr *irdma_reg_phys_mr(struct ib_pd *pd, u64 addr, u64 size, int access struct irdma_device *iwdev = to_iwdev(pd->device); struct irdma_pbl *iwpbl; struct irdma_mr *iwmr; - enum irdma_status_code status; u32 stag; int ret; @@ -2925,10 +2909,9 @@ struct ib_mr *irdma_reg_phys_mr(struct ib_pd *pd, u64 addr, u64 size, int access iwmr->pgaddrmem[0] = addr; iwmr->len = size; iwmr->page_size = SZ_4K; - status = irdma_hwreg_mr(iwdev, iwmr, access); - if (status) { + ret = irdma_hwreg_mr(iwdev, iwmr, access); + if (ret) { irdma_free_stag(iwdev, stag); - ret = -ENOMEM; goto err; } @@ -3021,7 +3004,7 @@ static int irdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) cqp_info = &cqp_request->info; info = &cqp_info->in.u.dealloc_stag.info; memset(info, 0, sizeof(*info)); - info->pd_id = iwpd->sc_pd.pd_id & 0x00007fff; + info->pd_id = iwpd->sc_pd.pd_id; info->stag_idx = ib_mr->rkey >> IRDMA_CQPSQ_STAG_IDX_S; info->mr = true; if (iwpbl->pbl_allocated) @@ -3057,7 +3040,6 @@ static int irdma_post_send(struct ib_qp *ibqp, struct irdma_qp_uk *ukqp; struct irdma_sc_dev *dev; struct irdma_post_sq_info info; - enum irdma_status_code ret; int err = 0; unsigned long flags; bool inv_stag; @@ -3116,7 +3098,7 @@ static int irdma_post_send(struct ib_qp *ibqp, info.op.inline_send.qkey = ud_wr(ib_wr)->remote_qkey; info.op.inline_send.dest_qp = ud_wr(ib_wr)->remote_qpn; } - ret = irdma_uk_inline_send(ukqp, &info, false); + err = irdma_uk_inline_send(ukqp, &info, false); } else { info.op.send.num_sges = ib_wr->num_sge; info.op.send.sg_list = ib_wr->sg_list; @@ -3127,14 +3109,7 @@ static int irdma_post_send(struct ib_qp *ibqp, info.op.send.qkey = ud_wr(ib_wr)->remote_qkey; info.op.send.dest_qp = ud_wr(ib_wr)->remote_qpn; } - ret = irdma_uk_send(ukqp, &info, false); - } - - if (ret) { - if (ret == IRDMA_ERR_QP_TOOMANY_WRS_POSTED) - err = -ENOMEM; - else - err = -EINVAL; + err = irdma_uk_send(ukqp, &info, false); } break; case IB_WR_RDMA_WRITE_WITH_IMM: @@ -3160,20 +3135,13 @@ static int irdma_post_send(struct ib_qp *ibqp, rdma_wr(ib_wr)->remote_addr; info.op.inline_rdma_write.rem_addr.lkey = rdma_wr(ib_wr)->rkey; - ret = irdma_uk_inline_rdma_write(ukqp, &info, false); + err = irdma_uk_inline_rdma_write(ukqp, &info, false); } else { info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list; info.op.rdma_write.num_lo_sges = ib_wr->num_sge; info.op.rdma_write.rem_addr.addr = rdma_wr(ib_wr)->remote_addr; info.op.rdma_write.rem_addr.lkey = rdma_wr(ib_wr)->rkey; - ret = irdma_uk_rdma_write(ukqp, &info, false); - } - - if (ret) { - if (ret == IRDMA_ERR_QP_TOOMANY_WRS_POSTED) - err = -ENOMEM; - else - err = -EINVAL; + err = irdma_uk_rdma_write(ukqp, &info, false); } break; case IB_WR_RDMA_READ_WITH_INV: @@ -3190,21 +3158,12 @@ static int irdma_post_send(struct ib_qp *ibqp, info.op.rdma_read.rem_addr.lkey = rdma_wr(ib_wr)->rkey; info.op.rdma_read.lo_sg_list = (void *)ib_wr->sg_list; info.op.rdma_read.num_lo_sges = ib_wr->num_sge; - - ret = irdma_uk_rdma_read(ukqp, &info, inv_stag, false); - if (ret) { - if (ret == IRDMA_ERR_QP_TOOMANY_WRS_POSTED) - err = -ENOMEM; - else - err = -EINVAL; - } + err = irdma_uk_rdma_read(ukqp, &info, inv_stag, false); break; case IB_WR_LOCAL_INV: info.op_type = IRDMA_OP_TYPE_INV_STAG; info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey; - ret = irdma_uk_stag_local_invalidate(ukqp, &info, true); - if (ret) - err = -ENOMEM; + err = irdma_uk_stag_local_invalidate(ukqp, &info, true); break; case IB_WR_REG_MR: { struct irdma_mr *iwmr = to_iwmr(reg_wr(ib_wr)->mr); @@ -3226,10 +3185,8 @@ static int irdma_post_send(struct ib_qp *ibqp, stag_info.local_fence = ib_wr->send_flags & IB_SEND_FENCE; if (iwmr->npages > IRDMA_MIN_PAGES_PER_FMR) stag_info.chunk_size = 1; - ret = irdma_sc_mr_fast_register(&iwqp->sc_qp, &stag_info, + err = irdma_sc_mr_fast_register(&iwqp->sc_qp, &stag_info, true); - if (ret) - err = -ENOMEM; break; } default: @@ -3274,7 +3231,6 @@ static int irdma_post_recv(struct ib_qp *ibqp, struct irdma_qp *iwqp; struct irdma_qp_uk *ukqp; struct irdma_post_rq_info post_recv = {}; - enum irdma_status_code ret = 0; unsigned long flags; int err = 0; bool reflush = false; @@ -3289,14 +3245,10 @@ static int irdma_post_recv(struct ib_qp *ibqp, post_recv.num_sges = ib_wr->num_sge; post_recv.wr_id = ib_wr->wr_id; post_recv.sg_list = ib_wr->sg_list; - ret = irdma_uk_post_receive(ukqp, &post_recv); - if (ret) { + err = irdma_uk_post_receive(ukqp, &post_recv); + if (err) { ibdev_dbg(&iwqp->iwdev->ibdev, - "VERBS: post_recv err %d\n", ret); - if (ret == IRDMA_ERR_QP_TOOMANY_WRS_POSTED) - err = -ENOMEM; - else - err = -EINVAL; + "VERBS: post_recv err %d\n", err); goto out; } @@ -3483,7 +3435,7 @@ static int __irdma_poll_cq(struct irdma_cq *iwcq, int num_entries, struct ib_wc struct irdma_cq_buf *last_buf = NULL; struct irdma_cq_poll_info *cur_cqe = &iwcq->cur_cqe; struct irdma_cq_buf *cq_buf; - enum irdma_status_code ret; + int ret; struct irdma_device *iwdev; struct irdma_cq_uk *ukcq; bool cq_new_cqe = false; @@ -3503,10 +3455,10 @@ static int __irdma_poll_cq(struct irdma_cq *iwcq, int num_entries, struct ib_wc cq_new_cqe = true; continue; } - if (ret == IRDMA_ERR_Q_EMPTY) + if (ret == -ENOENT) break; /* QP using the CQ is destroyed. Skip reporting this CQE */ - if (ret == IRDMA_ERR_Q_DESTROYED) { + if (ret == -EFAULT) { cq_new_cqe = true; continue; } @@ -3528,10 +3480,10 @@ static int __irdma_poll_cq(struct irdma_cq *iwcq, int num_entries, struct ib_wc continue; } - if (ret == IRDMA_ERR_Q_EMPTY) + if (ret == -ENOENT) break; /* QP using the CQ is destroyed. Skip reporting this CQE */ - if (ret == IRDMA_ERR_Q_DESTROYED) { + if (ret == -EFAULT) { cq_new_cqe = true; continue; } @@ -3553,7 +3505,7 @@ error: ibdev_dbg(&iwdev->ibdev, "%s: Error polling CQ, irdma_err: %d\n", __func__, ret); - return -EINVAL; + return ret; } /** @@ -3859,7 +3811,7 @@ static int irdma_mcast_cqp_op(struct irdma_device *iwdev, { struct cqp_cmds_info *cqp_info; struct irdma_cqp_request *cqp_request; - enum irdma_status_code status; + int status; cqp_request = irdma_alloc_and_get_cqp_request(&iwdev->rf->cqp, true); if (!cqp_request) @@ -3873,10 +3825,8 @@ static int irdma_mcast_cqp_op(struct irdma_device *iwdev, cqp_info->in.u.mc_create.cqp = &iwdev->rf->cqp.sc_cqp; status = irdma_handle_cqp_op(iwdev->rf, cqp_request); irdma_put_cqp_request(&iwdev->rf->cqp, cqp_request); - if (status) - return -ENOMEM; - return 0; + return status; } /** @@ -3932,11 +3882,7 @@ static int irdma_attach_mcast(struct ib_qp *ibqp, union ib_gid *ibgid, u16 lid) int ret = 0; bool ipv4; u16 vlan_id; - union { - struct sockaddr saddr; - struct sockaddr_in saddr_in; - struct sockaddr_in6 saddr_in6; - } sgid_addr; + union irdma_sockaddr sgid_addr; unsigned char dmac[ETH_ALEN]; rdma_gid2ip((struct sockaddr *)&sgid_addr, ibgid); @@ -4072,11 +4018,7 @@ static int irdma_detach_mcast(struct ib_qp *ibqp, union ib_gid *ibgid, u16 lid) struct irdma_mcast_grp_ctx_entry_info mcg_info = {}; int ret; unsigned long flags; - union { - struct sockaddr saddr; - struct sockaddr_in saddr_in; - struct sockaddr_in6 saddr_in6; - } sgid_addr; + union irdma_sockaddr sgid_addr; rdma_gid2ip((struct sockaddr *)&sgid_addr, ibgid); if (!ipv6_addr_v4mapped((struct in6_addr *)ibgid)) @@ -4132,17 +4074,47 @@ static int irdma_detach_mcast(struct ib_qp *ibqp, union ib_gid *ibgid, u16 lid) return 0; } -/** - * irdma_create_ah - create address handle - * @ibah: address handle - * @attr: address handle attributes - * @udata: User data - * - * returns 0 on success, error otherwise - */ -static int irdma_create_ah(struct ib_ah *ibah, - struct rdma_ah_init_attr *attr, - struct ib_udata *udata) +static int irdma_create_hw_ah(struct irdma_device *iwdev, struct irdma_ah *ah, bool sleep) +{ + struct irdma_pci_f *rf = iwdev->rf; + int err; + + err = irdma_alloc_rsrc(rf, rf->allocated_ahs, rf->max_ah, &ah->sc_ah.ah_info.ah_idx, + &rf->next_ah); + if (err) + return err; + + err = irdma_ah_cqp_op(rf, &ah->sc_ah, IRDMA_OP_AH_CREATE, sleep, + irdma_gsi_ud_qp_ah_cb, &ah->sc_ah); + + if (err) { + ibdev_dbg(&iwdev->ibdev, "VERBS: CQP-OP Create AH fail"); + goto err_ah_create; + } + + if (!sleep) { + int cnt = CQP_COMPL_WAIT_TIME_MS * CQP_TIMEOUT_THRESHOLD; + + do { + irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq); + mdelay(1); + } while (!ah->sc_ah.ah_info.ah_valid && --cnt); + + if (!cnt) { + ibdev_dbg(&iwdev->ibdev, "VERBS: CQP create AH timed out"); + err = -ETIMEDOUT; + goto err_ah_create; + } + } + return 0; + +err_ah_create: + irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_ahs, ah->sc_ah.ah_info.ah_idx); + + return err; +} + +static int irdma_setup_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *attr) { struct irdma_pd *pd = to_iwpd(ibah->pd); struct irdma_ah *ah = container_of(ibah, struct irdma_ah, ibah); @@ -4151,25 +4123,13 @@ static int irdma_create_ah(struct ib_ah *ibah, struct irdma_device *iwdev = to_iwdev(ibah->pd->device); struct irdma_pci_f *rf = iwdev->rf; struct irdma_sc_ah *sc_ah; - u32 ah_id = 0; struct irdma_ah_info *ah_info; - struct irdma_create_ah_resp uresp; - union { - struct sockaddr saddr; - struct sockaddr_in saddr_in; - struct sockaddr_in6 saddr_in6; - } sgid_addr, dgid_addr; + union irdma_sockaddr sgid_addr, dgid_addr; int err; u8 dmac[ETH_ALEN]; - err = irdma_alloc_rsrc(rf, rf->allocated_ahs, rf->max_ah, &ah_id, - &rf->next_ah); - if (err) - return err; - ah->pd = pd; sc_ah = &ah->sc_ah; - sc_ah->ah_info.ah_idx = ah_id; sc_ah->ah_info.vsi = &iwdev->vsi; irdma_sc_init_ah(&rf->sc_dev, sc_ah); ah->sgid_index = ah_attr->grh.sgid_index; @@ -4179,10 +4139,7 @@ static int irdma_create_ah(struct ib_ah *ibah, rdma_gid2ip((struct sockaddr *)&dgid_addr, &ah_attr->grh.dgid); ah->av.attrs = *ah_attr; ah->av.net_type = rdma_gid_attr_network_type(sgid_attr); - ah->av.sgid_addr.saddr = sgid_addr.saddr; - ah->av.dgid_addr.saddr = dgid_addr.saddr; ah_info = &sc_ah->ah_info; - ah_info->ah_idx = ah_id; ah_info->pd_idx = pd->sc_pd.pd_id; if (ah_attr->ah_flags & IB_AH_GRH) { ah_info->flow_label = ah_attr->grh.flow_label; @@ -4191,7 +4148,7 @@ static int irdma_create_ah(struct ib_ah *ibah, } ether_addr_copy(dmac, ah_attr->roce.dmac); - if (rdma_gid_attr_network_type(sgid_attr) == RDMA_NETWORK_IPV4) { + if (ah->av.net_type == RDMA_NETWORK_IPV4) { ah_info->ipv4_valid = true; ah_info->dest_ip_addr[0] = ntohl(dgid_addr.saddr_in.sin_addr.s_addr); @@ -4219,17 +4176,15 @@ static int irdma_create_ah(struct ib_ah *ibah, err = rdma_read_gid_l2_fields(sgid_attr, &ah_info->vlan_tag, ah_info->mac_addr); if (err) - goto error; + return err; ah_info->dst_arpindex = irdma_add_arp(iwdev->rf, ah_info->dest_ip_addr, ah_info->ipv4_valid, dmac); - if (ah_info->dst_arpindex == -1) { - err = -EINVAL; - goto error; - } + if (ah_info->dst_arpindex == -1) + return -EINVAL; - if (ah_info->vlan_tag >= VLAN_N_VID && iwdev->dcb) + if (ah_info->vlan_tag >= VLAN_N_VID && iwdev->dcb_vlan_mode) ah_info->vlan_tag = 0; if (ah_info->vlan_tag < VLAN_N_VID) { @@ -4238,43 +4193,38 @@ static int irdma_create_ah(struct ib_ah *ibah, rt_tos2priority(ah_info->tc_tos) << VLAN_PRIO_SHIFT; } - err = irdma_ah_cqp_op(iwdev->rf, sc_ah, IRDMA_OP_AH_CREATE, - attr->flags & RDMA_CREATE_AH_SLEEPABLE, - irdma_gsi_ud_qp_ah_cb, sc_ah); + return 0; +} - if (err) { - ibdev_dbg(&iwdev->ibdev, - "VERBS: CQP-OP Create AH fail"); - goto error; - } +/** + * irdma_ah_exists - Check for existing identical AH + * @iwdev: irdma device + * @new_ah: AH to check for + * + * returns true if AH is found, false if not found. + */ +static bool irdma_ah_exists(struct irdma_device *iwdev, + struct irdma_ah *new_ah) +{ + struct irdma_ah *ah; + u32 key = new_ah->sc_ah.ah_info.dest_ip_addr[0] ^ + new_ah->sc_ah.ah_info.dest_ip_addr[1] ^ + new_ah->sc_ah.ah_info.dest_ip_addr[2] ^ + new_ah->sc_ah.ah_info.dest_ip_addr[3]; - if (!(attr->flags & RDMA_CREATE_AH_SLEEPABLE)) { - int cnt = CQP_COMPL_WAIT_TIME_MS * CQP_TIMEOUT_THRESHOLD; - - do { - irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq); - mdelay(1); - } while (!sc_ah->ah_info.ah_valid && --cnt); - - if (!cnt) { - ibdev_dbg(&iwdev->ibdev, - "VERBS: CQP create AH timed out"); - err = -ETIMEDOUT; - goto error; + hash_for_each_possible(iwdev->ah_hash_tbl, ah, list, key) { + /* Set ah_valid and ah_id the same so memcmp can work */ + new_ah->sc_ah.ah_info.ah_idx = ah->sc_ah.ah_info.ah_idx; + new_ah->sc_ah.ah_info.ah_valid = ah->sc_ah.ah_info.ah_valid; + if (!memcmp(&ah->sc_ah.ah_info, &new_ah->sc_ah.ah_info, + sizeof(ah->sc_ah.ah_info))) { + refcount_inc(&ah->refcnt); + new_ah->parent_ah = ah; + return true; } } - if (udata) { - uresp.ah_id = ah->sc_ah.ah_info.ah_idx; - err = ib_copy_to_udata(udata, &uresp, - min(sizeof(uresp), udata->outlen)); - } - return 0; - -error: - irdma_free_rsrc(iwdev->rf, iwdev->rf->allocated_ahs, ah_id); - - return err; + return false; } /** @@ -4287,6 +4237,17 @@ static int irdma_destroy_ah(struct ib_ah *ibah, u32 ah_flags) struct irdma_device *iwdev = to_iwdev(ibah->device); struct irdma_ah *ah = to_iwah(ibah); + if ((ah_flags & RDMA_DESTROY_AH_SLEEPABLE) && ah->parent_ah) { + mutex_lock(&iwdev->ah_tbl_lock); + if (!refcount_dec_and_test(&ah->parent_ah->refcnt)) { + mutex_unlock(&iwdev->ah_tbl_lock); + return 0; + } + hash_del(&ah->parent_ah->list); + kfree(ah->parent_ah); + mutex_unlock(&iwdev->ah_tbl_lock); + } + irdma_ah_cqp_op(iwdev->rf, &ah->sc_ah, IRDMA_OP_AH_DESTROY, false, NULL, ah); @@ -4296,6 +4257,80 @@ static int irdma_destroy_ah(struct ib_ah *ibah, u32 ah_flags) return 0; } +/** + * irdma_create_user_ah - create user address handle + * @ibah: address handle + * @attr: address handle attributes + * @udata: User data + * + * returns 0 on success, error otherwise + */ +static int irdma_create_user_ah(struct ib_ah *ibah, + struct rdma_ah_init_attr *attr, + struct ib_udata *udata) +{ + struct irdma_ah *ah = container_of(ibah, struct irdma_ah, ibah); + struct irdma_device *iwdev = to_iwdev(ibah->pd->device); + struct irdma_create_ah_resp uresp; + struct irdma_ah *parent_ah; + int err; + + err = irdma_setup_ah(ibah, attr); + if (err) + return err; + mutex_lock(&iwdev->ah_tbl_lock); + if (!irdma_ah_exists(iwdev, ah)) { + err = irdma_create_hw_ah(iwdev, ah, true); + if (err) { + mutex_unlock(&iwdev->ah_tbl_lock); + return err; + } + /* Add new AH to list */ + parent_ah = kmemdup(ah, sizeof(*ah), GFP_KERNEL); + if (parent_ah) { + u32 key = parent_ah->sc_ah.ah_info.dest_ip_addr[0] ^ + parent_ah->sc_ah.ah_info.dest_ip_addr[1] ^ + parent_ah->sc_ah.ah_info.dest_ip_addr[2] ^ + parent_ah->sc_ah.ah_info.dest_ip_addr[3]; + + ah->parent_ah = parent_ah; + hash_add(iwdev->ah_hash_tbl, &parent_ah->list, key); + refcount_set(&parent_ah->refcnt, 1); + } + } + mutex_unlock(&iwdev->ah_tbl_lock); + + uresp.ah_id = ah->sc_ah.ah_info.ah_idx; + err = ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen)); + if (err) + irdma_destroy_ah(ibah, attr->flags); + + return err; +} + +/** + * irdma_create_ah - create address handle + * @ibah: address handle + * @attr: address handle attributes + * @udata: NULL + * + * returns 0 on success, error otherwise + */ +static int irdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *attr, + struct ib_udata *udata) +{ + struct irdma_ah *ah = container_of(ibah, struct irdma_ah, ibah); + struct irdma_device *iwdev = to_iwdev(ibah->pd->device); + int err; + + err = irdma_setup_ah(ibah, attr); + if (err) + return err; + err = irdma_create_hw_ah(iwdev, ah, attr->flags & RDMA_CREATE_AH_SLEEPABLE); + + return err; +} + /** * irdma_query_ah - Query address handle * @ibah: pointer to address handle @@ -4329,7 +4364,7 @@ static enum rdma_link_layer irdma_get_link_layer(struct ib_device *ibdev, static const struct ib_device_ops irdma_roce_dev_ops = { .attach_mcast = irdma_attach_mcast, .create_ah = irdma_create_ah, - .create_user_ah = irdma_create_ah, + .create_user_ah = irdma_create_user_ah, .destroy_ah = irdma_destroy_ah, .detach_mcast = irdma_detach_mcast, .get_link_layer = irdma_get_link_layer, diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h index d0fdef8d09ea..08ba24d0b843 100644 --- a/drivers/infiniband/hw/irdma/verbs.h +++ b/drivers/infiniband/hw/irdma/verbs.h @@ -25,14 +25,16 @@ struct irdma_pd { struct irdma_sc_pd sc_pd; }; +union irdma_sockaddr { + struct sockaddr_in saddr_in; + struct sockaddr_in6 saddr_in6; +}; + struct irdma_av { u8 macaddr[16]; struct rdma_ah_attr attrs; - union { - struct sockaddr saddr; - struct sockaddr_in saddr_in; - struct sockaddr_in6 saddr_in6; - } sgid_addr, dgid_addr; + union irdma_sockaddr sgid_addr; + union irdma_sockaddr dgid_addr; u8 net_type; }; @@ -43,6 +45,9 @@ struct irdma_ah { struct irdma_av av; u8 sgid_index; union ib_gid dgid; + struct hlist_node list; + refcount_t refcnt; + struct irdma_ah *parent_ah; /* AH from cached list */ }; struct irdma_hmc_pble { diff --git a/drivers/infiniband/hw/irdma/ws.c b/drivers/infiniband/hw/irdma/ws.c index b0d6ee0739f5..20bc8d0d7f1f 100644 --- a/drivers/infiniband/hw/irdma/ws.c +++ b/drivers/infiniband/hw/irdma/ws.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB /* Copyright (c) 2017 - 2021 Intel Corporation */ #include "osdep.h" -#include "status.h" #include "hmc.h" #include "defs.h" #include "type.h" @@ -87,8 +86,8 @@ static void irdma_free_node(struct irdma_sc_vsi *vsi, * @node: pointer to node * @cmd: add, remove or modify */ -static enum irdma_status_code -irdma_ws_cqp_cmd(struct irdma_sc_vsi *vsi, struct irdma_ws_node *node, u8 cmd) +static int irdma_ws_cqp_cmd(struct irdma_sc_vsi *vsi, + struct irdma_ws_node *node, u8 cmd) { struct irdma_ws_node_info node_info = {}; @@ -106,7 +105,7 @@ irdma_ws_cqp_cmd(struct irdma_sc_vsi *vsi, struct irdma_ws_node *node, u8 cmd) node_info.enable = node->enable; if (irdma_cqp_ws_node_cmd(vsi->dev, cmd, &node_info)) { ibdev_dbg(to_ibdev(vsi->dev), "WS: CQP WS CMD failed\n"); - return IRDMA_ERR_NO_MEMORY; + return -ENOMEM; } if (node->type_leaf && cmd == IRDMA_OP_WS_ADD_NODE) { @@ -234,18 +233,18 @@ static void irdma_remove_leaf(struct irdma_sc_vsi *vsi, u8 user_pri) * @vsi: vsi pointer * @user_pri: user priority */ -enum irdma_status_code irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri) +int irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri) { struct irdma_ws_node *ws_tree_root; struct irdma_ws_node *vsi_node; struct irdma_ws_node *tc_node; u16 traffic_class; - enum irdma_status_code ret = 0; + int ret = 0; int i; mutex_lock(&vsi->dev->ws_mutex); if (vsi->tc_change_pending) { - ret = IRDMA_ERR_NOT_READY; + ret = -EBUSY; goto exit; } @@ -258,7 +257,7 @@ enum irdma_status_code irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri) ws_tree_root = irdma_alloc_node(vsi, user_pri, WS_NODE_TYPE_PARENT, NULL); if (!ws_tree_root) { - ret = IRDMA_ERR_NO_MEMORY; + ret = -ENOMEM; goto exit; } @@ -283,7 +282,7 @@ enum irdma_status_code irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri) vsi_node = irdma_alloc_node(vsi, user_pri, WS_NODE_TYPE_PARENT, ws_tree_root); if (!vsi_node) { - ret = IRDMA_ERR_NO_MEMORY; + ret = -ENOMEM; goto vsi_add_err; } @@ -310,7 +309,7 @@ enum irdma_status_code irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri) tc_node = irdma_alloc_node(vsi, user_pri, WS_NODE_TYPE_LEAF, vsi_node); if (!tc_node) { - ret = IRDMA_ERR_NO_MEMORY; + ret = -ENOMEM; goto leaf_add_err; } diff --git a/drivers/infiniband/hw/irdma/ws.h b/drivers/infiniband/hw/irdma/ws.h index f0e16f630701..d431e3327d26 100644 --- a/drivers/infiniband/hw/irdma/ws.h +++ b/drivers/infiniband/hw/irdma/ws.h @@ -34,7 +34,7 @@ struct irdma_ws_node { }; struct irdma_sc_vsi; -enum irdma_status_code irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri); +int irdma_ws_add(struct irdma_sc_vsi *vsi, u8 user_pri); void irdma_ws_remove(struct irdma_sc_vsi *vsi, u8 user_pri); void irdma_ws_reset(struct irdma_sc_vsi *vsi); diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index e2e1f5daddc4..111fa88a3be4 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 6a381751c0d8..c4cf91235eee 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -320,7 +320,6 @@ int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { err = -EIO; *bad_wr = wr; - nreq = 0; goto out; } diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index fc036b4794fd..2a2a9e9afc9d 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1895,8 +1895,10 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table, key_level2, obj_event, GFP_KERNEL); - if (err) + if (err) { + kfree(obj_event); return err; + } INIT_LIST_HEAD(&obj_event->obj_sub_list); } diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c index f2f62875d072..afeb5e53254f 100644 --- a/drivers/infiniband/hw/mlx5/ib_virt.c +++ b/drivers/infiniband/hw/mlx5/ib_virt.c @@ -30,7 +30,6 @@ * SOFTWARE. */ -#include #include #include "mlx5_ib.h" diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 844545064c9e..6191aa833ac2 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -30,7 +30,6 @@ * SOFTWARE. */ -#include #include #include #include "mlx5_ib.h" diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index cbc20e400be0..4f04bb55c4c6 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -763,9 +763,9 @@ struct mlx5_cache_ent { char name[4]; u32 order; - u32 xlt; u32 access_mode; u32 page; + unsigned int ndescs; u8 disabled:1; u8 fill_to_high_water:1; @@ -788,7 +788,6 @@ struct mlx5_cache_ent { u32 miss; struct mlx5_ib_dev *dev; - struct work_struct work; struct delayed_work dwork; }; @@ -1344,7 +1343,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - unsigned int entry, int access_flags); + struct mlx5_cache_ent *ent, + int access_flags); int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_mr_status *mr_status); @@ -1539,12 +1539,6 @@ static inline int get_uars_per_sys_page(struct mlx5_ib_dev *dev, bool lib_suppor MLX5_UARS_IN_PAGE : 1; } -static inline int get_num_static_uars(struct mlx5_ib_dev *dev, - struct mlx5_bfreg_info *bfregi) -{ - return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_static_sys_pages; -} - extern void *xlt_emergency_page; int bfregn_to_uar_index(struct mlx5_ib_dev *dev, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 32cb7068f0ca..956f8e875daa 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -68,7 +68,6 @@ static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, struct ib_pd *pd) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - bool ro_pci_enabled = pcie_relaxed_ordering_enabled(dev->mdev->pdev); MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); @@ -76,12 +75,13 @@ static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); MLX5_SET(mkc, mkc, lr, 1); - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) - MLX5_SET(mkc, mkc, relaxed_ordering_write, - (acc & IB_ACCESS_RELAXED_ORDERING) && ro_pci_enabled); - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) - MLX5_SET(mkc, mkc, relaxed_ordering_read, - (acc & IB_ACCESS_RELAXED_ORDERING) && ro_pci_enabled); + if ((acc & IB_ACCESS_RELAXED_ORDERING) && + pcie_relaxed_ordering_enabled(dev->mdev->pdev)) { + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) + MLX5_SET(mkc, mkc, relaxed_ordering_write, 1); + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) + MLX5_SET(mkc, mkc, relaxed_ordering_read, 1); + } MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); MLX5_SET(mkc, mkc, qpn, 0xffffff); @@ -189,6 +189,25 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) spin_unlock_irqrestore(&ent->lock, flags); } +static int get_mkc_octo_size(unsigned int access_mode, unsigned int ndescs) +{ + int ret = 0; + + switch (access_mode) { + case MLX5_MKC_ACCESS_MODE_MTT: + ret = DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD / + sizeof(struct mlx5_mtt)); + break; + case MLX5_MKC_ACCESS_MODE_KSM: + ret = DIV_ROUND_UP(ndescs, MLX5_IB_UMR_OCTOWORD / + sizeof(struct mlx5_klm)); + break; + default: + WARN_ON(1); + } + return ret; +} + static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc) { struct mlx5_ib_mr *mr; @@ -204,7 +223,8 @@ static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc) MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3); MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7); - MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt); + MLX5_SET(mkc, mkc, translations_octword_size, + get_mkc_octo_size(ent->access_mode, ent->ndescs)); MLX5_SET(mkc, mkc, log_page_size, ent->page); return mr; } @@ -478,14 +498,14 @@ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent) return; if (ent->available_mrs < ent->limit) { ent->fill_to_high_water = true; - queue_work(ent->dev->cache.wq, &ent->work); + mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0); } else if (ent->fill_to_high_water && ent->available_mrs + ent->pending < 2 * ent->limit) { /* * Once we start populating due to hitting a low water mark * continue until we pass the high water mark. */ - queue_work(ent->dev->cache.wq, &ent->work); + mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0); } else if (ent->available_mrs == 2 * ent->limit) { ent->fill_to_high_water = false; } else if (ent->available_mrs > 2 * ent->limit) { @@ -495,7 +515,7 @@ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent) queue_delayed_work(ent->dev->cache.wq, &ent->dwork, msecs_to_jiffies(1000)); else - queue_work(ent->dev->cache.wq, &ent->work); + mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0); } } @@ -571,33 +591,20 @@ static void delayed_cache_work_func(struct work_struct *work) __cache_work_func(ent); } -static void cache_work_func(struct work_struct *work) -{ - struct mlx5_cache_ent *ent; - - ent = container_of(work, struct mlx5_cache_ent, work); - __cache_work_func(ent); -} - -/* Allocate a special entry from the cache */ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - unsigned int entry, int access_flags) + struct mlx5_cache_ent *ent, + int access_flags) { - struct mlx5_mr_cache *cache = &dev->cache; - struct mlx5_cache_ent *ent; struct mlx5_ib_mr *mr; - if (WARN_ON(entry <= MR_CACHE_LAST_STD_ENTRY || - entry >= ARRAY_SIZE(cache->ent))) - return ERR_PTR(-EINVAL); - /* Matches access in alloc_cache_mr() */ if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) return ERR_PTR(-EOPNOTSUPP); - ent = &cache->ent[entry]; spin_lock_irq(&ent->lock); if (list_empty(&ent->head)) { + queue_adjust_cache_locked(ent); + ent->miss++; spin_unlock_irq(&ent->lock); mr = create_cache_mr(ent); if (IS_ERR(mr)) @@ -611,32 +618,9 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, mlx5_clear_mr(mr); } - mr->access_flags = access_flags; return mr; } -/* Return a MR already available in the cache */ -static struct mlx5_ib_mr *get_cache_mr(struct mlx5_cache_ent *req_ent) -{ - struct mlx5_ib_mr *mr = NULL; - struct mlx5_cache_ent *ent = req_ent; - - spin_lock_irq(&ent->lock); - if (!list_empty(&ent->head)) { - mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); - list_del(&mr->list); - ent->available_mrs--; - queue_adjust_cache_locked(ent); - spin_unlock_irq(&ent->lock); - mlx5_clear_mr(mr); - return mr; - } - queue_adjust_cache_locked(ent); - spin_unlock_irq(&ent->lock); - req_ent->miss++; - return NULL; -} - static void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) { struct mlx5_cache_ent *ent = mr->cache_ent; @@ -739,7 +723,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) ent->dev = dev; ent->limit = 0; - INIT_WORK(&ent->work, cache_work_func); INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); if (i > MR_CACHE_LAST_STD_ENTRY) { @@ -751,8 +734,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) continue; ent->page = PAGE_SHIFT; - ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) / - MLX5_IB_UMR_OCTOWORD; + ent->ndescs = 1 << ent->order; ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) && !dev->is_rep && mlx5_core_is_pf(dev->mdev) && @@ -783,7 +765,6 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) spin_lock_irq(&ent->lock); ent->disabled = true; spin_unlock_irq(&ent->lock); - cancel_work_sync(&ent->work); cancel_delayed_work_sync(&ent->dwork); } @@ -972,16 +953,9 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, return mr; } - mr = get_cache_mr(ent); - if (!mr) { - mr = create_cache_mr(ent); - /* - * The above already tried to do the same stuff as reg_create(), - * no reason to try it again. - */ - if (IS_ERR(mr)) - return mr; - } + mr = mlx5_mr_cache_alloc(dev, ent, access_flags); + if (IS_ERR(mr)) + return mr; mr->ibmr.pd = pd; mr->umem = umem; diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 86842cd580ba..41c964a45f89 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -407,6 +407,7 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, unsigned long idx) { + struct mlx5_ib_dev *dev = mr_to_mdev(imr); struct ib_umem_odp *odp; struct mlx5_ib_mr *mr; struct mlx5_ib_mr *ret; @@ -418,13 +419,14 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, if (IS_ERR(odp)) return ERR_CAST(odp); - mr = mlx5_mr_cache_alloc( - mr_to_mdev(imr), MLX5_IMR_MTT_CACHE_ENTRY, imr->access_flags); + mr = mlx5_mr_cache_alloc(dev, &dev->cache.ent[MLX5_IMR_MTT_CACHE_ENTRY], + imr->access_flags); if (IS_ERR(mr)) { ib_umem_odp_release(odp); return mr; } + mr->access_flags = imr->access_flags; mr->ibmr.pd = imr->ibmr.pd; mr->ibmr.device = &mr_to_mdev(imr)->ib_dev; mr->umem = &odp->umem; @@ -493,12 +495,15 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, if (IS_ERR(umem_odp)) return ERR_CAST(umem_odp); - imr = mlx5_mr_cache_alloc(dev, MLX5_IMR_KSM_CACHE_ENTRY, access_flags); + imr = mlx5_mr_cache_alloc(dev, + &dev->cache.ent[MLX5_IMR_KSM_CACHE_ENTRY], + access_flags); if (IS_ERR(imr)) { ib_umem_odp_release(umem_odp); return imr; } + imr->access_flags = access_flags; imr->ibmr.pd = &pd->ibpd; imr->ibmr.iova = 0; imr->umem = &umem_odp->umem; @@ -1593,18 +1598,14 @@ void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) switch (ent->order - 2) { case MLX5_IMR_MTT_CACHE_ENTRY: ent->page = PAGE_SHIFT; - ent->xlt = MLX5_IMR_MTT_ENTRIES * - sizeof(struct mlx5_mtt) / - MLX5_IB_UMR_OCTOWORD; + ent->ndescs = MLX5_IMR_MTT_ENTRIES; ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; ent->limit = 0; break; case MLX5_IMR_KSM_CACHE_ENTRY: ent->page = MLX5_KSM_PAGE_SHIFT; - ent->xlt = mlx5_imr_ksm_entries * - sizeof(struct mlx5_klm) / - MLX5_IB_UMR_OCTOWORD; + ent->ndescs = mlx5_imr_ksm_entries; ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM; ent->limit = 0; break; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index b7fe47107d76..3f467557d34e 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -31,7 +31,6 @@ */ #include -#include #include #include #include @@ -615,7 +614,8 @@ enum { static int max_bfregs(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi) { - return get_num_static_uars(dev, bfregi) * MLX5_NON_FP_BFREGS_PER_UAR; + return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * + bfregi->num_static_sys_pages * MLX5_NON_FP_BFREGS_PER_UAR; } static int num_med_bfreg(struct mlx5_ib_dev *dev, diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 191c4ee7db62..09b365a98bbf 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -3,7 +3,6 @@ * Copyright (c) 2013-2018, Mellanox Technologies inc. All rights reserved. */ -#include #include #include #include diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index f507c4cd46d3..b54bc8865dae 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -939,12 +939,8 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (err) { - dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask.\n"); - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (err) { - dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n"); - goto err_free_res; - } + dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting.\n"); + goto err_free_res; } /* We can handle large RDMA requests, so allow larger segments. */ diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c b/drivers/infiniband/hw/mthca/mthca_profile.c index 7ea970774839..69af65f1b332 100644 --- a/drivers/infiniband/hw/mthca/mthca_profile.c +++ b/drivers/infiniband/hw/mthca/mthca_profile.c @@ -31,8 +31,6 @@ * SOFTWARE. */ -#include -#include #include #include diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index a0c5f3bdc324..a973905afd13 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -32,7 +32,6 @@ * SOFTWARE. */ -#include #include #include #include diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 80a8dd6c7814..37b628a162e0 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -634,7 +634,7 @@ static const struct qib_hwerror_msgs qib_7220_hwerror_msgs[] = { QLOGIC_IB_HWE_MSG(QLOGIC_IB_HWE_PCIECPLTIMEOUT, "PCIe completion timeout"), /* - * In practice, it's unlikely wthat we'll see PCIe PLL, or bus + * In practice, it's unlikely that we'll see PCIe PLL, or bus * parity or memory parity error failures, because most likely we * won't be able to talk to the core of the chip. Nonetheless, we * might see them, if they are in parts of the PCIe core that aren't @@ -2988,7 +2988,7 @@ done: * the utility. Names need to be 12 chars or less (w/o newline), for proper * display by utility. * Non-error counters are first. - * Start of "error" conters is indicated by a leading "E " on the first + * Start of "error" counters is indicated by a leading "E " on the first * "error" counter, and doesn't count in label length. * The EgrOvfl list needs to be last so we truncate them at the configured * context count for the device. diff --git a/drivers/infiniband/hw/usnic/usnic_debugfs.c b/drivers/infiniband/hw/usnic/usnic_debugfs.c index e5a3f02fb078..10a8cd5ba076 100644 --- a/drivers/infiniband/hw/usnic/usnic_debugfs.c +++ b/drivers/infiniband/hw/usnic/usnic_debugfs.c @@ -32,7 +32,6 @@ */ #include -#include #include "usnic.h" #include "usnic_log.h" diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c index 3b60fa9cb58d..59bfbfaee325 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c @@ -32,7 +32,6 @@ */ #include #include -#include #include #include "usnic_log.h" diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c index 7d868f033bbf..fdb63a8fb997 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c @@ -31,7 +31,6 @@ * */ -#include #include #include diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 5a0e26cd648e..d3a9670bf971 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -30,7 +30,6 @@ * SOFTWARE. * */ -#include #include #include #include diff --git a/drivers/infiniband/hw/usnic/usnic_transport.c b/drivers/infiniband/hw/usnic/usnic_transport.c index 82dd810bc000..dc37066900a5 100644 --- a/drivers/infiniband/hw/usnic/usnic_transport.c +++ b/drivers/infiniband/hw/usnic/usnic_transport.c @@ -32,7 +32,6 @@ */ #include #include -#include #include #include diff --git a/drivers/infiniband/hw/usnic/usnic_vnic.c b/drivers/infiniband/hw/usnic/usnic_vnic.c index ebe08f348453..0c47f73aaed5 100644 --- a/drivers/infiniband/hw/usnic/usnic_vnic.c +++ b/drivers/infiniband/hw/usnic/usnic_vnic.c @@ -31,7 +31,6 @@ * */ #include -#include #include #include "usnic_ib.h" diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 105f3a155939..343288b02792 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -811,12 +811,10 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, } /* Enable 64-Bit DMA */ - if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) != 0) { - ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); - if (ret != 0) { - dev_err(&pdev->dev, "dma_set_mask failed\n"); - goto err_free_resource; - } + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) { + dev_err(&pdev->dev, "dma_set_mask failed\n"); + goto err_free_resource; } dma_set_max_seg_size(&pdev->dev, UINT_MAX); pci_set_master(pdev); diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index fab291245366..2dae7538a2ea 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -28,8 +28,8 @@ void rxe_dealloc(struct ib_device *ib_dev) rxe_pool_cleanup(&rxe->cq_pool); rxe_pool_cleanup(&rxe->mr_pool); rxe_pool_cleanup(&rxe->mw_pool); - rxe_pool_cleanup(&rxe->mc_grp_pool); - rxe_pool_cleanup(&rxe->mc_elem_pool); + + WARN_ON(!RB_EMPTY_ROOT(&rxe->mcg_tree)); if (rxe->tfm) crypto_free_shash(rxe->tfm); @@ -114,106 +114,37 @@ static void rxe_init_ports(struct rxe_dev *rxe) } /* init pools of managed objects */ -static int rxe_init_pools(struct rxe_dev *rxe) +static void rxe_init_pools(struct rxe_dev *rxe) { - int err; - - err = rxe_pool_init(rxe, &rxe->uc_pool, RXE_TYPE_UC, - rxe->max_ucontext); - if (err) - goto err1; - - err = rxe_pool_init(rxe, &rxe->pd_pool, RXE_TYPE_PD, - rxe->attr.max_pd); - if (err) - goto err2; - - err = rxe_pool_init(rxe, &rxe->ah_pool, RXE_TYPE_AH, - rxe->attr.max_ah); - if (err) - goto err3; - - err = rxe_pool_init(rxe, &rxe->srq_pool, RXE_TYPE_SRQ, - rxe->attr.max_srq); - if (err) - goto err4; - - err = rxe_pool_init(rxe, &rxe->qp_pool, RXE_TYPE_QP, - rxe->attr.max_qp); - if (err) - goto err5; - - err = rxe_pool_init(rxe, &rxe->cq_pool, RXE_TYPE_CQ, - rxe->attr.max_cq); - if (err) - goto err6; - - err = rxe_pool_init(rxe, &rxe->mr_pool, RXE_TYPE_MR, - rxe->attr.max_mr); - if (err) - goto err7; - - err = rxe_pool_init(rxe, &rxe->mw_pool, RXE_TYPE_MW, - rxe->attr.max_mw); - if (err) - goto err8; - - err = rxe_pool_init(rxe, &rxe->mc_grp_pool, RXE_TYPE_MC_GRP, - rxe->attr.max_mcast_grp); - if (err) - goto err9; - - err = rxe_pool_init(rxe, &rxe->mc_elem_pool, RXE_TYPE_MC_ELEM, - rxe->attr.max_total_mcast_qp_attach); - if (err) - goto err10; - - return 0; - -err10: - rxe_pool_cleanup(&rxe->mc_grp_pool); -err9: - rxe_pool_cleanup(&rxe->mw_pool); -err8: - rxe_pool_cleanup(&rxe->mr_pool); -err7: - rxe_pool_cleanup(&rxe->cq_pool); -err6: - rxe_pool_cleanup(&rxe->qp_pool); -err5: - rxe_pool_cleanup(&rxe->srq_pool); -err4: - rxe_pool_cleanup(&rxe->ah_pool); -err3: - rxe_pool_cleanup(&rxe->pd_pool); -err2: - rxe_pool_cleanup(&rxe->uc_pool); -err1: - return err; + rxe_pool_init(rxe, &rxe->uc_pool, RXE_TYPE_UC); + rxe_pool_init(rxe, &rxe->pd_pool, RXE_TYPE_PD); + rxe_pool_init(rxe, &rxe->ah_pool, RXE_TYPE_AH); + rxe_pool_init(rxe, &rxe->srq_pool, RXE_TYPE_SRQ); + rxe_pool_init(rxe, &rxe->qp_pool, RXE_TYPE_QP); + rxe_pool_init(rxe, &rxe->cq_pool, RXE_TYPE_CQ); + rxe_pool_init(rxe, &rxe->mr_pool, RXE_TYPE_MR); + rxe_pool_init(rxe, &rxe->mw_pool, RXE_TYPE_MW); } /* initialize rxe device state */ -static int rxe_init(struct rxe_dev *rxe) +static void rxe_init(struct rxe_dev *rxe) { - int err; - /* init default device parameters */ rxe_init_device_param(rxe); rxe_init_ports(rxe); - - err = rxe_init_pools(rxe); - if (err) - return err; + rxe_init_pools(rxe); /* init pending mmap list */ spin_lock_init(&rxe->mmap_offset_lock); spin_lock_init(&rxe->pending_lock); INIT_LIST_HEAD(&rxe->pending_mmaps); - mutex_init(&rxe->usdev_lock); + /* init multicast support */ + spin_lock_init(&rxe->mcg_lock); + rxe->mcg_tree = RB_ROOT; - return 0; + mutex_init(&rxe->usdev_lock); } void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu) @@ -235,12 +166,7 @@ void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu) */ int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name) { - int err; - - err = rxe_init(rxe); - if (err) - return err; - + rxe_init(rxe); rxe_set_mtu(rxe, mtu); return rxe_register_device(rxe, ibdev_name); diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h index fb9066e6f5f0..30fbdf3bc76a 100644 --- a/drivers/infiniband/sw/rxe/rxe.h +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -12,7 +12,6 @@ #endif #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include #include #include diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c index 38c7b6fb39d7..3b05314ca739 100644 --- a/drivers/infiniband/sw/rxe/rxe_av.c +++ b/drivers/infiniband/sw/rxe/rxe_av.c @@ -99,11 +99,14 @@ void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr) av->network_type = type; } -struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt) +struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp) { struct rxe_ah *ah; u32 ah_num; + if (ahp) + *ahp = NULL; + if (!pkt || !pkt->qp) return NULL; @@ -117,10 +120,22 @@ struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt) if (ah_num) { /* only new user provider or kernel client */ ah = rxe_pool_get_index(&pkt->rxe->ah_pool, ah_num); - if (!ah || ah->ah_num != ah_num || rxe_ah_pd(ah) != pkt->qp->pd) { + if (!ah) { pr_warn("Unable to find AH matching ah_num\n"); return NULL; } + + if (rxe_ah_pd(ah) != pkt->qp->pd) { + pr_warn("PDs don't match for AH and QP\n"); + rxe_put(ah); + return NULL; + } + + if (ahp) + *ahp = ah; + else + rxe_put(ah); + return &ah->av; } diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index f363fe3fa414..138b3e7d3a5f 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -526,7 +526,7 @@ static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify) struct rxe_queue *q = qp->sq.queue; while ((skb = skb_dequeue(&qp->resp_pkts))) { - rxe_drop_ref(qp); + rxe_put(qp); kfree_skb(skb); ib_device_put(qp->ibqp.device); } @@ -548,7 +548,7 @@ static void free_pkt(struct rxe_pkt_info *pkt) struct ib_device *dev = qp->ibqp.device; kfree_skb(skb); - rxe_drop_ref(qp); + rxe_put(qp); ib_device_put(dev); } @@ -562,7 +562,7 @@ int rxe_completer(void *arg) enum comp_state state; int ret = 0; - rxe_add_ref(qp); + rxe_get(qp); if (!qp->valid || qp->req.state == QP_STATE_ERROR || qp->req.state == QP_STATE_RESET) { @@ -761,7 +761,7 @@ int rxe_completer(void *arg) done: if (pkt) free_pkt(pkt); - rxe_drop_ref(qp); + rxe_put(qp); return ret; } diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index 6baaaa34458e..642b52539ac3 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -42,13 +42,14 @@ err1: static void rxe_send_complete(struct tasklet_struct *t) { struct rxe_cq *cq = from_tasklet(cq, t, comp_task); + unsigned long flags; - spin_lock_bh(&cq->cq_lock); + spin_lock_irqsave(&cq->cq_lock, flags); if (cq->is_dying) { - spin_unlock_bh(&cq->cq_lock); + spin_unlock_irqrestore(&cq->cq_lock, flags); return; } - spin_unlock_bh(&cq->cq_lock); + spin_unlock_irqrestore(&cq->cq_lock, flags); cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); } @@ -107,12 +108,13 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) struct ib_event ev; int full; void *addr; + unsigned long flags; - spin_lock_bh(&cq->cq_lock); + spin_lock_irqsave(&cq->cq_lock, flags); full = queue_full(cq->queue, QUEUE_TYPE_TO_CLIENT); if (unlikely(full)) { - spin_unlock_bh(&cq->cq_lock); + spin_unlock_irqrestore(&cq->cq_lock, flags); if (cq->ibcq.event_handler) { ev.device = cq->ibcq.device; ev.element.cq = &cq->ibcq; @@ -128,7 +130,7 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) queue_advance_producer(cq->queue, QUEUE_TYPE_TO_CLIENT); - spin_unlock_bh(&cq->cq_lock); + spin_unlock_irqrestore(&cq->cq_lock, flags); if ((cq->notify == IB_CQ_NEXT_COMP) || (cq->notify == IB_CQ_SOLICITED && solicited)) { @@ -141,9 +143,11 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) void rxe_cq_disable(struct rxe_cq *cq) { - spin_lock_bh(&cq->cq_lock); + unsigned long flags; + + spin_lock_irqsave(&cq->cq_lock, flags); cq->is_dying = true; - spin_unlock_bh(&cq->cq_lock); + spin_unlock_irqrestore(&cq->cq_lock, flags); } void rxe_cq_cleanup(struct rxe_pool_elem *elem) diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index b1e174afb1d4..2ffbe3390668 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -19,7 +19,7 @@ void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr); void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr); -struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt); +struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp); /* rxe_cq.c */ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, @@ -40,18 +40,10 @@ void rxe_cq_disable(struct rxe_cq *cq); void rxe_cq_cleanup(struct rxe_pool_elem *arg); /* rxe_mcast.c */ -int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid, - struct rxe_mc_grp **grp_p); - -int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, - struct rxe_mc_grp *grp); - -int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, - union ib_gid *mgid); - -void rxe_drop_all_mcast_groups(struct rxe_qp *qp); - -void rxe_mc_cleanup(struct rxe_pool_elem *arg); +struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid); +int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid); +int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid); +void rxe_cleanup_mcg(struct kref *kref); /* rxe_mmap.c */ struct rxe_mmap_info { @@ -102,35 +94,27 @@ void rxe_mw_cleanup(struct rxe_pool_elem *arg); /* rxe_net.c */ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, int paylen, struct rxe_pkt_info *pkt); -int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb); +int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt, + struct sk_buff *skb); int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt, struct sk_buff *skb); const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num); -int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid); -int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid); /* rxe_qp.c */ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init); - int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, struct ib_qp_init_attr *init, struct rxe_create_qp_resp __user *uresp, struct ib_pd *ibpd, struct ib_udata *udata); - int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init); - int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, struct ib_qp_attr *attr, int mask); - int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, struct ib_udata *udata); - int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask); - void rxe_qp_error(struct rxe_qp *qp); - +int rxe_qp_chk_destroy(struct rxe_qp *qp); void rxe_qp_destroy(struct rxe_qp *qp); - void rxe_qp_cleanup(struct rxe_pool_elem *elem); static inline int qp_num(struct rxe_qp *qp) diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c index bd1ac88b8700..ae8f11cb704a 100644 --- a/drivers/infiniband/sw/rxe/rxe_mcast.c +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c @@ -1,178 +1,488 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* + * Copyright (c) 2022 Hewlett Packard Enterprise, Inc. All rights reserved. * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. */ +/* + * rxe_mcast.c implements driver support for multicast transport. + * It is based on two data structures struct rxe_mcg ('mcg') and + * struct rxe_mca ('mca'). An mcg is allocated each time a qp is + * attached to a new mgid for the first time. These are indexed by + * a red-black tree using the mgid. This data structure is searched + * for the mcg when a multicast packet is received and when another + * qp is attached to the same mgid. It is cleaned up when the last qp + * is detached from the mcg. Each time a qp is attached to an mcg an + * mca is created. It holds a pointer to the qp and is added to a list + * of qp's that are attached to the mcg. The qp_list is used to replicate + * mcast packets in the rxe receive path. + */ + #include "rxe.h" -#include "rxe_loc.h" -/* caller should hold mc_grp_pool->pool_lock */ -static struct rxe_mc_grp *create_grp(struct rxe_dev *rxe, - struct rxe_pool *pool, - union ib_gid *mgid) +/** + * rxe_mcast_add - add multicast address to rxe device + * @rxe: rxe device object + * @mgid: multicast address as a gid + * + * Returns 0 on success else an error + */ +static int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) { - int err; - struct rxe_mc_grp *grp; + unsigned char ll_addr[ETH_ALEN]; - grp = rxe_alloc_locked(&rxe->mc_grp_pool); - if (!grp) - return ERR_PTR(-ENOMEM); + ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); - INIT_LIST_HEAD(&grp->qp_list); - spin_lock_init(&grp->mcg_lock); - grp->rxe = rxe; - rxe_add_key_locked(grp, mgid); - - err = rxe_mcast_add(rxe, mgid); - if (unlikely(err)) { - rxe_drop_key_locked(grp); - rxe_drop_ref(grp); - return ERR_PTR(err); - } - - return grp; + return dev_mc_add(rxe->ndev, ll_addr); } -int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid, - struct rxe_mc_grp **grp_p) +/** + * rxe_mcast_delete - delete multicast address from rxe device + * @rxe: rxe device object + * @mgid: multicast address as a gid + * + * Returns 0 on success else an error + */ +static int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid) { - int err; - struct rxe_mc_grp *grp; - struct rxe_pool *pool = &rxe->mc_grp_pool; + unsigned char ll_addr[ETH_ALEN]; - if (rxe->attr.max_mcast_qp_attach == 0) - return -EINVAL; + ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); - write_lock_bh(&pool->pool_lock); + return dev_mc_del(rxe->ndev, ll_addr); +} - grp = rxe_pool_get_key_locked(pool, mgid); - if (grp) - goto done; +/** + * __rxe_insert_mcg - insert an mcg into red-black tree (rxe->mcg_tree) + * @mcg: mcg object with an embedded red-black tree node + * + * Context: caller must hold a reference to mcg and rxe->mcg_lock and + * is responsible to avoid adding the same mcg twice to the tree. + */ +static void __rxe_insert_mcg(struct rxe_mcg *mcg) +{ + struct rb_root *tree = &mcg->rxe->mcg_tree; + struct rb_node **link = &tree->rb_node; + struct rb_node *node = NULL; + struct rxe_mcg *tmp; + int cmp; - grp = create_grp(rxe, pool, mgid); - if (IS_ERR(grp)) { - write_unlock_bh(&pool->pool_lock); - err = PTR_ERR(grp); - return err; + while (*link) { + node = *link; + tmp = rb_entry(node, struct rxe_mcg, node); + + cmp = memcmp(&tmp->mgid, &mcg->mgid, sizeof(mcg->mgid)); + if (cmp > 0) + link = &(*link)->rb_left; + else + link = &(*link)->rb_right; } -done: - write_unlock_bh(&pool->pool_lock); - *grp_p = grp; + rb_link_node(&mcg->node, node, link); + rb_insert_color(&mcg->node, tree); +} + +/** + * __rxe_remove_mcg - remove an mcg from red-black tree holding lock + * @mcg: mcast group object with an embedded red-black tree node + * + * Context: caller must hold a reference to mcg and rxe->mcg_lock + */ +static void __rxe_remove_mcg(struct rxe_mcg *mcg) +{ + rb_erase(&mcg->node, &mcg->rxe->mcg_tree); +} + +/** + * __rxe_lookup_mcg - lookup mcg in rxe->mcg_tree while holding lock + * @rxe: rxe device object + * @mgid: multicast IP address + * + * Context: caller must hold rxe->mcg_lock + * Returns: mcg on success and takes a ref to mcg else NULL + */ +static struct rxe_mcg *__rxe_lookup_mcg(struct rxe_dev *rxe, + union ib_gid *mgid) +{ + struct rb_root *tree = &rxe->mcg_tree; + struct rxe_mcg *mcg; + struct rb_node *node; + int cmp; + + node = tree->rb_node; + + while (node) { + mcg = rb_entry(node, struct rxe_mcg, node); + + cmp = memcmp(&mcg->mgid, mgid, sizeof(*mgid)); + + if (cmp > 0) + node = node->rb_left; + else if (cmp < 0) + node = node->rb_right; + else + break; + } + + if (node) { + kref_get(&mcg->ref_cnt); + return mcg; + } + + return NULL; +} + +/** + * rxe_lookup_mcg - lookup up mcg in red-back tree + * @rxe: rxe device object + * @mgid: multicast IP address + * + * Returns: mcg if found else NULL + */ +struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid) +{ + struct rxe_mcg *mcg; + unsigned long flags; + + spin_lock_irqsave(&rxe->mcg_lock, flags); + mcg = __rxe_lookup_mcg(rxe, mgid); + spin_unlock_irqrestore(&rxe->mcg_lock, flags); + + return mcg; +} + +/** + * __rxe_init_mcg - initialize a new mcg + * @rxe: rxe device + * @mgid: multicast address as a gid + * @mcg: new mcg object + * + * Context: caller should hold rxe->mcg lock + * Returns: 0 on success else an error + */ +static int __rxe_init_mcg(struct rxe_dev *rxe, union ib_gid *mgid, + struct rxe_mcg *mcg) +{ + int err; + + err = rxe_mcast_add(rxe, mgid); + if (unlikely(err)) + return err; + + kref_init(&mcg->ref_cnt); + memcpy(&mcg->mgid, mgid, sizeof(mcg->mgid)); + INIT_LIST_HEAD(&mcg->qp_list); + mcg->rxe = rxe; + + /* caller holds a ref on mcg but that will be + * dropped when mcg goes out of scope. We need to take a ref + * on the pointer that will be saved in the red-black tree + * by __rxe_insert_mcg and used to lookup mcg from mgid later. + * Inserting mcg makes it visible to outside so this should + * be done last after the object is ready. + */ + kref_get(&mcg->ref_cnt); + __rxe_insert_mcg(mcg); + return 0; } -int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, - struct rxe_mc_grp *grp) +/** + * rxe_get_mcg - lookup or allocate a mcg + * @rxe: rxe device object + * @mgid: multicast IP address as a gid + * + * Returns: mcg on success else ERR_PTR(error) + */ +static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid) { + struct rxe_mcg *mcg, *tmp; + unsigned long flags; int err; - struct rxe_mc_elem *elem; - /* check to see of the qp is already a member of the group */ - spin_lock_bh(&qp->grp_lock); - spin_lock_bh(&grp->mcg_lock); - list_for_each_entry(elem, &grp->qp_list, qp_list) { - if (elem->qp == qp) { + if (rxe->attr.max_mcast_grp == 0) + return ERR_PTR(-EINVAL); + + /* check to see if mcg already exists */ + mcg = rxe_lookup_mcg(rxe, mgid); + if (mcg) + return mcg; + + /* speculative alloc of new mcg */ + mcg = kzalloc(sizeof(*mcg), GFP_KERNEL); + if (!mcg) + return ERR_PTR(-ENOMEM); + + spin_lock_irqsave(&rxe->mcg_lock, flags); + /* re-check to see if someone else just added it */ + tmp = __rxe_lookup_mcg(rxe, mgid); + if (tmp) { + kfree(mcg); + mcg = tmp; + goto out; + } + + if (atomic_inc_return(&rxe->mcg_num) > rxe->attr.max_mcast_grp) { + err = -ENOMEM; + goto err_dec; + } + + err = __rxe_init_mcg(rxe, mgid, mcg); + if (err) + goto err_dec; +out: + spin_unlock_irqrestore(&rxe->mcg_lock, flags); + return mcg; + +err_dec: + atomic_dec(&rxe->mcg_num); + spin_unlock_irqrestore(&rxe->mcg_lock, flags); + kfree(mcg); + return ERR_PTR(err); +} + +/** + * rxe_cleanup_mcg - cleanup mcg for kref_put + * @kref: struct kref embnedded in mcg + */ +void rxe_cleanup_mcg(struct kref *kref) +{ + struct rxe_mcg *mcg = container_of(kref, typeof(*mcg), ref_cnt); + + kfree(mcg); +} + +/** + * __rxe_destroy_mcg - destroy mcg object holding rxe->mcg_lock + * @mcg: the mcg object + * + * Context: caller is holding rxe->mcg_lock + * no qp's are attached to mcg + */ +static void __rxe_destroy_mcg(struct rxe_mcg *mcg) +{ + struct rxe_dev *rxe = mcg->rxe; + + /* remove mcg from red-black tree then drop ref */ + __rxe_remove_mcg(mcg); + kref_put(&mcg->ref_cnt, rxe_cleanup_mcg); + + rxe_mcast_delete(mcg->rxe, &mcg->mgid); + atomic_dec(&rxe->mcg_num); +} + +/** + * rxe_destroy_mcg - destroy mcg object + * @mcg: the mcg object + * + * Context: no qp's are attached to mcg + */ +static void rxe_destroy_mcg(struct rxe_mcg *mcg) +{ + unsigned long flags; + + spin_lock_irqsave(&mcg->rxe->mcg_lock, flags); + __rxe_destroy_mcg(mcg); + spin_unlock_irqrestore(&mcg->rxe->mcg_lock, flags); +} + +/** + * __rxe_init_mca - initialize a new mca holding lock + * @qp: qp object + * @mcg: mcg object + * @mca: empty space for new mca + * + * Context: caller must hold references on qp and mcg, rxe->mcg_lock + * and pass memory for new mca + * + * Returns: 0 on success else an error + */ +static int __rxe_init_mca(struct rxe_qp *qp, struct rxe_mcg *mcg, + struct rxe_mca *mca) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + int n; + + n = atomic_inc_return(&rxe->mcg_attach); + if (n > rxe->attr.max_total_mcast_qp_attach) { + atomic_dec(&rxe->mcg_attach); + return -ENOMEM; + } + + n = atomic_inc_return(&mcg->qp_num); + if (n > rxe->attr.max_mcast_qp_attach) { + atomic_dec(&mcg->qp_num); + atomic_dec(&rxe->mcg_attach); + return -ENOMEM; + } + + atomic_inc(&qp->mcg_num); + + rxe_get(qp); + mca->qp = qp; + + list_add_tail(&mca->qp_list, &mcg->qp_list); + + return 0; +} + +/** + * rxe_attach_mcg - attach qp to mcg if not already attached + * @qp: qp object + * @mcg: mcg object + * + * Context: caller must hold reference on qp and mcg. + * Returns: 0 on success else an error + */ +static int rxe_attach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) +{ + struct rxe_dev *rxe = mcg->rxe; + struct rxe_mca *mca, *tmp; + unsigned long flags; + int err; + + /* check to see if the qp is already a member of the group */ + spin_lock_irqsave(&rxe->mcg_lock, flags); + list_for_each_entry(mca, &mcg->qp_list, qp_list) { + if (mca->qp == qp) { + spin_unlock_irqrestore(&rxe->mcg_lock, flags); + return 0; + } + } + spin_unlock_irqrestore(&rxe->mcg_lock, flags); + + /* speculative alloc new mca without using GFP_ATOMIC */ + mca = kzalloc(sizeof(*mca), GFP_KERNEL); + if (!mca) + return -ENOMEM; + + spin_lock_irqsave(&rxe->mcg_lock, flags); + /* re-check to see if someone else just attached qp */ + list_for_each_entry(tmp, &mcg->qp_list, qp_list) { + if (tmp->qp == qp) { + kfree(mca); err = 0; goto out; } } - if (grp->num_qp >= rxe->attr.max_mcast_qp_attach) { - err = -ENOMEM; - goto out; - } - - elem = rxe_alloc_locked(&rxe->mc_elem_pool); - if (!elem) { - err = -ENOMEM; - goto out; - } - - /* each qp holds a ref on the grp */ - rxe_add_ref(grp); - - grp->num_qp++; - elem->qp = qp; - elem->grp = grp; - - list_add(&elem->qp_list, &grp->qp_list); - list_add(&elem->grp_list, &qp->grp_list); - - err = 0; + err = __rxe_init_mca(qp, mcg, mca); + if (err) + kfree(mca); out: - spin_unlock_bh(&grp->mcg_lock); - spin_unlock_bh(&qp->grp_lock); + spin_unlock_irqrestore(&rxe->mcg_lock, flags); return err; } -int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, - union ib_gid *mgid) +/** + * __rxe_cleanup_mca - cleanup mca object holding lock + * @mca: mca object + * @mcg: mcg object + * + * Context: caller must hold a reference to mcg and rxe->mcg_lock + */ +static void __rxe_cleanup_mca(struct rxe_mca *mca, struct rxe_mcg *mcg) { - struct rxe_mc_grp *grp; - struct rxe_mc_elem *elem, *tmp; + list_del(&mca->qp_list); - grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid); - if (!grp) - goto err1; + atomic_dec(&mcg->qp_num); + atomic_dec(&mcg->rxe->mcg_attach); + atomic_dec(&mca->qp->mcg_num); + rxe_put(mca->qp); - spin_lock_bh(&qp->grp_lock); - spin_lock_bh(&grp->mcg_lock); + kfree(mca); +} - list_for_each_entry_safe(elem, tmp, &grp->qp_list, qp_list) { - if (elem->qp == qp) { - list_del(&elem->qp_list); - list_del(&elem->grp_list); - grp->num_qp--; +/** + * rxe_detach_mcg - detach qp from mcg + * @mcg: mcg object + * @qp: qp object + * + * Returns: 0 on success else an error if qp is not attached. + */ +static int rxe_detach_mcg(struct rxe_mcg *mcg, struct rxe_qp *qp) +{ + struct rxe_dev *rxe = mcg->rxe; + struct rxe_mca *mca, *tmp; + unsigned long flags; - spin_unlock_bh(&grp->mcg_lock); - spin_unlock_bh(&qp->grp_lock); - rxe_drop_ref(elem); - rxe_drop_ref(grp); /* ref held by QP */ - rxe_drop_ref(grp); /* ref from get_key */ + spin_lock_irqsave(&rxe->mcg_lock, flags); + list_for_each_entry_safe(mca, tmp, &mcg->qp_list, qp_list) { + if (mca->qp == qp) { + __rxe_cleanup_mca(mca, mcg); + + /* if the number of qp's attached to the + * mcast group falls to zero go ahead and + * tear it down. This will not free the + * object since we are still holding a ref + * from the caller + */ + if (atomic_read(&mcg->qp_num) <= 0) + __rxe_destroy_mcg(mcg); + + spin_unlock_irqrestore(&rxe->mcg_lock, flags); return 0; } } - spin_unlock_bh(&grp->mcg_lock); - spin_unlock_bh(&qp->grp_lock); - rxe_drop_ref(grp); /* ref from get_key */ -err1: + /* we didn't find the qp on the list */ + spin_unlock_irqrestore(&rxe->mcg_lock, flags); return -EINVAL; } -void rxe_drop_all_mcast_groups(struct rxe_qp *qp) +/** + * rxe_attach_mcast - attach qp to multicast group (see IBA-11.3.1) + * @ibqp: (IB) qp object + * @mgid: multicast IP address + * @mlid: multicast LID, ignored for RoCEv2 (see IBA-A17.5.6) + * + * Returns: 0 on success else an errno + */ +int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) { - struct rxe_mc_grp *grp; - struct rxe_mc_elem *elem; + int err; + struct rxe_dev *rxe = to_rdev(ibqp->device); + struct rxe_qp *qp = to_rqp(ibqp); + struct rxe_mcg *mcg; - while (1) { - spin_lock_bh(&qp->grp_lock); - if (list_empty(&qp->grp_list)) { - spin_unlock_bh(&qp->grp_lock); - break; - } - elem = list_first_entry(&qp->grp_list, struct rxe_mc_elem, - grp_list); - list_del(&elem->grp_list); - spin_unlock_bh(&qp->grp_lock); + /* takes a ref on mcg if successful */ + mcg = rxe_get_mcg(rxe, mgid); + if (IS_ERR(mcg)) + return PTR_ERR(mcg); - grp = elem->grp; - spin_lock_bh(&grp->mcg_lock); - list_del(&elem->qp_list); - grp->num_qp--; - spin_unlock_bh(&grp->mcg_lock); - rxe_drop_ref(grp); - rxe_drop_ref(elem); - } + err = rxe_attach_mcg(mcg, qp); + + /* if we failed to attach the first qp to mcg tear it down */ + if (atomic_read(&mcg->qp_num) == 0) + rxe_destroy_mcg(mcg); + + kref_put(&mcg->ref_cnt, rxe_cleanup_mcg); + + return err; } -void rxe_mc_cleanup(struct rxe_pool_elem *elem) +/** + * rxe_detach_mcast - detach qp from multicast group (see IBA-11.3.2) + * @ibqp: address of (IB) qp object + * @mgid: multicast IP address + * @mlid: multicast LID, ignored for RoCEv2 (see IBA-A17.5.6) + * + * Returns: 0 on success else an errno + */ +int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) { - struct rxe_mc_grp *grp = container_of(elem, typeof(*grp), elem); - struct rxe_dev *rxe = grp->rxe; + struct rxe_dev *rxe = to_rdev(ibqp->device); + struct rxe_qp *qp = to_rqp(ibqp); + struct rxe_mcg *mcg; + int err; - rxe_drop_key(grp); - rxe_mcast_delete(rxe, &grp->mgid); + mcg = rxe_lookup_mcg(rxe, mgid); + if (!mcg) + return -EINVAL; + + err = rxe_detach_mcg(mcg, qp); + kref_put(&mcg->ref_cnt, rxe_cleanup_mcg); + + return err; } diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c index 035f226af133..9149b6095429 100644 --- a/drivers/infiniband/sw/rxe/rxe_mmap.c +++ b/drivers/infiniband/sw/rxe/rxe_mmap.c @@ -4,7 +4,6 @@ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. */ -#include #include #include #include diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 453ef3c9d535..60a31b718774 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -459,7 +459,7 @@ int copy_data( if (offset >= sge->length) { if (mr) { - rxe_drop_ref(mr); + rxe_put(mr); mr = NULL; } sge++; @@ -504,13 +504,13 @@ int copy_data( dma->resid = resid; if (mr) - rxe_drop_ref(mr); + rxe_put(mr); return 0; err2: if (mr) - rxe_drop_ref(mr); + rxe_put(mr); err1: return err; } @@ -569,7 +569,7 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key, (type == RXE_LOOKUP_REMOTE && mr->rkey != key) || mr_pd(mr) != pd || (access && !(access & mr->access)) || mr->state != RXE_MR_STATE_VALID)) { - rxe_drop_ref(mr); + rxe_put(mr); mr = NULL; } @@ -613,7 +613,7 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey) ret = 0; err_drop_ref: - rxe_drop_ref(mr); + rxe_put(mr); err: return ret; } @@ -690,9 +690,8 @@ int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) } mr->state = RXE_MR_STATE_INVALID; - rxe_drop_ref(mr_pd(mr)); - rxe_drop_index(mr); - rxe_drop_ref(mr); + rxe_put(mr_pd(mr)); + rxe_put(mr); return 0; } diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c index 32dd8c0b8b9e..c86b2efd58f2 100644 --- a/drivers/infiniband/sw/rxe/rxe_mw.c +++ b/drivers/infiniband/sw/rxe/rxe_mw.c @@ -12,15 +12,14 @@ int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) struct rxe_dev *rxe = to_rdev(ibmw->device); int ret; - rxe_add_ref(pd); + rxe_get(pd); ret = rxe_add_to_pool(&rxe->mw_pool, mw); if (ret) { - rxe_drop_ref(pd); + rxe_put(pd); return ret; } - rxe_add_index(mw); mw->rkey = ibmw->rkey = (mw->elem.index << 8) | rxe_get_next_key(-1); mw->state = (mw->ibmw.type == IB_MW_TYPE_2) ? RXE_MW_STATE_FREE : RXE_MW_STATE_VALID; @@ -36,14 +35,14 @@ static void rxe_do_dealloc_mw(struct rxe_mw *mw) mw->mr = NULL; atomic_dec(&mr->num_mw); - rxe_drop_ref(mr); + rxe_put(mr); } if (mw->qp) { struct rxe_qp *qp = mw->qp; mw->qp = NULL; - rxe_drop_ref(qp); + rxe_put(qp); } mw->access = 0; @@ -61,8 +60,8 @@ int rxe_dealloc_mw(struct ib_mw *ibmw) rxe_do_dealloc_mw(mw); spin_unlock_bh(&mw->lock); - rxe_drop_ref(mw); - rxe_drop_ref(pd); + rxe_put(mw); + rxe_put(pd); return 0; } @@ -171,7 +170,7 @@ static void rxe_do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, mw->length = wqe->wr.wr.mw.length; if (mw->mr) { - rxe_drop_ref(mw->mr); + rxe_put(mw->mr); atomic_dec(&mw->mr->num_mw); mw->mr = NULL; } @@ -179,11 +178,11 @@ static void rxe_do_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, if (mw->length) { mw->mr = mr; atomic_inc(&mr->num_mw); - rxe_add_ref(mr); + rxe_get(mr); } if (mw->ibmw.type == IB_MW_TYPE_2) { - rxe_add_ref(qp); + rxe_get(qp); mw->qp = qp; } } @@ -234,9 +233,9 @@ err_unlock: spin_unlock_bh(&mw->lock); err_drop_mr: if (mr) - rxe_drop_ref(mr); + rxe_put(mr); err_drop_mw: - rxe_drop_ref(mw); + rxe_put(mw); err: return ret; } @@ -261,13 +260,13 @@ static void rxe_do_invalidate_mw(struct rxe_mw *mw) /* valid type 2 MW will always have a QP pointer */ qp = mw->qp; mw->qp = NULL; - rxe_drop_ref(qp); + rxe_put(qp); /* valid type 2 MW will always have an MR pointer */ mr = mw->mr; mw->mr = NULL; atomic_dec(&mr->num_mw); - rxe_drop_ref(mr); + rxe_put(mr); mw->access = 0; mw->addr = 0; @@ -302,7 +301,7 @@ int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey) err_unlock: spin_unlock_bh(&mw->lock); err_drop_ref: - rxe_drop_ref(mw); + rxe_put(mw); err: return ret; } @@ -323,16 +322,9 @@ struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey) (mw->length == 0) || (access && !(access & mw->access)) || mw->state != RXE_MW_STATE_VALID)) { - rxe_drop_ref(mw); + rxe_put(mw); return NULL; } return mw; } - -void rxe_mw_cleanup(struct rxe_pool_elem *elem) -{ - struct rxe_mw *mw = container_of(elem, typeof(*mw), elem); - - rxe_drop_index(mw); -} diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index be72bdbfb4ba..c53f4529f098 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -20,24 +20,6 @@ static struct rxe_recv_sockets recv_sockets; -int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) -{ - unsigned char ll_addr[ETH_ALEN]; - - ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); - - return dev_mc_add(rxe->ndev, ll_addr); -} - -int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid) -{ - unsigned char ll_addr[ETH_ALEN]; - - ipv6_eth_mc_map((struct in6_addr *)mgid->raw, ll_addr); - - return dev_mc_del(rxe->ndev, ll_addr); -} - static struct dst_entry *rxe_find_route4(struct net_device *ndev, struct in_addr *saddr, struct in_addr *daddr) @@ -289,13 +271,13 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb, ip6h->payload_len = htons(skb->len - sizeof(*ip6h)); } -static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb) +static int prepare4(struct rxe_av *av, struct rxe_pkt_info *pkt, + struct sk_buff *skb) { struct rxe_qp *qp = pkt->qp; struct dst_entry *dst; bool xnet = false; __be16 df = htons(IP_DF); - struct rxe_av *av = rxe_get_av(pkt); struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr; struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr; @@ -315,11 +297,11 @@ static int prepare4(struct rxe_pkt_info *pkt, struct sk_buff *skb) return 0; } -static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb) +static int prepare6(struct rxe_av *av, struct rxe_pkt_info *pkt, + struct sk_buff *skb) { struct rxe_qp *qp = pkt->qp; struct dst_entry *dst; - struct rxe_av *av = rxe_get_av(pkt); struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr; struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr; @@ -340,16 +322,17 @@ static int prepare6(struct rxe_pkt_info *pkt, struct sk_buff *skb) return 0; } -int rxe_prepare(struct rxe_pkt_info *pkt, struct sk_buff *skb) +int rxe_prepare(struct rxe_av *av, struct rxe_pkt_info *pkt, + struct sk_buff *skb) { int err = 0; if (skb->protocol == htons(ETH_P_IP)) - err = prepare4(pkt, skb); + err = prepare4(av, pkt, skb); else if (skb->protocol == htons(ETH_P_IPV6)) - err = prepare6(pkt, skb); + err = prepare6(av, pkt, skb); - if (ether_addr_equal(skb->dev->dev_addr, rxe_get_av(pkt)->dmac)) + if (ether_addr_equal(skb->dev->dev_addr, av->dmac)) pkt->mask |= RXE_LOOPBACK_MASK; return err; @@ -365,7 +348,7 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb) skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW)) rxe_run_task(&qp->req.task, 1); - rxe_drop_ref(qp); + rxe_put(qp); } static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt) @@ -375,7 +358,7 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt) skb->destructor = rxe_skb_tx_dtor; skb->sk = pkt->qp->sk->sk; - rxe_add_ref(pkt->qp); + rxe_get(pkt->qp); atomic_inc(&pkt->qp->skb_out); if (skb->protocol == htons(ETH_P_IP)) { @@ -385,7 +368,7 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt) } else { pr_err("Unknown layer 3 protocol: %d\n", skb->protocol); atomic_dec(&pkt->qp->skb_out); - rxe_drop_ref(pkt->qp); + rxe_put(pkt->qp); kfree_skb(skb); return -EINVAL; } diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 4cb003885e00..87066d04ed18 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -12,129 +12,93 @@ static const struct rxe_type_info { const char *name; size_t size; size_t elem_offset; - void (*cleanup)(struct rxe_pool_elem *obj); + void (*cleanup)(struct rxe_pool_elem *elem); enum rxe_pool_flags flags; u32 min_index; u32 max_index; - size_t key_offset; - size_t key_size; + u32 max_elem; } rxe_type_info[RXE_NUM_TYPES] = { [RXE_TYPE_UC] = { - .name = "rxe-uc", + .name = "uc", .size = sizeof(struct rxe_ucontext), .elem_offset = offsetof(struct rxe_ucontext, elem), - .flags = RXE_POOL_NO_ALLOC, + .min_index = 1, + .max_index = UINT_MAX, + .max_elem = UINT_MAX, }, [RXE_TYPE_PD] = { - .name = "rxe-pd", + .name = "pd", .size = sizeof(struct rxe_pd), .elem_offset = offsetof(struct rxe_pd, elem), - .flags = RXE_POOL_NO_ALLOC, + .min_index = 1, + .max_index = UINT_MAX, + .max_elem = UINT_MAX, }, [RXE_TYPE_AH] = { - .name = "rxe-ah", + .name = "ah", .size = sizeof(struct rxe_ah), .elem_offset = offsetof(struct rxe_ah, elem), - .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC, .min_index = RXE_MIN_AH_INDEX, .max_index = RXE_MAX_AH_INDEX, + .max_elem = RXE_MAX_AH_INDEX - RXE_MIN_AH_INDEX + 1, }, [RXE_TYPE_SRQ] = { - .name = "rxe-srq", + .name = "srq", .size = sizeof(struct rxe_srq), .elem_offset = offsetof(struct rxe_srq, elem), - .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC, .min_index = RXE_MIN_SRQ_INDEX, .max_index = RXE_MAX_SRQ_INDEX, + .max_elem = RXE_MAX_SRQ_INDEX - RXE_MIN_SRQ_INDEX + 1, }, [RXE_TYPE_QP] = { - .name = "rxe-qp", + .name = "qp", .size = sizeof(struct rxe_qp), .elem_offset = offsetof(struct rxe_qp, elem), .cleanup = rxe_qp_cleanup, - .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC, .min_index = RXE_MIN_QP_INDEX, .max_index = RXE_MAX_QP_INDEX, + .max_elem = RXE_MAX_QP_INDEX - RXE_MIN_QP_INDEX + 1, }, [RXE_TYPE_CQ] = { - .name = "rxe-cq", + .name = "cq", .size = sizeof(struct rxe_cq), .elem_offset = offsetof(struct rxe_cq, elem), - .flags = RXE_POOL_NO_ALLOC, .cleanup = rxe_cq_cleanup, + .min_index = 1, + .max_index = UINT_MAX, + .max_elem = UINT_MAX, }, [RXE_TYPE_MR] = { - .name = "rxe-mr", + .name = "mr", .size = sizeof(struct rxe_mr), .elem_offset = offsetof(struct rxe_mr, elem), .cleanup = rxe_mr_cleanup, - .flags = RXE_POOL_INDEX, + .flags = RXE_POOL_ALLOC, .min_index = RXE_MIN_MR_INDEX, .max_index = RXE_MAX_MR_INDEX, + .max_elem = RXE_MAX_MR_INDEX - RXE_MIN_MR_INDEX + 1, }, [RXE_TYPE_MW] = { - .name = "rxe-mw", + .name = "mw", .size = sizeof(struct rxe_mw), .elem_offset = offsetof(struct rxe_mw, elem), - .cleanup = rxe_mw_cleanup, - .flags = RXE_POOL_INDEX | RXE_POOL_NO_ALLOC, .min_index = RXE_MIN_MW_INDEX, .max_index = RXE_MAX_MW_INDEX, - }, - [RXE_TYPE_MC_GRP] = { - .name = "rxe-mc_grp", - .size = sizeof(struct rxe_mc_grp), - .elem_offset = offsetof(struct rxe_mc_grp, elem), - .cleanup = rxe_mc_cleanup, - .flags = RXE_POOL_KEY, - .key_offset = offsetof(struct rxe_mc_grp, mgid), - .key_size = sizeof(union ib_gid), - }, - [RXE_TYPE_MC_ELEM] = { - .name = "rxe-mc_elem", - .size = sizeof(struct rxe_mc_elem), - .elem_offset = offsetof(struct rxe_mc_elem, elem), + .max_elem = RXE_MAX_MW_INDEX - RXE_MIN_MW_INDEX + 1, }, }; -static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min) -{ - int err = 0; - - if ((max - min + 1) < pool->max_elem) { - pr_warn("not enough indices for max_elem\n"); - err = -EINVAL; - goto out; - } - - pool->index.max_index = max; - pool->index.min_index = min; - - pool->index.table = bitmap_zalloc(max - min + 1, GFP_KERNEL); - if (!pool->index.table) { - err = -ENOMEM; - goto out; - } - -out: - return err; -} - -int rxe_pool_init( - struct rxe_dev *rxe, - struct rxe_pool *pool, - enum rxe_elem_type type, - unsigned int max_elem) +void rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool, + enum rxe_elem_type type) { const struct rxe_type_info *info = &rxe_type_info[type]; - int err = 0; memset(pool, 0, sizeof(*pool)); pool->rxe = rxe; pool->name = info->name; pool->type = type; - pool->max_elem = max_elem; + pool->max_elem = info->max_elem; pool->elem_size = ALIGN(info->size, RXE_POOL_ALIGN); pool->elem_offset = info->elem_offset; pool->flags = info->flags; @@ -142,225 +106,31 @@ int rxe_pool_init( atomic_set(&pool->num_elem, 0); - rwlock_init(&pool->pool_lock); - - if (pool->flags & RXE_POOL_INDEX) { - pool->index.tree = RB_ROOT; - err = rxe_pool_init_index(pool, info->max_index, - info->min_index); - if (err) - goto out; - } - - if (pool->flags & RXE_POOL_KEY) { - pool->key.tree = RB_ROOT; - pool->key.key_offset = info->key_offset; - pool->key.key_size = info->key_size; - } - -out: - return err; + xa_init_flags(&pool->xa, XA_FLAGS_ALLOC); + pool->limit.min = info->min_index; + pool->limit.max = info->max_index; } void rxe_pool_cleanup(struct rxe_pool *pool) { - if (atomic_read(&pool->num_elem) > 0) - pr_warn("%s pool destroyed with unfree'd elem\n", - pool->name); - - if (pool->flags & RXE_POOL_INDEX) - bitmap_free(pool->index.table); -} - -static u32 alloc_index(struct rxe_pool *pool) -{ - u32 index; - u32 range = pool->index.max_index - pool->index.min_index + 1; - - index = find_next_zero_bit(pool->index.table, range, pool->index.last); - if (index >= range) - index = find_first_zero_bit(pool->index.table, range); - - WARN_ON_ONCE(index >= range); - set_bit(index, pool->index.table); - pool->index.last = index; - return index + pool->index.min_index; -} - -static int rxe_insert_index(struct rxe_pool *pool, struct rxe_pool_elem *new) -{ - struct rb_node **link = &pool->index.tree.rb_node; - struct rb_node *parent = NULL; - struct rxe_pool_elem *elem; - - while (*link) { - parent = *link; - elem = rb_entry(parent, struct rxe_pool_elem, index_node); - - if (elem->index == new->index) { - pr_warn("element already exists!\n"); - return -EINVAL; - } - - if (elem->index > new->index) - link = &(*link)->rb_left; - else - link = &(*link)->rb_right; - } - - rb_link_node(&new->index_node, parent, link); - rb_insert_color(&new->index_node, &pool->index.tree); - - return 0; -} - -static int rxe_insert_key(struct rxe_pool *pool, struct rxe_pool_elem *new) -{ - struct rb_node **link = &pool->key.tree.rb_node; - struct rb_node *parent = NULL; - struct rxe_pool_elem *elem; - int cmp; - - while (*link) { - parent = *link; - elem = rb_entry(parent, struct rxe_pool_elem, key_node); - - cmp = memcmp((u8 *)elem + pool->key.key_offset, - (u8 *)new + pool->key.key_offset, - pool->key.key_size); - - if (cmp == 0) { - pr_warn("key already exists!\n"); - return -EINVAL; - } - - if (cmp > 0) - link = &(*link)->rb_left; - else - link = &(*link)->rb_right; - } - - rb_link_node(&new->key_node, parent, link); - rb_insert_color(&new->key_node, &pool->key.tree); - - return 0; -} - -int __rxe_add_key_locked(struct rxe_pool_elem *elem, void *key) -{ - struct rxe_pool *pool = elem->pool; - int err; - - memcpy((u8 *)elem + pool->key.key_offset, key, pool->key.key_size); - err = rxe_insert_key(pool, elem); - - return err; -} - -int __rxe_add_key(struct rxe_pool_elem *elem, void *key) -{ - struct rxe_pool *pool = elem->pool; - int err; - - write_lock_bh(&pool->pool_lock); - err = __rxe_add_key_locked(elem, key); - write_unlock_bh(&pool->pool_lock); - - return err; -} - -void __rxe_drop_key_locked(struct rxe_pool_elem *elem) -{ - struct rxe_pool *pool = elem->pool; - - rb_erase(&elem->key_node, &pool->key.tree); -} - -void __rxe_drop_key(struct rxe_pool_elem *elem) -{ - struct rxe_pool *pool = elem->pool; - - write_lock_bh(&pool->pool_lock); - __rxe_drop_key_locked(elem); - write_unlock_bh(&pool->pool_lock); -} - -int __rxe_add_index_locked(struct rxe_pool_elem *elem) -{ - struct rxe_pool *pool = elem->pool; - int err; - - elem->index = alloc_index(pool); - err = rxe_insert_index(pool, elem); - - return err; -} - -int __rxe_add_index(struct rxe_pool_elem *elem) -{ - struct rxe_pool *pool = elem->pool; - int err; - - write_lock_bh(&pool->pool_lock); - err = __rxe_add_index_locked(elem); - write_unlock_bh(&pool->pool_lock); - - return err; -} - -void __rxe_drop_index_locked(struct rxe_pool_elem *elem) -{ - struct rxe_pool *pool = elem->pool; - - clear_bit(elem->index - pool->index.min_index, pool->index.table); - rb_erase(&elem->index_node, &pool->index.tree); -} - -void __rxe_drop_index(struct rxe_pool_elem *elem) -{ - struct rxe_pool *pool = elem->pool; - - write_lock_bh(&pool->pool_lock); - __rxe_drop_index_locked(elem); - write_unlock_bh(&pool->pool_lock); -} - -void *rxe_alloc_locked(struct rxe_pool *pool) -{ - struct rxe_pool_elem *elem; - void *obj; - - if (atomic_inc_return(&pool->num_elem) > pool->max_elem) - goto out_cnt; - - obj = kzalloc(pool->elem_size, GFP_ATOMIC); - if (!obj) - goto out_cnt; - - elem = (struct rxe_pool_elem *)((u8 *)obj + pool->elem_offset); - - elem->pool = pool; - elem->obj = obj; - kref_init(&elem->ref_cnt); - - return obj; - -out_cnt: - atomic_dec(&pool->num_elem); - return NULL; + WARN_ON(!xa_empty(&pool->xa)); } void *rxe_alloc(struct rxe_pool *pool) { struct rxe_pool_elem *elem; void *obj; + int err; + + if (WARN_ON(!(pool->flags & RXE_POOL_ALLOC))) + return NULL; if (atomic_inc_return(&pool->num_elem) > pool->max_elem) - goto out_cnt; + goto err_cnt; obj = kzalloc(pool->elem_size, GFP_KERNEL); if (!obj) - goto out_cnt; + goto err_cnt; elem = (struct rxe_pool_elem *)((u8 *)obj + pool->elem_offset); @@ -368,127 +138,86 @@ void *rxe_alloc(struct rxe_pool *pool) elem->obj = obj; kref_init(&elem->ref_cnt); + err = xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit, + &pool->next, GFP_KERNEL); + if (err) + goto err_free; + return obj; -out_cnt: +err_free: + kfree(obj); +err_cnt: atomic_dec(&pool->num_elem); return NULL; } int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem) { + int err; + + if (WARN_ON(pool->flags & RXE_POOL_ALLOC)) + return -EINVAL; + if (atomic_inc_return(&pool->num_elem) > pool->max_elem) - goto out_cnt; + goto err_cnt; elem->pool = pool; elem->obj = (u8 *)elem - pool->elem_offset; kref_init(&elem->ref_cnt); + err = xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit, + &pool->next, GFP_KERNEL); + if (err) + goto err_cnt; + return 0; -out_cnt: +err_cnt: atomic_dec(&pool->num_elem); return -EINVAL; } -void rxe_elem_release(struct kref *kref) +void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) { - struct rxe_pool_elem *elem = - container_of(kref, struct rxe_pool_elem, ref_cnt); - struct rxe_pool *pool = elem->pool; + struct rxe_pool_elem *elem; + struct xarray *xa = &pool->xa; + unsigned long flags; void *obj; + xa_lock_irqsave(xa, flags); + elem = xa_load(xa, index); + if (elem && kref_get_unless_zero(&elem->ref_cnt)) + obj = elem->obj; + else + obj = NULL; + xa_unlock_irqrestore(xa, flags); + + return obj; +} + +static void rxe_elem_release(struct kref *kref) +{ + struct rxe_pool_elem *elem = container_of(kref, typeof(*elem), ref_cnt); + struct rxe_pool *pool = elem->pool; + + xa_erase(&pool->xa, elem->index); + if (pool->cleanup) pool->cleanup(elem); - if (!(pool->flags & RXE_POOL_NO_ALLOC)) { - obj = elem->obj; - kfree(obj); - } + if (pool->flags & RXE_POOL_ALLOC) + kfree(elem->obj); atomic_dec(&pool->num_elem); } -void *rxe_pool_get_index_locked(struct rxe_pool *pool, u32 index) +int __rxe_get(struct rxe_pool_elem *elem) { - struct rb_node *node; - struct rxe_pool_elem *elem; - void *obj; - - node = pool->index.tree.rb_node; - - while (node) { - elem = rb_entry(node, struct rxe_pool_elem, index_node); - - if (elem->index > index) - node = node->rb_left; - else if (elem->index < index) - node = node->rb_right; - else - break; - } - - if (node) { - kref_get(&elem->ref_cnt); - obj = elem->obj; - } else { - obj = NULL; - } - - return obj; + return kref_get_unless_zero(&elem->ref_cnt); } -void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) +int __rxe_put(struct rxe_pool_elem *elem) { - void *obj; - - read_lock_bh(&pool->pool_lock); - obj = rxe_pool_get_index_locked(pool, index); - read_unlock_bh(&pool->pool_lock); - - return obj; -} - -void *rxe_pool_get_key_locked(struct rxe_pool *pool, void *key) -{ - struct rb_node *node; - struct rxe_pool_elem *elem; - void *obj; - int cmp; - - node = pool->key.tree.rb_node; - - while (node) { - elem = rb_entry(node, struct rxe_pool_elem, key_node); - - cmp = memcmp((u8 *)elem + pool->key.key_offset, - key, pool->key.key_size); - - if (cmp > 0) - node = node->rb_left; - else if (cmp < 0) - node = node->rb_right; - else - break; - } - - if (node) { - kref_get(&elem->ref_cnt); - obj = elem->obj; - } else { - obj = NULL; - } - - return obj; -} - -void *rxe_pool_get_key(struct rxe_pool *pool, void *key) -{ - void *obj; - - read_lock_bh(&pool->pool_lock); - obj = rxe_pool_get_key_locked(pool, key); - read_unlock_bh(&pool->pool_lock); - - return obj; + return kref_put(&elem->ref_cnt, rxe_elem_release); } diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h index 214279310f4d..24bcc786c1b3 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.h +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -8,9 +8,7 @@ #define RXE_POOL_H enum rxe_pool_flags { - RXE_POOL_INDEX = BIT(1), - RXE_POOL_KEY = BIT(2), - RXE_POOL_NO_ALLOC = BIT(4), + RXE_POOL_ALLOC = BIT(1), }; enum rxe_elem_type { @@ -22,8 +20,6 @@ enum rxe_elem_type { RXE_TYPE_CQ, RXE_TYPE_MR, RXE_TYPE_MW, - RXE_TYPE_MC_GRP, - RXE_TYPE_MC_ELEM, RXE_NUM_TYPES, /* keep me last */ }; @@ -32,20 +28,13 @@ struct rxe_pool_elem { void *obj; struct kref ref_cnt; struct list_head list; - - /* only used if keyed */ - struct rb_node key_node; - - /* only used if indexed */ - struct rb_node index_node; u32 index; }; struct rxe_pool { struct rxe_dev *rxe; const char *name; - rwlock_t pool_lock; /* protects pool add/del/search */ - void (*cleanup)(struct rxe_pool_elem *obj); + void (*cleanup)(struct rxe_pool_elem *elem); enum rxe_pool_flags flags; enum rxe_elem_type type; @@ -54,36 +43,22 @@ struct rxe_pool { size_t elem_size; size_t elem_offset; - /* only used if indexed */ - struct { - struct rb_root tree; - unsigned long *table; - u32 last; - u32 max_index; - u32 min_index; - } index; - - /* only used if keyed */ - struct { - struct rb_root tree; - size_t key_offset; - size_t key_size; - } key; + struct xarray xa; + struct xa_limit limit; + u32 next; }; /* initialize a pool of objects with given limit on * number of elements. gets parameters from rxe_type_info * pool elements will be allocated out of a slab cache */ -int rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool, - enum rxe_elem_type type, u32 max_elem); +void rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool, + enum rxe_elem_type type); /* free resources from object pool */ void rxe_pool_cleanup(struct rxe_pool *pool); -/* allocate an object from pool holding and not holding the pool lock */ -void *rxe_alloc_locked(struct rxe_pool *pool); - +/* allocate an object from pool */ void *rxe_alloc(struct rxe_pool *pool); /* connect already allocated object to pool */ @@ -91,69 +66,17 @@ int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem); #define rxe_add_to_pool(pool, obj) __rxe_add_to_pool(pool, &(obj)->elem) -/* assign an index to an indexed object and insert object into - * pool's rb tree holding and not holding the pool_lock - */ -int __rxe_add_index_locked(struct rxe_pool_elem *elem); - -#define rxe_add_index_locked(obj) __rxe_add_index_locked(&(obj)->elem) - -int __rxe_add_index(struct rxe_pool_elem *elem); - -#define rxe_add_index(obj) __rxe_add_index(&(obj)->elem) - -/* drop an index and remove object from rb tree - * holding and not holding the pool_lock - */ -void __rxe_drop_index_locked(struct rxe_pool_elem *elem); - -#define rxe_drop_index_locked(obj) __rxe_drop_index_locked(&(obj)->elem) - -void __rxe_drop_index(struct rxe_pool_elem *elem); - -#define rxe_drop_index(obj) __rxe_drop_index(&(obj)->elem) - -/* assign a key to a keyed object and insert object into - * pool's rb tree holding and not holding pool_lock - */ -int __rxe_add_key_locked(struct rxe_pool_elem *elem, void *key); - -#define rxe_add_key_locked(obj, key) __rxe_add_key_locked(&(obj)->elem, key) - -int __rxe_add_key(struct rxe_pool_elem *elem, void *key); - -#define rxe_add_key(obj, key) __rxe_add_key(&(obj)->elem, key) - -/* remove elem from rb tree holding and not holding the pool_lock */ -void __rxe_drop_key_locked(struct rxe_pool_elem *elem); - -#define rxe_drop_key_locked(obj) __rxe_drop_key_locked(&(obj)->elem) - -void __rxe_drop_key(struct rxe_pool_elem *elem); - -#define rxe_drop_key(obj) __rxe_drop_key(&(obj)->elem) - -/* lookup an indexed object from index holding and not holding the pool_lock. - * takes a reference on object - */ -void *rxe_pool_get_index_locked(struct rxe_pool *pool, u32 index); - +/* lookup an indexed object from index. takes a reference on object */ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index); -/* lookup keyed object from key holding and not holding the pool_lock. - * takes a reference on the objecti - */ -void *rxe_pool_get_key_locked(struct rxe_pool *pool, void *key); +int __rxe_get(struct rxe_pool_elem *elem); -void *rxe_pool_get_key(struct rxe_pool *pool, void *key); +#define rxe_get(obj) __rxe_get(&(obj)->elem) -/* cleanup an object when all references are dropped */ -void rxe_elem_release(struct kref *kref); +int __rxe_put(struct rxe_pool_elem *elem); -/* take a reference on an object */ -#define rxe_add_ref(obj) kref_get(&(obj)->elem.ref_cnt) +#define rxe_put(obj) __rxe_put(&(obj)->elem) -/* drop a reference on an object */ -#define rxe_drop_ref(obj) kref_put(&(obj)->elem.ref_cnt, rxe_elem_release) +#define rxe_read(obj) kref_read(&(obj)->elem.ref_cnt) #endif /* RXE_POOL_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 5018b9387694..62acf890af6c 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -135,12 +135,8 @@ static void free_rd_atomic_resources(struct rxe_qp *qp) void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res) { - if (res->type == RXE_ATOMIC_MASK) { + if (res->type == RXE_ATOMIC_MASK) kfree_skb(res->atomic.skb); - } else if (res->type == RXE_READ_MASK) { - if (res->read.mr) - rxe_drop_ref(res->read.mr); - } res->type = 0; } @@ -188,9 +184,6 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, break; } - INIT_LIST_HEAD(&qp->grp_list); - - spin_lock_init(&qp->grp_lock); spin_lock_init(&qp->state_lock); atomic_set(&qp->ssn, 0); @@ -330,11 +323,11 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, struct rxe_cq *scq = to_rcq(init->send_cq); struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL; - rxe_add_ref(pd); - rxe_add_ref(rcq); - rxe_add_ref(scq); + rxe_get(pd); + rxe_get(rcq); + rxe_get(scq); if (srq) - rxe_add_ref(srq); + rxe_get(srq); qp->pd = pd; qp->rcq = rcq; @@ -366,10 +359,10 @@ err1: qp->srq = NULL; if (srq) - rxe_drop_ref(srq); - rxe_drop_ref(scq); - rxe_drop_ref(rcq); - rxe_drop_ref(pd); + rxe_put(srq); + rxe_put(scq); + rxe_put(rcq); + rxe_put(pd); return err; } @@ -528,7 +521,7 @@ static void rxe_qp_reset(struct rxe_qp *qp) qp->resp.sent_psn_nak = 0; if (qp->resp.mr) { - rxe_drop_ref(qp->resp.mr); + rxe_put(qp->resp.mr); qp->resp.mr = NULL; } @@ -770,6 +763,20 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask) return 0; } +int rxe_qp_chk_destroy(struct rxe_qp *qp) +{ + /* See IBA o10-2.2.3 + * An attempt to destroy a QP while attached to a mcast group + * will fail immediately. + */ + if (atomic_read(&qp->mcg_num)) { + pr_debug("Attempt to destroy QP while attached to multicast group\n"); + return -EBUSY; + } + + return 0; +} + /* called by the destroy qp verb */ void rxe_qp_destroy(struct rxe_qp *qp) { @@ -798,28 +805,24 @@ static void rxe_qp_do_cleanup(struct work_struct *work) { struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work); - rxe_drop_all_mcast_groups(qp); - if (qp->sq.queue) rxe_queue_cleanup(qp->sq.queue); if (qp->srq) - rxe_drop_ref(qp->srq); + rxe_put(qp->srq); if (qp->rq.queue) rxe_queue_cleanup(qp->rq.queue); if (qp->scq) - rxe_drop_ref(qp->scq); + rxe_put(qp->scq); if (qp->rcq) - rxe_drop_ref(qp->rcq); + rxe_put(qp->rcq); if (qp->pd) - rxe_drop_ref(qp->pd); + rxe_put(qp->pd); - if (qp->resp.mr) { - rxe_drop_ref(qp->resp.mr); - qp->resp.mr = NULL; - } + if (qp->resp.mr) + rxe_put(qp->resp.mr); if (qp_type(qp) == IB_QPT_RC) sk_dst_reset(qp->sk->sk); diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c index a1b283dd2d4c..dbd4971039c0 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.c +++ b/drivers/infiniband/sw/rxe/rxe_queue.c @@ -151,6 +151,8 @@ int rxe_queue_resize(struct rxe_queue *q, unsigned int *num_elem_p, struct rxe_queue *new_q; unsigned int num_elem = *num_elem_p; int err; + unsigned long producer_flags; + unsigned long consumer_flags; new_q = rxe_queue_init(q->rxe, &num_elem, elem_size, q->type); if (!new_q) @@ -164,17 +166,17 @@ int rxe_queue_resize(struct rxe_queue *q, unsigned int *num_elem_p, goto err1; } - spin_lock_bh(consumer_lock); + spin_lock_irqsave(consumer_lock, consumer_flags); if (producer_lock) { - spin_lock_bh(producer_lock); + spin_lock_irqsave(producer_lock, producer_flags); err = resize_finish(q, new_q, num_elem); - spin_unlock_bh(producer_lock); + spin_unlock_irqrestore(producer_lock, producer_flags); } else { err = resize_finish(q, new_q, num_elem); } - spin_unlock_bh(consumer_lock); + spin_unlock_irqrestore(consumer_lock, consumer_flags); rxe_queue_cleanup(new_q); /* new/old dep on err */ if (err) diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 6a6cc1fa90e4..d09a8b68c962 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -217,7 +217,7 @@ static int hdr_check(struct rxe_pkt_info *pkt) return 0; err2: - rxe_drop_ref(qp); + rxe_put(qp); err1: return -EINVAL; } @@ -233,8 +233,8 @@ static inline void rxe_rcv_pkt(struct rxe_pkt_info *pkt, struct sk_buff *skb) static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) { struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); - struct rxe_mc_grp *mcg; - struct rxe_mc_elem *mce; + struct rxe_mcg *mcg; + struct rxe_mca *mca; struct rxe_qp *qp; union ib_gid dgid; int err; @@ -246,19 +246,19 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) memcpy(&dgid, &ipv6_hdr(skb)->daddr, sizeof(dgid)); /* lookup mcast group corresponding to mgid, takes a ref */ - mcg = rxe_pool_get_key(&rxe->mc_grp_pool, &dgid); + mcg = rxe_lookup_mcg(rxe, &dgid); if (!mcg) goto drop; /* mcast group not registered */ - spin_lock_bh(&mcg->mcg_lock); + spin_lock_bh(&rxe->mcg_lock); /* this is unreliable datagram service so we let * failures to deliver a multicast packet to a * single QP happen and just move on and try * the rest of them on the list */ - list_for_each_entry(mce, &mcg->qp_list, qp_list) { - qp = mce->qp; + list_for_each_entry(mca, &mcg->qp_list, qp_list) { + qp = mca->qp; /* validate qp for incoming packet */ err = check_type_state(rxe, pkt, qp); @@ -273,7 +273,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) * skb and pass to the QP. Pass the original skb to * the last QP in the list. */ - if (mce->qp_list.next != &mcg->qp_list) { + if (mca->qp_list.next != &mcg->qp_list) { struct sk_buff *cskb; struct rxe_pkt_info *cpkt; @@ -288,19 +288,19 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) cpkt = SKB_TO_PKT(cskb); cpkt->qp = qp; - rxe_add_ref(qp); + rxe_get(qp); rxe_rcv_pkt(cpkt, cskb); } else { pkt->qp = qp; - rxe_add_ref(qp); + rxe_get(qp); rxe_rcv_pkt(pkt, skb); skb = NULL; /* mark consumed */ } } - spin_unlock_bh(&mcg->mcg_lock); + spin_unlock_bh(&rxe->mcg_lock); - rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */ + kref_put(&mcg->ref_cnt, rxe_cleanup_mcg); if (likely(!skb)) return; @@ -397,7 +397,7 @@ void rxe_rcv(struct sk_buff *skb) drop: if (pkt->qp) - rxe_drop_ref(pkt->qp); + rxe_put(pkt->qp); kfree_skb(skb); ib_device_put(&rxe->ib_dev); diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 5eb89052dd66..ae5fbc79dd5c 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -358,14 +358,14 @@ static inline int get_mtu(struct rxe_qp *qp) } static struct sk_buff *init_req_packet(struct rxe_qp *qp, + struct rxe_av *av, struct rxe_send_wqe *wqe, - int opcode, int payload, + int opcode, u32 payload, struct rxe_pkt_info *pkt) { struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct sk_buff *skb; struct rxe_send_wr *ibwr = &wqe->wr; - struct rxe_av *av; int pad = (-payload) & 0x3; int paylen; int solicited; @@ -374,21 +374,9 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, /* length from start of bth to end of icrc */ paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE; - - /* pkt->hdr, port_num and mask are initialized in ifc layer */ - pkt->rxe = rxe; - pkt->opcode = opcode; - pkt->qp = qp; - pkt->psn = qp->req.psn; - pkt->mask = rxe_opcode[opcode].mask; - pkt->paylen = paylen; - pkt->wqe = wqe; + pkt->paylen = paylen; /* init skb */ - av = rxe_get_av(pkt); - if (!av) - return NULL; - skb = rxe_init_packet(rxe, av, paylen, pkt); if (unlikely(!skb)) return NULL; @@ -447,13 +435,13 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, return skb; } -static int finish_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe, - struct rxe_pkt_info *pkt, struct sk_buff *skb, - int paylen) +static int finish_packet(struct rxe_qp *qp, struct rxe_av *av, + struct rxe_send_wqe *wqe, struct rxe_pkt_info *pkt, + struct sk_buff *skb, u32 paylen) { int err; - err = rxe_prepare(pkt, skb); + err = rxe_prepare(av, pkt, skb); if (err) return err; @@ -497,7 +485,7 @@ static void update_wqe_state(struct rxe_qp *qp, static void update_wqe_psn(struct rxe_qp *qp, struct rxe_send_wqe *wqe, struct rxe_pkt_info *pkt, - int payload) + u32 payload) { /* number of packets left to send including current one */ int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu; @@ -540,7 +528,7 @@ static void rollback_state(struct rxe_send_wqe *wqe, } static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, - struct rxe_pkt_info *pkt, int payload) + struct rxe_pkt_info *pkt) { qp->req.opcode = pkt->opcode; @@ -608,19 +596,22 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe) int rxe_requester(void *arg) { struct rxe_qp *qp = (struct rxe_qp *)arg; + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct rxe_pkt_info pkt; struct sk_buff *skb; struct rxe_send_wqe *wqe; enum rxe_hdr_mask mask; - int payload; + u32 payload; int mtu; int opcode; int ret; struct rxe_send_wqe rollback_wqe; u32 rollback_psn; struct rxe_queue *q = qp->sq.queue; + struct rxe_ah *ah; + struct rxe_av *av; - rxe_add_ref(qp); + rxe_get(qp); next_wqe: if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) @@ -699,20 +690,34 @@ next_wqe: wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; __rxe_do_task(&qp->comp.task); - rxe_drop_ref(qp); + rxe_put(qp); return 0; } payload = mtu; } - skb = init_req_packet(qp, wqe, opcode, payload, &pkt); + pkt.rxe = rxe; + pkt.opcode = opcode; + pkt.qp = qp; + pkt.psn = qp->req.psn; + pkt.mask = rxe_opcode[opcode].mask; + pkt.wqe = wqe; + + av = rxe_get_av(&pkt, &ah); + if (unlikely(!av)) { + pr_err("qp#%d Failed no address vector\n", qp_num(qp)); + wqe->status = IB_WC_LOC_QP_OP_ERR; + goto err_drop_ah; + } + + skb = init_req_packet(qp, av, wqe, opcode, payload, &pkt); if (unlikely(!skb)) { pr_err("qp#%d Failed allocating skb\n", qp_num(qp)); wqe->status = IB_WC_LOC_QP_OP_ERR; - goto err; + goto err_drop_ah; } - ret = finish_packet(qp, wqe, &pkt, skb, payload); + ret = finish_packet(qp, av, wqe, &pkt, skb, payload); if (unlikely(ret)) { pr_debug("qp#%d Error during finish packet\n", qp_num(qp)); if (ret == -EFAULT) @@ -720,9 +725,12 @@ next_wqe: else wqe->status = IB_WC_LOC_QP_OP_ERR; kfree_skb(skb); - goto err; + goto err_drop_ah; } + if (ah) + rxe_put(ah); + /* * To prevent a race on wqe access between requester and completer, * wqe members state and psn need to be set before calling @@ -747,15 +755,18 @@ next_wqe: goto err; } - update_state(qp, wqe, &pkt, payload); + update_state(qp, wqe, &pkt); goto next_wqe; +err_drop_ah: + if (ah) + rxe_put(ah); err: wqe->state = wqe_state_error; __rxe_do_task(&qp->comp.task); exit: - rxe_drop_ref(qp); + rxe_put(qp); return -EAGAIN; } diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index e8f435fa6e4d..16fc7ea1298d 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -99,7 +99,7 @@ static inline enum resp_states get_req(struct rxe_qp *qp, if (qp->resp.state == QP_STATE_ERROR) { while ((skb = skb_dequeue(&qp->req_pkts))) { - rxe_drop_ref(qp); + rxe_put(qp); kfree_skb(skb); ib_device_put(qp->ibqp.device); } @@ -297,21 +297,22 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp) struct ib_event ev; unsigned int count; size_t size; + unsigned long flags; if (srq->error) return RESPST_ERR_RNR; - spin_lock_bh(&srq->rq.consumer_lock); + spin_lock_irqsave(&srq->rq.consumer_lock, flags); wqe = queue_head(q, QUEUE_TYPE_FROM_CLIENT); if (!wqe) { - spin_unlock_bh(&srq->rq.consumer_lock); + spin_unlock_irqrestore(&srq->rq.consumer_lock, flags); return RESPST_ERR_RNR; } /* don't trust user space data */ if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) { - spin_unlock_bh(&srq->rq.consumer_lock); + spin_unlock_irqrestore(&srq->rq.consumer_lock, flags); pr_warn("%s: invalid num_sge in SRQ entry\n", __func__); return RESPST_ERR_MALFORMED_WQE; } @@ -327,11 +328,11 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp) goto event; } - spin_unlock_bh(&srq->rq.consumer_lock); + spin_unlock_irqrestore(&srq->rq.consumer_lock, flags); return RESPST_CHK_LENGTH; event: - spin_unlock_bh(&srq->rq.consumer_lock); + spin_unlock_irqrestore(&srq->rq.consumer_lock, flags); ev.device = qp->ibqp.device; ev.element.srq = qp->ibqp.srq; ev.event = IB_EVENT_SRQ_LIMIT_REACHED; @@ -463,8 +464,8 @@ static enum resp_states check_rkey(struct rxe_qp *qp, if (mw->access & IB_ZERO_BASED) qp->resp.offset = mw->addr; - rxe_drop_ref(mw); - rxe_add_ref(mr); + rxe_put(mw); + rxe_get(mr); } else { mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE); if (!mr) { @@ -507,9 +508,9 @@ static enum resp_states check_rkey(struct rxe_qp *qp, err: if (mr) - rxe_drop_ref(mr); + rxe_put(mr); if (mw) - rxe_drop_ref(mw); + rxe_put(mw); return state; } @@ -632,7 +633,7 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp, if (ack->mask & RXE_ATMACK_MASK) atmack_set_orig(ack, qp->resp.atomic_orig); - err = rxe_prepare(ack, skb); + err = rxe_prepare(&qp->pri_av, ack, skb); if (err) { kfree_skb(skb); return NULL; @@ -641,6 +642,78 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp, return skb; } +static struct resp_res *rxe_prepare_read_res(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + struct resp_res *res; + u32 pkts; + + res = &qp->resp.resources[qp->resp.res_head]; + rxe_advance_resp_resource(qp); + free_rd_atomic_resource(qp, res); + + res->type = RXE_READ_MASK; + res->replay = 0; + res->read.va = qp->resp.va + qp->resp.offset; + res->read.va_org = qp->resp.va + qp->resp.offset; + res->read.resid = qp->resp.resid; + res->read.length = qp->resp.resid; + res->read.rkey = qp->resp.rkey; + + pkts = max_t(u32, (reth_len(pkt) + qp->mtu - 1)/qp->mtu, 1); + res->first_psn = pkt->psn; + res->cur_psn = pkt->psn; + res->last_psn = (pkt->psn + pkts - 1) & BTH_PSN_MASK; + + res->state = rdatm_res_state_new; + + return res; +} + +/** + * rxe_recheck_mr - revalidate MR from rkey and get a reference + * @qp: the qp + * @rkey: the rkey + * + * This code allows the MR to be invalidated or deregistered or + * the MW if one was used to be invalidated or deallocated. + * It is assumed that the access permissions if originally good + * are OK and the mappings to be unchanged. + * + * Return: mr on success else NULL + */ +static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + struct rxe_mr *mr; + struct rxe_mw *mw; + + if (rkey_is_mw(rkey)) { + mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8); + if (!mw || mw->rkey != rkey) + return NULL; + + if (mw->state != RXE_MW_STATE_VALID) { + rxe_put(mw); + return NULL; + } + + mr = mw->mr; + rxe_put(mw); + } else { + mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8); + if (!mr || mr->rkey != rkey) + return NULL; + } + + if (mr->state != RXE_MR_STATE_VALID) { + rxe_put(mr); + return NULL; + } + + return mr; +} + /* RDMA read response. If res is not NULL, then we have a current RDMA request * being processed or replayed. */ @@ -655,53 +728,26 @@ static enum resp_states read_reply(struct rxe_qp *qp, int opcode; int err; struct resp_res *res = qp->resp.res; + struct rxe_mr *mr; if (!res) { - /* This is the first time we process that request. Get a - * resource - */ - res = &qp->resp.resources[qp->resp.res_head]; - - free_rd_atomic_resource(qp, res); - rxe_advance_resp_resource(qp); - - res->type = RXE_READ_MASK; - res->replay = 0; - - res->read.va = qp->resp.va + - qp->resp.offset; - res->read.va_org = qp->resp.va + - qp->resp.offset; - - res->first_psn = req_pkt->psn; - - if (reth_len(req_pkt)) { - res->last_psn = (req_pkt->psn + - (reth_len(req_pkt) + mtu - 1) / - mtu - 1) & BTH_PSN_MASK; - } else { - res->last_psn = res->first_psn; - } - res->cur_psn = req_pkt->psn; - - res->read.resid = qp->resp.resid; - res->read.length = qp->resp.resid; - res->read.rkey = qp->resp.rkey; - - /* note res inherits the reference to mr from qp */ - res->read.mr = qp->resp.mr; - qp->resp.mr = NULL; - - qp->resp.res = res; - res->state = rdatm_res_state_new; + res = rxe_prepare_read_res(qp, req_pkt); + qp->resp.res = res; } if (res->state == rdatm_res_state_new) { + mr = qp->resp.mr; + qp->resp.mr = NULL; + if (res->read.resid <= mtu) opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY; else opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST; } else { + mr = rxe_recheck_mr(qp, res->read.rkey); + if (!mr) + return RESPST_ERR_RKEY_VIOLATION; + if (res->read.resid > mtu) opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE; else @@ -717,10 +763,12 @@ static enum resp_states read_reply(struct rxe_qp *qp, if (!skb) return RESPST_ERR_RNR; - err = rxe_mr_copy(res->read.mr, res->read.va, payload_addr(&ack_pkt), + err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt), payload, RXE_FROM_MR_OBJ); if (err) pr_err("Failed copying memory\n"); + if (mr) + rxe_put(mr); if (bth_pad(&ack_pkt)) { u8 *pad = payload_addr(&ack_pkt) + payload; @@ -814,6 +862,10 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) return RESPST_ERR_INVALIDATE_RKEY; } + if (pkt->mask & RXE_END_MASK) + /* We successfully processed this new request. */ + qp->resp.msn++; + /* next expected psn, read handles this separately */ qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; qp->resp.ack_psn = qp->resp.psn; @@ -821,11 +873,9 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) qp->resp.opcode = pkt->opcode; qp->resp.status = IB_WC_SUCCESS; - if (pkt->mask & RXE_COMP_MASK) { - /* We successfully processed this new request. */ - qp->resp.msn++; + if (pkt->mask & RXE_COMP_MASK) return RESPST_COMPLETE; - } else if (qp_type(qp) == IB_QPT_RC) + else if (qp_type(qp) == IB_QPT_RC) return RESPST_ACKNOWLEDGE; else return RESPST_CLEANUP; @@ -987,7 +1037,7 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, rc = rxe_xmit_packet(qp, &ack_pkt, skb); if (rc) { pr_err_ratelimited("Failed sending ack\n"); - rxe_drop_ref(qp); + rxe_put(qp); } out: return rc; @@ -1016,13 +1066,13 @@ static enum resp_states cleanup(struct rxe_qp *qp, if (pkt) { skb = skb_dequeue(&qp->req_pkts); - rxe_drop_ref(qp); + rxe_put(qp); kfree_skb(skb); ib_device_put(qp->ibqp.device); } if (qp->resp.mr) { - rxe_drop_ref(qp->resp.mr); + rxe_put(qp->resp.mr); qp->resp.mr = NULL; } @@ -1166,7 +1216,7 @@ static enum resp_states do_class_d1e_error(struct rxe_qp *qp) } if (qp->resp.mr) { - rxe_drop_ref(qp->resp.mr); + rxe_put(qp->resp.mr); qp->resp.mr = NULL; } @@ -1180,7 +1230,7 @@ static void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify) struct rxe_queue *q = qp->rq.queue; while ((skb = skb_dequeue(&qp->req_pkts))) { - rxe_drop_ref(qp); + rxe_put(qp); kfree_skb(skb); ib_device_put(qp->ibqp.device); } @@ -1200,7 +1250,7 @@ int rxe_responder(void *arg) struct rxe_pkt_info *pkt = NULL; int ret = 0; - rxe_add_ref(qp); + rxe_get(qp); qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED; @@ -1387,6 +1437,6 @@ int rxe_responder(void *arg) exit: ret = -EAGAIN; done: - rxe_drop_ref(qp); + rxe_put(qp); return ret; } diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 915ad6664321..67184b0281a0 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -115,7 +115,7 @@ static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc) { struct rxe_ucontext *uc = to_ruc(ibuc); - rxe_drop_ref(uc); + rxe_put(uc); } static int rxe_port_immutable(struct ib_device *dev, u32 port_num, @@ -149,7 +149,7 @@ static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct rxe_pd *pd = to_rpd(ibpd); - rxe_drop_ref(pd); + rxe_put(pd); return 0; } @@ -181,7 +181,6 @@ static int rxe_create_ah(struct ib_ah *ibah, return err; /* create index > 0 */ - rxe_add_index(ah); ah->ah_num = ah->elem.index; if (uresp) { @@ -189,8 +188,7 @@ static int rxe_create_ah(struct ib_ah *ibah, err = copy_to_user(&uresp->ah_num, &ah->ah_num, sizeof(uresp->ah_num)); if (err) { - rxe_drop_index(ah); - rxe_drop_ref(ah); + rxe_put(ah); return -EFAULT; } } else if (ah->is_user) { @@ -230,8 +228,7 @@ static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags) { struct rxe_ah *ah = to_rah(ibah); - rxe_drop_index(ah); - rxe_drop_ref(ah); + rxe_put(ah); return 0; } @@ -306,7 +303,7 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init, if (err) goto err1; - rxe_add_ref(pd); + rxe_get(pd); srq->pd = pd; err = rxe_srq_from_init(rxe, srq, init, udata, uresp); @@ -316,8 +313,8 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init, return 0; err2: - rxe_drop_ref(pd); - rxe_drop_ref(srq); + rxe_put(pd); + rxe_put(srq); err1: return err; } @@ -374,8 +371,8 @@ static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) if (srq->rq.queue) rxe_queue_cleanup(srq->rq.queue); - rxe_drop_ref(srq->pd); - rxe_drop_ref(srq); + rxe_put(srq->pd); + rxe_put(srq); return 0; } @@ -384,8 +381,9 @@ static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, { int err = 0; struct rxe_srq *srq = to_rsrq(ibsrq); + unsigned long flags; - spin_lock_bh(&srq->rq.producer_lock); + spin_lock_irqsave(&srq->rq.producer_lock, flags); while (wr) { err = post_one_recv(&srq->rq, wr); @@ -394,7 +392,7 @@ static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, wr = wr->next; } - spin_unlock_bh(&srq->rq.producer_lock); + spin_unlock_irqrestore(&srq->rq.producer_lock, flags); if (err) *bad_wr = wr; @@ -437,7 +435,6 @@ static int rxe_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init, if (err) return err; - rxe_add_index(qp); err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibqp->pd, udata); if (err) goto qp_init; @@ -445,8 +442,7 @@ static int rxe_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init, return 0; qp_init: - rxe_drop_index(qp); - rxe_drop_ref(qp); + rxe_put(qp); return err; } @@ -493,10 +489,14 @@ static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct rxe_qp *qp = to_rqp(ibqp); + int ret; + + ret = rxe_qp_chk_destroy(qp); + if (ret) + return ret; rxe_qp_destroy(qp); - rxe_drop_index(qp); - rxe_drop_ref(qp); + rxe_put(qp); return 0; } @@ -638,18 +638,19 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, int err; struct rxe_sq *sq = &qp->sq; struct rxe_send_wqe *send_wqe; + unsigned long flags; int full; err = validate_send_wr(qp, ibwr, mask, length); if (err) return err; - spin_lock_bh(&qp->sq.sq_lock); + spin_lock_irqsave(&qp->sq.sq_lock, flags); full = queue_full(sq->queue, QUEUE_TYPE_TO_DRIVER); if (unlikely(full)) { - spin_unlock_bh(&qp->sq.sq_lock); + spin_unlock_irqrestore(&qp->sq.sq_lock, flags); return -ENOMEM; } @@ -658,7 +659,7 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, queue_advance_producer(sq->queue, QUEUE_TYPE_TO_DRIVER); - spin_unlock_bh(&qp->sq.sq_lock); + spin_unlock_irqrestore(&qp->sq.sq_lock, flags); return 0; } @@ -738,6 +739,7 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, int err = 0; struct rxe_qp *qp = to_rqp(ibqp); struct rxe_rq *rq = &qp->rq; + unsigned long flags; if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) { *bad_wr = wr; @@ -751,7 +753,7 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, goto err1; } - spin_lock_bh(&rq->producer_lock); + spin_lock_irqsave(&rq->producer_lock, flags); while (wr) { err = post_one_recv(rq, wr); @@ -762,7 +764,7 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, wr = wr->next; } - spin_unlock_bh(&rq->producer_lock); + spin_unlock_irqrestore(&rq->producer_lock, flags); if (qp->resp.state == QP_STATE_ERROR) rxe_run_task(&qp->resp.task, 1); @@ -807,7 +809,7 @@ static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) rxe_cq_disable(cq); - rxe_drop_ref(cq); + rxe_put(cq); return 0; } @@ -843,8 +845,9 @@ static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) int i; struct rxe_cq *cq = to_rcq(ibcq); struct rxe_cqe *cqe; + unsigned long flags; - spin_lock_bh(&cq->cq_lock); + spin_lock_irqsave(&cq->cq_lock, flags); for (i = 0; i < num_entries; i++) { cqe = queue_head(cq->queue, QUEUE_TYPE_FROM_DRIVER); if (!cqe) @@ -853,7 +856,7 @@ static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) memcpy(wc++, &cqe->ibwc, sizeof(*wc)); queue_advance_consumer(cq->queue, QUEUE_TYPE_FROM_DRIVER); } - spin_unlock_bh(&cq->cq_lock); + spin_unlock_irqrestore(&cq->cq_lock, flags); return i; } @@ -873,8 +876,9 @@ static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) struct rxe_cq *cq = to_rcq(ibcq); int ret = 0; int empty; + unsigned long irq_flags; - spin_lock_bh(&cq->cq_lock); + spin_lock_irqsave(&cq->cq_lock, irq_flags); if (cq->notify != IB_CQ_NEXT_COMP) cq->notify = flags & IB_CQ_SOLICITED_MASK; @@ -883,7 +887,7 @@ static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !empty) ret = 1; - spin_unlock_bh(&cq->cq_lock); + spin_unlock_irqrestore(&cq->cq_lock, irq_flags); return ret; } @@ -898,8 +902,7 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access) if (!mr) return ERR_PTR(-ENOMEM); - rxe_add_index(mr); - rxe_add_ref(pd); + rxe_get(pd); rxe_mr_init_dma(pd, access, mr); return &mr->ibmr; @@ -922,9 +925,8 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, goto err2; } - rxe_add_index(mr); - rxe_add_ref(pd); + rxe_get(pd); err = rxe_mr_init_user(pd, start, length, iova, access, mr); if (err) @@ -933,9 +935,8 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, return &mr->ibmr; err3: - rxe_drop_ref(pd); - rxe_drop_index(mr); - rxe_drop_ref(mr); + rxe_put(pd); + rxe_put(mr); err2: return ERR_PTR(err); } @@ -957,9 +958,7 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, goto err1; } - rxe_add_index(mr); - - rxe_add_ref(pd); + rxe_get(pd); err = rxe_mr_init_fast(pd, max_num_sg, mr); if (err) @@ -968,9 +967,8 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, return &mr->ibmr; err2: - rxe_drop_ref(pd); - rxe_drop_index(mr); - rxe_drop_ref(mr); + rxe_put(pd); + rxe_put(mr); err1: return ERR_PTR(err); } @@ -999,32 +997,6 @@ static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, return n; } -static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) -{ - int err; - struct rxe_dev *rxe = to_rdev(ibqp->device); - struct rxe_qp *qp = to_rqp(ibqp); - struct rxe_mc_grp *grp; - - /* takes a ref on grp if successful */ - err = rxe_mcast_get_grp(rxe, mgid, &grp); - if (err) - return err; - - err = rxe_mcast_add_grp_elem(rxe, qp, grp); - - rxe_drop_ref(grp); - return err; -} - -static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) -{ - struct rxe_dev *rxe = to_rdev(ibqp->device); - struct rxe_qp *qp = to_rqp(ibqp); - - return rxe_mcast_drop_grp_elem(rxe, qp, mgid); -} - static ssize_t parent_show(struct device *device, struct device_attribute *attr, char *buf) { diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index e48969e8d4c8..e7eff1ca75e9 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -157,7 +157,6 @@ struct resp_res { struct sk_buff *skb; } atomic; struct { - struct rxe_mr *mr; u64 va_org; u32 rkey; u32 length; @@ -232,9 +231,7 @@ struct rxe_qp { struct rxe_av pri_av; struct rxe_av alt_av; - /* list of mcast groups qp has joined (for cleanup) */ - struct list_head grp_list; - spinlock_t grp_lock; /* guard grp_list */ + atomic_t mcg_num; struct sk_buff_head req_pkts; struct sk_buff_head resp_pkts; @@ -353,23 +350,20 @@ struct rxe_mw { u64 length; }; -struct rxe_mc_grp { - struct rxe_pool_elem elem; - spinlock_t mcg_lock; /* guard group */ +struct rxe_mcg { + struct rb_node node; + struct kref ref_cnt; struct rxe_dev *rxe; struct list_head qp_list; union ib_gid mgid; - int num_qp; + atomic_t qp_num; u32 qkey; u16 pkey; }; -struct rxe_mc_elem { - struct rxe_pool_elem elem; +struct rxe_mca { struct list_head qp_list; - struct list_head grp_list; struct rxe_qp *qp; - struct rxe_mc_grp *grp; }; struct rxe_port { @@ -401,7 +395,12 @@ struct rxe_dev { struct rxe_pool mr_pool; struct rxe_pool mw_pool; struct rxe_pool mc_grp_pool; - struct rxe_pool mc_elem_pool; + + /* multicast support */ + spinlock_t mcg_lock; + struct rb_root mcg_tree; + atomic_t mcg_num; + atomic_t mcg_attach; spinlock_t pending_lock; /* guard pending_mmaps */ struct list_head pending_mmaps; @@ -482,6 +481,4 @@ static inline struct rxe_pd *rxe_mw_pd(struct rxe_mw *mw) int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name); -void rxe_mc_cleanup(struct rxe_pool_elem *elem); - #endif /* RXE_VERBS_H */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c index 5b05cf3837da..ea16ba5d8da6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c @@ -32,7 +32,6 @@ #include #include /* For ARPHRD_xxx */ -#include #include #include "ipoib.h" diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 0322dc75396f..4bd161e86f8d 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -30,7 +30,6 @@ * SOFTWARE. */ -#include #include #include diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 20af46c4e954..7e4faf9c5e9e 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -203,12 +203,12 @@ struct iser_reg_resources; * * @sge: memory region sg element * @rkey: memory region remote key - * @mem_h: pointer to registration context (FMR/Fastreg) + * @desc: pointer to fast registration context */ struct iser_mem_reg { - struct ib_sge sge; - u32 rkey; - void *mem_h; + struct ib_sge sge; + u32 rkey; + struct iser_fr_desc *desc; }; enum iser_desc_type { @@ -531,13 +531,12 @@ int iser_post_recvm(struct iser_conn *iser_conn, int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc); int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, - struct iser_data_buf *data, enum iser_data_dir iser_dir, enum dma_data_direction dma_dir); void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task, - struct iser_data_buf *data, - enum dma_data_direction dir); + enum iser_data_dir iser_dir, + enum dma_data_direction dma_dir); int iser_initialize_task_headers(struct iscsi_task *task, struct iser_tx_desc *tx_desc); diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 2490150d3085..bd5f3b5e1727 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -52,30 +52,17 @@ static int iser_prepare_read_cmd(struct iscsi_task *task) struct iser_mem_reg *mem_reg; int err; struct iser_ctrl *hdr = &iser_task->desc.iser_header; - struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN]; err = iser_dma_map_task_data(iser_task, - buf_in, ISER_DIR_IN, DMA_FROM_DEVICE); if (err) return err; - if (scsi_prot_sg_count(iser_task->sc)) { - struct iser_data_buf *pbuf_in = &iser_task->prot[ISER_DIR_IN]; - - err = iser_dma_map_task_data(iser_task, - pbuf_in, - ISER_DIR_IN, - DMA_FROM_DEVICE); - if (err) - return err; - } - err = iser_reg_mem_fastreg(iser_task, ISER_DIR_IN, false); if (err) { iser_err("Failed to set up Data-IN RDMA\n"); - return err; + goto out_err; } mem_reg = &iser_task->rdma_reg[ISER_DIR_IN]; @@ -88,6 +75,10 @@ static int iser_prepare_read_cmd(struct iscsi_task *task) (unsigned long long)mem_reg->sge.addr); return 0; + +out_err: + iser_dma_unmap_task_data(iser_task, ISER_DIR_IN, DMA_FROM_DEVICE); + return err; } /* Register user buffer memory and initialize passive rdma @@ -106,28 +97,16 @@ static int iser_prepare_write_cmd(struct iscsi_task *task, unsigned int imm_sz, struct ib_sge *tx_dsg = &iser_task->desc.tx_sg[1]; err = iser_dma_map_task_data(iser_task, - buf_out, ISER_DIR_OUT, DMA_TO_DEVICE); if (err) return err; - if (scsi_prot_sg_count(iser_task->sc)) { - struct iser_data_buf *pbuf_out = &iser_task->prot[ISER_DIR_OUT]; - - err = iser_dma_map_task_data(iser_task, - pbuf_out, - ISER_DIR_OUT, - DMA_TO_DEVICE); - if (err) - return err; - } - err = iser_reg_mem_fastreg(iser_task, ISER_DIR_OUT, buf_out->data_len == imm_sz); - if (err != 0) { + if (err) { iser_err("Failed to register write cmd RDMA mem\n"); - return err; + goto out_err; } mem_reg = &iser_task->rdma_reg[ISER_DIR_OUT]; @@ -154,6 +133,10 @@ static int iser_prepare_write_cmd(struct iscsi_task *task, unsigned int imm_sz, } return 0; + +out_err: + iser_dma_unmap_task_data(iser_task, ISER_DIR_OUT, DMA_TO_DEVICE); + return err; } /* creates a new tx descriptor and adds header regd buffer */ @@ -619,13 +602,13 @@ static int iser_check_remote_inv(struct iser_conn *iser_conn, struct ib_wc *wc, struct iser_fr_desc *desc; if (iser_task->dir[ISER_DIR_IN]) { - desc = iser_task->rdma_reg[ISER_DIR_IN].mem_h; + desc = iser_task->rdma_reg[ISER_DIR_IN].desc; if (unlikely(iser_inv_desc(desc, rkey))) return -EINVAL; } if (iser_task->dir[ISER_DIR_OUT]) { - desc = iser_task->rdma_reg[ISER_DIR_OUT].mem_h; + desc = iser_task->rdma_reg[ISER_DIR_OUT].desc; if (unlikely(iser_inv_desc(desc, rkey))) return -EINVAL; } @@ -740,27 +723,16 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task) void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) { - int prot_count = scsi_prot_sg_count(iser_task->sc); if (iser_task->dir[ISER_DIR_IN]) { iser_unreg_mem_fastreg(iser_task, ISER_DIR_IN); - iser_dma_unmap_task_data(iser_task, - &iser_task->data[ISER_DIR_IN], + iser_dma_unmap_task_data(iser_task, ISER_DIR_IN, DMA_FROM_DEVICE); - if (prot_count) - iser_dma_unmap_task_data(iser_task, - &iser_task->prot[ISER_DIR_IN], - DMA_FROM_DEVICE); } if (iser_task->dir[ISER_DIR_OUT]) { iser_unreg_mem_fastreg(iser_task, ISER_DIR_OUT); - iser_dma_unmap_task_data(iser_task, - &iser_task->data[ISER_DIR_OUT], + iser_dma_unmap_task_data(iser_task, ISER_DIR_OUT, DMA_TO_DEVICE); - if (prot_count) - iser_dma_unmap_task_data(iser_task, - &iser_task->prot[ISER_DIR_OUT], - DMA_TO_DEVICE); } } diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 660982625488..29ae2c6a250a 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -30,7 +30,6 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include #include #include #include @@ -71,10 +70,10 @@ static void iser_reg_desc_put_fr(struct ib_conn *ib_conn, } int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, - struct iser_data_buf *data, enum iser_data_dir iser_dir, enum dma_data_direction dma_dir) { + struct iser_data_buf *data = &iser_task->data[iser_dir]; struct ib_device *dev; iser_task->dir[iser_dir] = 1; @@ -85,17 +84,40 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, iser_err("dma_map_sg failed!!!\n"); return -EINVAL; } + + if (scsi_prot_sg_count(iser_task->sc)) { + struct iser_data_buf *pdata = &iser_task->prot[iser_dir]; + + pdata->dma_nents = ib_dma_map_sg(dev, pdata->sg, pdata->size, dma_dir); + if (unlikely(pdata->dma_nents == 0)) { + iser_err("protection dma_map_sg failed!!!\n"); + goto out_unmap; + } + } + return 0; + +out_unmap: + ib_dma_unmap_sg(dev, data->sg, data->size, dma_dir); + return -EINVAL; } + void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task, - struct iser_data_buf *data, - enum dma_data_direction dir) + enum iser_data_dir iser_dir, + enum dma_data_direction dma_dir) { + struct iser_data_buf *data = &iser_task->data[iser_dir]; struct ib_device *dev; dev = iser_task->iser_conn->ib_conn.device->ib_device; - ib_dma_unmap_sg(dev, data->sg, data->size, dir); + ib_dma_unmap_sg(dev, data->sg, data->size, dma_dir); + + if (scsi_prot_sg_count(iser_task->sc)) { + struct iser_data_buf *pdata = &iser_task->prot[iser_dir]; + + ib_dma_unmap_sg(dev, pdata->sg, pdata->size, dma_dir); + } } static int iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem, @@ -130,7 +152,7 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, struct iser_fr_desc *desc; struct ib_mr_status mr_status; - desc = reg->mem_h; + desc = reg->desc; if (!desc) return; @@ -147,8 +169,8 @@ void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, ib_check_mr_status(desc->rsc.sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status); } - iser_reg_desc_put_fr(&iser_task->iser_conn->ib_conn, reg->mem_h); - reg->mem_h = NULL; + iser_reg_desc_put_fr(&iser_task->iser_conn->ib_conn, reg->desc); + reg->desc = NULL; } static void iser_set_dif_domain(struct scsi_cmnd *sc, @@ -327,40 +349,26 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, return 0; } -static int iser_reg_data_sg(struct iscsi_iser_task *task, - struct iser_data_buf *mem, - struct iser_fr_desc *desc, bool use_dma_key, - struct iser_mem_reg *reg) -{ - struct iser_device *device = task->iser_conn->ib_conn.device; - - if (use_dma_key) - return iser_reg_dma(device, mem, reg); - - return iser_fast_reg_mr(task, mem, &desc->rsc, reg); -} - int iser_reg_mem_fastreg(struct iscsi_iser_task *task, enum iser_data_dir dir, bool all_imm) { struct ib_conn *ib_conn = &task->iser_conn->ib_conn; + struct iser_device *device = ib_conn->device; struct iser_data_buf *mem = &task->data[dir]; struct iser_mem_reg *reg = &task->rdma_reg[dir]; - struct iser_fr_desc *desc = NULL; + struct iser_fr_desc *desc; bool use_dma_key; int err; use_dma_key = mem->dma_nents == 1 && (all_imm || !iser_always_reg) && scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL; + if (use_dma_key) + return iser_reg_dma(device, mem, reg); - if (!use_dma_key) { - desc = iser_reg_desc_get_fr(ib_conn); - reg->mem_h = desc; - } - + desc = iser_reg_desc_get_fr(ib_conn); if (scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL) { - err = iser_reg_data_sg(task, mem, desc, use_dma_key, reg); + err = iser_fast_reg_mr(task, mem, &desc->rsc, reg); if (unlikely(err)) goto err_reg; } else { @@ -372,11 +380,12 @@ int iser_reg_mem_fastreg(struct iscsi_iser_task *task, desc->sig_protected = true; } + reg->desc = desc; + return 0; err_reg: - if (desc) - iser_reg_desc_put_fr(ib_conn, desc); + iser_reg_desc_put_fr(ib_conn, desc); return err; } diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 8bf87b073d9b..5dbad68c7390 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -32,7 +32,6 @@ * SOFTWARE. */ #include -#include #include #include @@ -905,7 +904,7 @@ u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir, sector_t *sector) { struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir]; - struct iser_fr_desc *desc = reg->mem_h; + struct iser_fr_desc *desc = reg->desc; unsigned long sector_size = iser_task->sc->device->sector_size; struct ib_mr_status mr_status; int ret; diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c index aeff68f582d3..071f35711468 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c @@ -50,7 +50,6 @@ * netdev functionality. */ -#include #include #include diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c index b4fa473b7888..d3c436ead694 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c @@ -156,8 +156,7 @@ static DEVICE_ATTR_RW(mpath_policy); static ssize_t add_path_show(struct device *dev, struct device_attribute *attr, char *page) { - return sysfs_emit( - page, + return sysfs_emit(page, "Usage: echo [@] > %s\n\n*addr ::= [ ip: | gid: ]\n", attr->attr.name); } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 759b85f03331..c2c860d0c56e 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -297,6 +297,7 @@ static bool rtrs_clt_change_state_from_to(struct rtrs_clt_path *clt_path, return changed; } +static void rtrs_clt_stop_and_destroy_conns(struct rtrs_clt_path *clt_path); static void rtrs_rdma_error_recovery(struct rtrs_clt_con *con) { struct rtrs_clt_path *clt_path = to_clt_path(con->c.path); @@ -304,16 +305,7 @@ static void rtrs_rdma_error_recovery(struct rtrs_clt_con *con) if (rtrs_clt_change_state_from_to(clt_path, RTRS_CLT_CONNECTED, RTRS_CLT_RECONNECTING)) { - struct rtrs_clt_sess *clt = clt_path->clt; - unsigned int delay_ms; - - /* - * Normal scenario, reconnect if we were successfully connected - */ - delay_ms = clt->reconnect_delay_sec * 1000; - queue_delayed_work(rtrs_wq, &clt_path->reconnect_dwork, - msecs_to_jiffies(delay_ms + - prandom_u32() % RTRS_RECONNECT_SEED)); + queue_work(rtrs_wq, &clt_path->err_recovery_work); } else { /* * Error can happen just on establishing new connection, @@ -917,7 +909,7 @@ static inline void path_it_deinit(struct path_it *it) { struct list_head *skip, *tmp; /* - * The skip_list is used only for the MIN_INFLIGHT policy. + * The skip_list is used only for the MIN_INFLIGHT and MIN_LATENCY policies. * We need to remove paths from it, so that next IO can insert * paths (->mp_skip_entry) into a skip_list again. */ @@ -1511,6 +1503,22 @@ static void rtrs_clt_init_hb(struct rtrs_clt_path *clt_path) static void rtrs_clt_reconnect_work(struct work_struct *work); static void rtrs_clt_close_work(struct work_struct *work); +static void rtrs_clt_err_recovery_work(struct work_struct *work) +{ + struct rtrs_clt_path *clt_path; + struct rtrs_clt_sess *clt; + int delay_ms; + + clt_path = container_of(work, struct rtrs_clt_path, err_recovery_work); + clt = clt_path->clt; + delay_ms = clt->reconnect_delay_sec * 1000; + rtrs_clt_stop_and_destroy_conns(clt_path); + queue_delayed_work(rtrs_wq, &clt_path->reconnect_dwork, + msecs_to_jiffies(delay_ms + + prandom_u32() % + RTRS_RECONNECT_SEED)); +} + static struct rtrs_clt_path *alloc_path(struct rtrs_clt_sess *clt, const struct rtrs_addr *path, size_t con_num, u32 nr_poll_queues) @@ -1562,6 +1570,7 @@ static struct rtrs_clt_path *alloc_path(struct rtrs_clt_sess *clt, clt_path->state = RTRS_CLT_CONNECTING; atomic_set(&clt_path->connected_cnt, 0); INIT_WORK(&clt_path->close_work, rtrs_clt_close_work); + INIT_WORK(&clt_path->err_recovery_work, rtrs_clt_err_recovery_work); INIT_DELAYED_WORK(&clt_path->reconnect_dwork, rtrs_clt_reconnect_work); rtrs_clt_init_hb(clt_path); @@ -2326,6 +2335,7 @@ static void rtrs_clt_close_work(struct work_struct *work) clt_path = container_of(work, struct rtrs_clt_path, close_work); + cancel_work_sync(&clt_path->err_recovery_work); cancel_delayed_work_sync(&clt_path->reconnect_dwork); rtrs_clt_stop_and_destroy_conns(clt_path); rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CLOSED, NULL); @@ -2638,7 +2648,6 @@ static void rtrs_clt_reconnect_work(struct work_struct *work) { struct rtrs_clt_path *clt_path; struct rtrs_clt_sess *clt; - unsigned int delay_ms; int err; clt_path = container_of(to_delayed_work(work), struct rtrs_clt_path, @@ -2655,8 +2664,6 @@ static void rtrs_clt_reconnect_work(struct work_struct *work) } clt_path->reconnect_attempts++; - /* Stop everything */ - rtrs_clt_stop_and_destroy_conns(clt_path); msleep(RTRS_RECONNECT_BACKOFF); if (rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_CONNECTING, NULL)) { err = init_path(clt_path); @@ -2669,11 +2676,7 @@ static void rtrs_clt_reconnect_work(struct work_struct *work) reconnect_again: if (rtrs_clt_change_state_get_old(clt_path, RTRS_CLT_RECONNECTING, NULL)) { clt_path->stats->reconnects.fail_cnt++; - delay_ms = clt->reconnect_delay_sec * 1000; - queue_delayed_work(rtrs_wq, &clt_path->reconnect_dwork, - msecs_to_jiffies(delay_ms + - prandom_u32() % - RTRS_RECONNECT_SEED)); + queue_work(rtrs_wq, &clt_path->err_recovery_work); } } @@ -2908,6 +2911,7 @@ int rtrs_clt_reconnect_from_sysfs(struct rtrs_clt_path *clt_path) &old_state); if (changed) { clt_path->reconnect_attempts = 0; + rtrs_clt_stop_and_destroy_conns(clt_path); queue_delayed_work(rtrs_wq, &clt_path->reconnect_dwork, 0); } if (changed || old_state == RTRS_CLT_RECONNECTING) { diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h index d1b18a154ae0..f848c0392d98 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h @@ -134,6 +134,7 @@ struct rtrs_clt_path { struct rtrs_clt_io_req *reqs; struct delayed_work reconnect_dwork; struct work_struct close_work; + struct work_struct err_recovery_work; unsigned int reconnect_attempts; bool established; struct rtrs_rbuf *rbufs; diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index 4da889103a5f..60fa0b0160f4 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -479,7 +479,6 @@ static int rtrs_str_to_sockaddr(const char *addr, size_t len, */ int sockaddr_to_str(const struct sockaddr *addr, char *buf, size_t len) { - switch (addr->sa_family) { case AF_IB: return scnprintf(buf, len, "gid:%pI6", diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index abccddeea1e3..55a575e2cace 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -92,6 +92,9 @@ enum srp_iu_type { }; /* + * RDMA adapter in the initiator system. + * + * @dev_list: List of RDMA ports associated with this RDMA adapter (srp_host). * @mr_page_mask: HCA memory registration page mask. * @mr_page_size: HCA memory registration page size. * @mr_max_size: Maximum size in bytes of a single FR registration request. @@ -109,6 +112,12 @@ struct srp_device { bool use_fast_reg; }; +/* + * One port of an RDMA adapter in the initiator system. + * + * @target_list: List of connected target ports (struct srp_target_port). + * @target_lock: Protects @target_list. + */ struct srp_host { struct srp_device *srp_dev; u8 port; @@ -183,7 +192,7 @@ struct srp_rdma_ch { }; /** - * struct srp_target_port + * struct srp_target_port - RDMA port in the SRP target system * @comp_vector: Completion vector used by the first RDMA channel created for * this target port. */ diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index a11850dd475d..2650a852eeaf 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -185,7 +185,7 @@ static inline void devm_nsio_disable(struct device *dev, } #endif -#ifdef CONFIG_PROVE_LOCKING +#ifdef CONFIG_PROVE_NVDIMM_LOCKING extern struct class *nd_class; enum { diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index f6135c93ce9d..c7a0ab0d298b 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -184,7 +184,7 @@ static int ceph_releasepage(struct page *page, gfp_t gfp) static void ceph_netfs_expand_readahead(struct netfs_read_request *rreq) { - struct inode *inode = rreq->mapping->host; + struct inode *inode = rreq->inode; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_file_layout *lo = &ci->i_layout; u32 blockoff; @@ -201,7 +201,7 @@ static void ceph_netfs_expand_readahead(struct netfs_read_request *rreq) static bool ceph_netfs_clamp_length(struct netfs_read_subrequest *subreq) { - struct inode *inode = subreq->rreq->mapping->host; + struct inode *inode = subreq->rreq->inode; struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); u64 objno, objoff; @@ -244,10 +244,63 @@ static void finish_netfs_read(struct ceph_osd_request *req) iput(req->r_inode); } +static bool ceph_netfs_issue_op_inline(struct netfs_read_subrequest *subreq) +{ + struct netfs_read_request *rreq = subreq->rreq; + struct inode *inode = rreq->inode; + struct ceph_mds_reply_info_parsed *rinfo; + struct ceph_mds_reply_info_in *iinfo; + struct ceph_mds_request *req; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); + struct ceph_inode_info *ci = ceph_inode(inode); + struct iov_iter iter; + ssize_t err = 0; + size_t len; + + __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); + __clear_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags); + + if (subreq->start >= inode->i_size) + goto out; + + /* We need to fetch the inline data. */ + req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto out; + } + req->r_ino1 = ci->i_vino; + req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INLINE_DATA); + req->r_num_caps = 2; + + err = ceph_mdsc_do_request(mdsc, NULL, req); + if (err < 0) + goto out; + + rinfo = &req->r_reply_info; + iinfo = &rinfo->targeti; + if (iinfo->inline_version == CEPH_INLINE_NONE) { + /* The data got uninlined */ + ceph_mdsc_put_request(req); + return false; + } + + len = min_t(size_t, iinfo->inline_len - subreq->start, subreq->len); + iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages, subreq->start, len); + err = copy_to_iter(iinfo->inline_data + subreq->start, len, &iter); + if (err == 0) + err = -EFAULT; + + ceph_mdsc_put_request(req); +out: + netfs_subreq_terminated(subreq, err, false); + return true; +} + static void ceph_netfs_issue_op(struct netfs_read_subrequest *subreq) { struct netfs_read_request *rreq = subreq->rreq; - struct inode *inode = rreq->mapping->host; + struct inode *inode = rreq->inode; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_osd_request *req; @@ -258,6 +311,10 @@ static void ceph_netfs_issue_op(struct netfs_read_subrequest *subreq) int err = 0; u64 len = subreq->len; + if (ci->i_inline_version != CEPH_INLINE_NONE && + ceph_netfs_issue_op_inline(subreq)) + return; + req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, subreq->start, &len, 0, 1, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ | fsc->client->osdc.client->options->read_from_replica, @@ -326,23 +383,9 @@ static int ceph_readpage(struct file *file, struct page *subpage) size_t len = folio_size(folio); u64 off = folio_file_pos(folio); - if (ci->i_inline_version != CEPH_INLINE_NONE) { - /* - * Uptodate inline data should have been added - * into page cache while getting Fcr caps. - */ - if (off == 0) { - folio_unlock(folio); - return -EINVAL; - } - zero_user_segment(&folio->page, 0, folio_size(folio)); - folio_mark_uptodate(folio); - folio_unlock(folio); - return 0; - } - - dout("readpage ino %llx.%llx file %p off %llu len %zu folio %p index %lu\n", - vino.ino, vino.snap, file, off, len, folio, folio_index(folio)); + dout("readpage ino %llx.%llx file %p off %llu len %zu folio %p index %lu\n inline %d", + vino.ino, vino.snap, file, off, len, folio, folio_index(folio), + ci->i_inline_version != CEPH_INLINE_NONE); return netfs_readpage(file, folio, &ceph_netfs_read_ops, NULL); } @@ -1281,45 +1324,11 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { struct inode *inode = file_inode(file); - struct ceph_inode_info *ci = ceph_inode(inode); struct folio *folio = NULL; - pgoff_t index = pos >> PAGE_SHIFT; int r; - /* - * Uninlining should have already been done and everything updated, EXCEPT - * for inline_version sent to the MDS. - */ - if (ci->i_inline_version != CEPH_INLINE_NONE) { - unsigned int fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE; - if (aop_flags & AOP_FLAG_NOFS) - fgp_flags |= FGP_NOFS; - folio = __filemap_get_folio(mapping, index, fgp_flags, - mapping_gfp_mask(mapping)); - if (!folio) - return -ENOMEM; - - /* - * The inline_version on a new inode is set to 1. If that's the - * case, then the folio is brand new and isn't yet Uptodate. - */ - r = 0; - if (index == 0 && ci->i_inline_version != 1) { - if (!folio_test_uptodate(folio)) { - WARN_ONCE(1, "ceph: write_begin called on still-inlined inode (inline_version %llu)!\n", - ci->i_inline_version); - r = -EINVAL; - } - goto out; - } - zero_user_segment(&folio->page, 0, folio_size(folio)); - folio_mark_uptodate(folio); - goto out; - } - r = netfs_write_begin(file, inode->i_mapping, pos, len, 0, &folio, NULL, &ceph_netfs_read_ops, NULL); -out: if (r == 0) folio_wait_fscache(folio); if (r < 0) { @@ -1515,19 +1524,6 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) sb_start_pagefault(inode->i_sb); ceph_block_sigs(&oldset); - if (ci->i_inline_version != CEPH_INLINE_NONE) { - struct page *locked_page = NULL; - if (off == 0) { - lock_page(page); - locked_page = page; - } - err = ceph_uninline_data(vma->vm_file, locked_page); - if (locked_page) - unlock_page(locked_page); - if (err < 0) - goto out_free; - } - if (off + thp_size(page) <= size) len = thp_size(page); else @@ -1584,11 +1580,9 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) ceph_put_snap_context(snapc); } while (err == 0); - if (ret == VM_FAULT_LOCKED || - ci->i_inline_version != CEPH_INLINE_NONE) { + if (ret == VM_FAULT_LOCKED) { int dirty; spin_lock(&ci->i_ceph_lock); - ci->i_inline_version = CEPH_INLINE_NONE; dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, &prealloc_cf); spin_unlock(&ci->i_ceph_lock); @@ -1652,16 +1646,30 @@ void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, } } -int ceph_uninline_data(struct file *filp, struct page *locked_page) +int ceph_uninline_data(struct file *file) { - struct inode *inode = file_inode(filp); + struct inode *inode = file_inode(file); struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_osd_request *req; - struct page *page = NULL; - u64 len, inline_version; + struct ceph_cap_flush *prealloc_cf; + struct folio *folio = NULL; + u64 inline_version = CEPH_INLINE_NONE; + struct page *pages[1]; int err = 0; - bool from_pagecache = false; + u64 len; + + prealloc_cf = ceph_alloc_cap_flush(); + if (!prealloc_cf) + return -ENOMEM; + + folio = read_mapping_folio(inode->i_mapping, 0, file); + if (IS_ERR(folio)) { + err = PTR_ERR(folio); + goto out; + } + + folio_lock(folio); spin_lock(&ci->i_ceph_lock); inline_version = ci->i_inline_version; @@ -1672,45 +1680,11 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) if (inline_version == 1 || /* initial version, no data */ inline_version == CEPH_INLINE_NONE) - goto out; + goto out_unlock; - if (locked_page) { - page = locked_page; - WARN_ON(!PageUptodate(page)); - } else if (ceph_caps_issued(ci) & - (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) { - page = find_get_page(inode->i_mapping, 0); - if (page) { - if (PageUptodate(page)) { - from_pagecache = true; - lock_page(page); - } else { - put_page(page); - page = NULL; - } - } - } - - if (page) { - len = i_size_read(inode); - if (len > PAGE_SIZE) - len = PAGE_SIZE; - } else { - page = __page_cache_alloc(GFP_NOFS); - if (!page) { - err = -ENOMEM; - goto out; - } - err = __ceph_do_getattr(inode, page, - CEPH_STAT_CAP_INLINE_DATA, true); - if (err < 0) { - /* no inline data */ - if (err == -ENODATA) - err = 0; - goto out; - } - len = err; - } + len = i_size_read(inode); + if (len > folio_size(folio)) + len = folio_size(folio); req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, ceph_vino(inode), 0, &len, 0, 1, @@ -1718,7 +1692,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) NULL, 0, 0, false); if (IS_ERR(req)) { err = PTR_ERR(req); - goto out; + goto out_unlock; } req->r_mtime = inode->i_mtime; @@ -1727,7 +1701,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) err = ceph_osdc_wait_request(&fsc->client->osdc, req); ceph_osdc_put_request(req); if (err < 0) - goto out; + goto out_unlock; req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, ceph_vino(inode), 0, &len, 1, 3, @@ -1736,10 +1710,11 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) ci->i_truncate_size, false); if (IS_ERR(req)) { err = PTR_ERR(req); - goto out; + goto out_unlock; } - osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false); + pages[0] = folio_page(folio, 0); + osd_req_op_extent_osd_data_pages(req, 1, pages, len, 0, false, false); { __le64 xattr_buf = cpu_to_le64(inline_version); @@ -1749,7 +1724,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) CEPH_OSD_CMPXATTR_OP_GT, CEPH_OSD_CMPXATTR_MODE_U64); if (err) - goto out_put; + goto out_put_req; } { @@ -1760,7 +1735,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) "inline_version", xattr_buf, xattr_len, 0, 0); if (err) - goto out_put; + goto out_put_req; } req->r_mtime = inode->i_mtime; @@ -1771,19 +1746,28 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency, req->r_end_latency, len, err); -out_put: + if (!err) { + int dirty; + + /* Set to CAP_INLINE_NONE and dirty the caps */ + down_read(&fsc->mdsc->snap_rwsem); + spin_lock(&ci->i_ceph_lock); + ci->i_inline_version = CEPH_INLINE_NONE; + dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, &prealloc_cf); + spin_unlock(&ci->i_ceph_lock); + up_read(&fsc->mdsc->snap_rwsem); + if (dirty) + __mark_inode_dirty(inode, dirty); + } +out_put_req: ceph_osdc_put_request(req); if (err == -ECANCELED) err = 0; +out_unlock: + folio_unlock(folio); + folio_put(folio); out: - if (page && page != locked_page) { - if (from_pagecache) { - unlock_page(page); - put_page(page); - } else - __free_pages(page, 0); - } - + ceph_free_cap_flush(prealloc_cf); dout("uninline_data %p %llx.%llx inline_version %llu = %d\n", inode, ceph_vinop(inode), inline_version, err); return err; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index b472cd066d1c..f1ad6884d4da 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1915,6 +1915,13 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, ceph_get_mds_session(session); spin_lock(&ci->i_ceph_lock); + if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE) { + /* Don't send messages until we get async create reply */ + spin_unlock(&ci->i_ceph_lock); + ceph_put_mds_session(session); + return; + } + if (ci->i_ceph_flags & CEPH_I_FLUSH) flags |= CHECK_CAPS_FLUSH; retry: @@ -2409,6 +2416,9 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) dout("write_inode %p wait=%d\n", inode, wait); ceph_fscache_unpin_writeback(inode, wbc); if (wait) { + err = ceph_wait_on_async_create(inode); + if (err) + return err; dirty = try_flush_caps(inode, &flush_tid); if (dirty) err = wait_event_interruptible(ci->i_cap_wq, @@ -2439,6 +2449,10 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, u64 first_tid = 0; u64 last_snap_flush = 0; + /* Don't do anything until create reply comes in */ + if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE) + return; + ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH; list_for_each_entry_reverse(cf, &ci->i_cap_flush_list, i_list) { @@ -4152,7 +4166,6 @@ void ceph_handle_caps(struct ceph_mds_session *session, /* lookup ino */ inode = ceph_find_inode(mdsc->fsc->sb, vino); - ci = ceph_inode(inode); dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino, vino.snap, inode); @@ -4178,6 +4191,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, } goto flush_cap_releases; } + ci = ceph_inode(inode); /* these will work even if we don't have a cap yet */ switch (op) { diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 3cf7c9c1085b..bec3c4549c07 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -175,7 +175,7 @@ static int metrics_latency_show(struct seq_file *s, void *p) struct ceph_fs_client *fsc = s->private; struct ceph_client_metric *cm = &fsc->mdsc->metric; struct ceph_metric *m; - s64 total, sum, avg, min, max, sq; + s64 total, avg, min, max, sq; int i; seq_printf(s, "item total avg_lat(us) min_lat(us) max_lat(us) stdev(us)\n"); @@ -185,8 +185,7 @@ static int metrics_latency_show(struct seq_file *s, void *p) m = &cm->metric[i]; spin_lock(&m->lock); total = m->total; - sum = m->latency_sum; - avg = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum, total) : 0; + avg = m->latency_avg; min = m->latency_min; max = m->latency_max; sq = m->latency_sq_sum; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 133dbd9338e7..eae417d71136 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -145,7 +145,7 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx, return ERR_PTR(-EAGAIN); } /* reading/filling the cache are serialized by - i_mutex, no need to use page lock */ + i_rwsem, no need to use page lock */ unlock_page(cache_ctl->page); cache_ctl->dentries = kmap(cache_ctl->page); } @@ -155,7 +155,7 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx, rcu_read_lock(); spin_lock(&parent->d_lock); /* check i_size again here, because empty directory can be - * marked as complete while not holding the i_mutex. */ + * marked as complete while not holding the i_rwsem. */ if (ceph_dir_is_complete_ordered(dir) && ptr_pos < i_size_read(dir)) dentry = cache_ctl->dentries[cache_ctl->index]; else @@ -478,8 +478,11 @@ more: 2 : (fpos_off(rde->offset) + 1); err = note_last_dentry(dfi, rde->name, rde->name_len, next_offset); - if (err) + if (err) { + ceph_mdsc_put_request(dfi->last_readdir); + dfi->last_readdir = NULL; return err; + } } else if (req->r_reply_info.dir_end) { dfi->next_offset = 2; /* keep last name */ @@ -520,6 +523,12 @@ more: if (!dir_emit(ctx, rde->name, rde->name_len, ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)), le32_to_cpu(rde->inode.in->mode) >> 12)) { + /* + * NOTE: Here no need to put the 'dfi->last_readdir', + * because when dir_emit stops us it's most likely + * doesn't have enough memory, etc. So for next readdir + * it will continue. + */ dout("filldir stopping us...\n"); return 0; } @@ -671,7 +680,7 @@ struct dentry *ceph_handle_snapdir(struct ceph_mds_request *req, struct dentry *dentry) { struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); - struct inode *parent = d_inode(dentry->d_parent); /* we hold i_mutex */ + struct inode *parent = d_inode(dentry->d_parent); /* we hold i_rwsem */ /* .snap dir? */ if (ceph_snap(parent) == CEPH_NOSNAP && diff --git a/fs/ceph/file.c b/fs/ceph/file.c index bbed3224ad68..feb75eb1cd82 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -207,6 +207,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, struct ceph_mount_options *opt = ceph_inode_to_client(&ci->vfs_inode)->mount_options; struct ceph_file_info *fi; + int ret; dout("%s %p %p 0%o (%s)\n", __func__, inode, file, inode->i_mode, isdir ? "dir" : "regular"); @@ -240,7 +241,22 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, INIT_LIST_HEAD(&fi->rw_contexts); fi->filp_gen = READ_ONCE(ceph_inode_to_client(inode)->filp_gen); + if ((file->f_mode & FMODE_WRITE) && + ci->i_inline_version != CEPH_INLINE_NONE) { + ret = ceph_uninline_data(file); + if (ret < 0) + goto error; + } + return 0; + +error: + ceph_fscache_unuse_cookie(inode, file->f_mode & FMODE_WRITE); + ceph_put_fmode(ci, fi->fmode, 1); + kmem_cache_free(ceph_file_cachep, fi); + /* wake up anyone waiting for caps on this inode */ + wake_up_all(&ci->i_cap_wq); + return ret; } /* @@ -516,52 +532,67 @@ static void restore_deleg_ino(struct inode *dir, u64 ino) } } +static void wake_async_create_waiters(struct inode *inode, + struct ceph_mds_session *session) +{ + struct ceph_inode_info *ci = ceph_inode(inode); + + spin_lock(&ci->i_ceph_lock); + if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE) { + ci->i_ceph_flags &= ~CEPH_I_ASYNC_CREATE; + wake_up_bit(&ci->i_ceph_flags, CEPH_ASYNC_CREATE_BIT); + } + ceph_kick_flushing_inode_caps(session, ci); + spin_unlock(&ci->i_ceph_lock); +} + static void ceph_async_create_cb(struct ceph_mds_client *mdsc, struct ceph_mds_request *req) { + struct dentry *dentry = req->r_dentry; + struct inode *dinode = d_inode(dentry); + struct inode *tinode = req->r_target_inode; int result = req->r_err ? req->r_err : le32_to_cpu(req->r_reply_info.head->result); + WARN_ON_ONCE(dinode && tinode && dinode != tinode); + + /* MDS changed -- caller must resubmit */ if (result == -EJUKEBOX) goto out; mapping_set_error(req->r_parent->i_mapping, result); if (result) { - struct dentry *dentry = req->r_dentry; - struct inode *inode = d_inode(dentry); int pathlen = 0; u64 base = 0; char *path = ceph_mdsc_build_path(req->r_dentry, &pathlen, &base, 0); + pr_warn("ceph: async create failure path=(%llx)%s result=%d!\n", + base, IS_ERR(path) ? "<>" : path, result); + ceph_mdsc_free_path(path, pathlen); + ceph_dir_clear_complete(req->r_parent); if (!d_unhashed(dentry)) d_drop(dentry); - ceph_inode_shutdown(inode); - - pr_warn("ceph: async create failure path=(%llx)%s result=%d!\n", - base, IS_ERR(path) ? "<>" : path, result); - ceph_mdsc_free_path(path, pathlen); + if (dinode) { + mapping_set_error(dinode->i_mapping, result); + ceph_inode_shutdown(dinode); + wake_async_create_waiters(dinode, req->r_session); + } } - if (req->r_target_inode) { - struct ceph_inode_info *ci = ceph_inode(req->r_target_inode); - u64 ino = ceph_vino(req->r_target_inode).ino; + if (tinode) { + u64 ino = ceph_vino(tinode).ino; if (req->r_deleg_ino != ino) pr_warn("%s: inode number mismatch! err=%d deleg_ino=0x%llx target=0x%llx\n", __func__, req->r_err, req->r_deleg_ino, ino); - mapping_set_error(req->r_target_inode->i_mapping, result); - spin_lock(&ci->i_ceph_lock); - if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE) { - ci->i_ceph_flags &= ~CEPH_I_ASYNC_CREATE; - wake_up_bit(&ci->i_ceph_flags, CEPH_ASYNC_CREATE_BIT); - } - ceph_kick_flushing_inode_caps(req->r_session, ci); - spin_unlock(&ci->i_ceph_lock); + mapping_set_error(tinode->i_mapping, result); + wake_async_create_waiters(tinode, req->r_session); } else if (!result) { pr_warn("%s: no req->r_target_inode for 0x%llx\n", __func__, req->r_deleg_ino); @@ -1041,7 +1072,6 @@ static void ceph_aio_complete(struct inode *inode, } spin_lock(&ci->i_ceph_lock); - ci->i_inline_version = CEPH_INLINE_NONE; dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, &aio_req->prealloc_cf); spin_unlock(&ci->i_ceph_lock); @@ -1778,12 +1808,6 @@ retry_snap: if (err) goto out; - if (ci->i_inline_version != CEPH_INLINE_NONE) { - err = ceph_uninline_data(file, NULL); - if (err < 0) - goto out; - } - dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n", inode, ceph_vinop(inode), pos, count, i_size_read(inode)); if (!(fi->flags & CEPH_F_SYNC) && !direct_lock) @@ -1855,7 +1879,6 @@ retry_snap: int dirty; spin_lock(&ci->i_ceph_lock); - ci->i_inline_version = CEPH_INLINE_NONE; dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, &prealloc_cf); spin_unlock(&ci->i_ceph_lock); @@ -2109,12 +2132,6 @@ static long ceph_fallocate(struct file *file, int mode, goto unlock; } - if (ci->i_inline_version != CEPH_INLINE_NONE) { - ret = ceph_uninline_data(file, NULL); - if (ret < 0) - goto unlock; - } - size = i_size_read(inode); /* Are we punching a hole beyond EOF? */ @@ -2139,7 +2156,6 @@ static long ceph_fallocate(struct file *file, int mode, if (!ret) { spin_lock(&ci->i_ceph_lock); - ci->i_inline_version = CEPH_INLINE_NONE; dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, &prealloc_cf); spin_unlock(&ci->i_ceph_lock); @@ -2532,7 +2548,6 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, } /* Mark Fw dirty */ spin_lock(&dst_ci->i_ceph_lock); - dst_ci->i_inline_version = CEPH_INLINE_NONE; dirty = __ceph_mark_dirty_caps(dst_ci, CEPH_CAP_FILE_WR, &prealloc_cf); spin_unlock(&dst_ci->i_ceph_lock); if (dirty) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 9cfa6c730519..d80911dc91c2 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -87,13 +87,13 @@ struct inode *ceph_get_snapdir(struct inode *parent) if (!S_ISDIR(parent->i_mode)) { pr_warn_once("bad snapdir parent type (mode=0%o)\n", parent->i_mode); - return ERR_PTR(-ENOTDIR); + goto err; } if (!(inode->i_state & I_NEW) && !S_ISDIR(inode->i_mode)) { pr_warn_once("bad snapdir inode type (mode=0%o)\n", inode->i_mode); - return ERR_PTR(-ENOTDIR); + goto err; } inode->i_mode = parent->i_mode; @@ -113,6 +113,12 @@ struct inode *ceph_get_snapdir(struct inode *parent) } return inode; +err: + if ((inode->i_state & I_NEW)) + discard_new_inode(inode); + else + iput(inode); + return ERR_PTR(-ENOTDIR); } const struct inode_operations ceph_file_iops = { @@ -1201,7 +1207,7 @@ out_unlock: /* * splice a dentry to an inode. - * caller must hold directory i_mutex for this to be safe. + * caller must hold directory i_rwsem for this to be safe. */ static int splice_dentry(struct dentry **pdn, struct inode *in) { @@ -1598,7 +1604,7 @@ static int fill_readdir_cache(struct inode *dir, struct dentry *dn, return idx == 0 ? -ENOMEM : 0; } /* reading/filling the cache are serialized by - * i_mutex, no need to use page lock */ + * i_rwsem, no need to use page lock */ unlock_page(ctl->page); ctl->dentries = kmap(ctl->page); if (idx == 0) @@ -2301,6 +2307,57 @@ int __ceph_do_getattr(struct inode *inode, struct page *locked_page, return err; } +int ceph_do_getvxattr(struct inode *inode, const char *name, void *value, + size_t size) +{ + struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); + struct ceph_mds_client *mdsc = fsc->mdsc; + struct ceph_mds_request *req; + int mode = USE_AUTH_MDS; + int err; + char *xattr_value; + size_t xattr_value_len; + + req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETVXATTR, mode); + if (IS_ERR(req)) { + err = -ENOMEM; + goto out; + } + + req->r_path2 = kstrdup(name, GFP_NOFS); + if (!req->r_path2) { + err = -ENOMEM; + goto put; + } + + ihold(inode); + req->r_inode = inode; + err = ceph_mdsc_do_request(mdsc, NULL, req); + if (err < 0) + goto put; + + xattr_value = req->r_reply_info.xattr_info.xattr_value; + xattr_value_len = req->r_reply_info.xattr_info.xattr_value_len; + + dout("do_getvxattr xattr_value_len:%zu, size:%zu\n", xattr_value_len, size); + + err = (int)xattr_value_len; + if (size == 0) + goto put; + + if (xattr_value_len > size) { + err = -ERANGE; + goto put; + } + + memcpy(value, xattr_value, xattr_value_len); +put: + ceph_mdsc_put_request(req); +out: + dout("do_getvxattr result=%d\n", err); + return err; +} + /* * Check inode permissions. We verify we have a valid value for diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index d1f154aec249..3e2843e86e27 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c @@ -111,10 +111,10 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, req->r_args.filelock_change.length = cpu_to_le64(length); req->r_args.filelock_change.wait = wait; - if (wait) - req->r_wait_for_completion = ceph_lock_wait_for_completion; - - err = ceph_mdsc_do_request(mdsc, inode, req); + err = ceph_mdsc_submit_request(mdsc, inode, req); + if (!err) + err = ceph_mdsc_wait_request(mdsc, req, wait ? + ceph_lock_wait_for_completion : NULL); if (!err && operation == CEPH_MDS_OP_GETFILELOCK) { fl->fl_pid = -le64_to_cpu(req->r_reply_info.filelock_reply->pid); if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index c30eefc0ac19..fa38c013126d 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -555,6 +555,28 @@ bad: return -EIO; } +static int parse_reply_info_getvxattr(void **p, void *end, + struct ceph_mds_reply_info_parsed *info, + u64 features) +{ + u32 value_len; + + ceph_decode_skip_8(p, end, bad); /* skip current version: 1 */ + ceph_decode_skip_8(p, end, bad); /* skip first version: 1 */ + ceph_decode_skip_32(p, end, bad); /* skip payload length */ + + ceph_decode_32_safe(p, end, value_len, bad); + + if (value_len == end - *p) { + info->xattr_info.xattr_value = *p; + info->xattr_info.xattr_value_len = value_len; + *p = end; + return value_len; + } +bad: + return -EIO; +} + /* * parse extra results */ @@ -570,6 +592,8 @@ static int parse_reply_info_extra(void **p, void *end, return parse_reply_info_readdir(p, end, info, features); else if (op == CEPH_MDS_OP_CREATE) return parse_reply_info_create(p, end, info, features, s); + else if (op == CEPH_MDS_OP_GETVXATTR) + return parse_reply_info_getvxattr(p, end, info, features); else return -EIO; } @@ -2178,7 +2202,8 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, order = get_order(size * num_entries); while (order >= 0) { rinfo->dir_entries = (void*)__get_free_pages(GFP_KERNEL | - __GFP_NOWARN, + __GFP_NOWARN | + __GFP_ZERO, order); if (rinfo->dir_entries) break; @@ -2946,15 +2971,16 @@ int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, struct inode *dir, return err; } -static int ceph_mdsc_wait_request(struct ceph_mds_client *mdsc, - struct ceph_mds_request *req) +int ceph_mdsc_wait_request(struct ceph_mds_client *mdsc, + struct ceph_mds_request *req, + ceph_mds_request_wait_callback_t wait_func) { int err; /* wait */ dout("do_request waiting\n"); - if (!req->r_timeout && req->r_wait_for_completion) { - err = req->r_wait_for_completion(mdsc, req); + if (wait_func) { + err = wait_func(mdsc, req); } else { long timeleft = wait_for_completion_killable_timeout( &req->r_completion, @@ -3011,7 +3037,7 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, /* issue */ err = ceph_mdsc_submit_request(mdsc, dir, req); if (!err) - err = ceph_mdsc_wait_request(mdsc, req); + err = ceph_mdsc_wait_request(mdsc, req, NULL); dout("do_request %p done, result %d\n", req, err); return err; } @@ -3097,35 +3123,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) result = le32_to_cpu(head->result); - /* - * Handle an ESTALE - * if we're not talking to the authority, send to them - * if the authority has changed while we weren't looking, - * send to new authority - * Otherwise we just have to return an ESTALE - */ - if (result == -ESTALE) { - dout("got ESTALE on request %llu\n", req->r_tid); - req->r_resend_mds = -1; - if (req->r_direct_mode != USE_AUTH_MDS) { - dout("not using auth, setting for that now\n"); - req->r_direct_mode = USE_AUTH_MDS; - __do_request(mdsc, req); - mutex_unlock(&mdsc->mutex); - goto out; - } else { - int mds = __choose_mds(mdsc, req, NULL); - if (mds >= 0 && mds != req->r_session->s_mds) { - dout("but auth changed, so resending\n"); - __do_request(mdsc, req); - mutex_unlock(&mdsc->mutex); - goto out; - } - } - dout("have to return ESTALE on request %llu\n", req->r_tid); - } - - if (head->safe) { set_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags); __unregister_request(mdsc, req); @@ -4841,7 +4838,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) mutex_unlock(&mdsc->mutex); ceph_cleanup_snapid_map(mdsc); - ceph_cleanup_empty_realms(mdsc); + ceph_cleanup_global_and_empty_realms(mdsc); cancel_work_sync(&mdsc->cap_reclaim_work); cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 97c7f7bfa55f..33497846e47e 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -100,6 +100,11 @@ struct ceph_mds_reply_dir_entry { loff_t offset; }; +struct ceph_mds_reply_xattr { + char *xattr_value; + size_t xattr_value_len; +}; + /* * parsed info about an mds reply, including information about * either: 1) the target inode and/or its parent directory and dentry, @@ -115,6 +120,7 @@ struct ceph_mds_reply_info_parsed { char *dname; u32 dname_len; struct ceph_mds_reply_lease *dlease; + struct ceph_mds_reply_xattr xattr_info; /* extra */ union { @@ -274,8 +280,8 @@ struct ceph_mds_request { union ceph_mds_request_args r_args; int r_fmode; /* file mode, if expecting cap */ - const struct cred *r_cred; int r_request_release_offset; + const struct cred *r_cred; struct timespec64 r_stamp; /* for choosing which mds to send this request to */ @@ -296,12 +302,11 @@ struct ceph_mds_request { struct ceph_msg *r_reply; struct ceph_mds_reply_info_parsed r_reply_info; int r_err; - + u32 r_readdir_offset; struct page *r_locked_page; int r_dir_caps; int r_num_caps; - u32 r_readdir_offset; unsigned long r_timeout; /* optional. jiffies, 0 is "wait forever" */ unsigned long r_started; /* start time to measure timeout against */ @@ -329,7 +334,6 @@ struct ceph_mds_request { struct completion r_completion; struct completion r_safe_completion; ceph_mds_request_callback_t r_callback; - ceph_mds_request_wait_callback_t r_wait_for_completion; struct list_head r_unsafe_item; /* per-session unsafe list item */ long long r_dir_release_cnt; @@ -507,6 +511,9 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode); extern int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, struct inode *dir, struct ceph_mds_request *req); +int ceph_mdsc_wait_request(struct ceph_mds_client *mdsc, + struct ceph_mds_request *req, + ceph_mds_request_wait_callback_t wait_func); extern int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, struct inode *dir, struct ceph_mds_request *req); diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c index 0fcba68f9a99..c47347d2e84e 100644 --- a/fs/ceph/metric.c +++ b/fs/ceph/metric.c @@ -8,6 +8,12 @@ #include "metric.h" #include "mds_client.h" +static void ktime_to_ceph_timespec(struct ceph_timespec *ts, ktime_t val) +{ + struct timespec64 t = ktime_to_timespec64(val); + ceph_encode_timespec64(ts, &t); +} + static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc, struct ceph_mds_session *s) { @@ -26,7 +32,6 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc, u64 nr_caps = atomic64_read(&m->total_caps); u32 header_len = sizeof(struct ceph_metric_header); struct ceph_msg *msg; - struct timespec64 ts; s64 sum; s32 items = 0; s32 len; @@ -59,37 +64,40 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc, /* encode the read latency metric */ read = (struct ceph_metric_read_latency *)(cap + 1); read->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY); - read->header.ver = 1; + read->header.ver = 2; read->header.compat = 1; read->header.data_len = cpu_to_le32(sizeof(*read) - header_len); sum = m->metric[METRIC_READ].latency_sum; - jiffies_to_timespec64(sum, &ts); - read->sec = cpu_to_le32(ts.tv_sec); - read->nsec = cpu_to_le32(ts.tv_nsec); + ktime_to_ceph_timespec(&read->lat, sum); + ktime_to_ceph_timespec(&read->avg, m->metric[METRIC_READ].latency_avg); + read->sq_sum = cpu_to_le64(m->metric[METRIC_READ].latency_sq_sum); + read->count = cpu_to_le64(m->metric[METRIC_READ].total); items++; /* encode the write latency metric */ write = (struct ceph_metric_write_latency *)(read + 1); write->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY); - write->header.ver = 1; + write->header.ver = 2; write->header.compat = 1; write->header.data_len = cpu_to_le32(sizeof(*write) - header_len); sum = m->metric[METRIC_WRITE].latency_sum; - jiffies_to_timespec64(sum, &ts); - write->sec = cpu_to_le32(ts.tv_sec); - write->nsec = cpu_to_le32(ts.tv_nsec); + ktime_to_ceph_timespec(&write->lat, sum); + ktime_to_ceph_timespec(&write->avg, m->metric[METRIC_WRITE].latency_avg); + write->sq_sum = cpu_to_le64(m->metric[METRIC_WRITE].latency_sq_sum); + write->count = cpu_to_le64(m->metric[METRIC_WRITE].total); items++; /* encode the metadata latency metric */ meta = (struct ceph_metric_metadata_latency *)(write + 1); meta->header.type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY); - meta->header.ver = 1; + meta->header.ver = 2; meta->header.compat = 1; meta->header.data_len = cpu_to_le32(sizeof(*meta) - header_len); sum = m->metric[METRIC_METADATA].latency_sum; - jiffies_to_timespec64(sum, &ts); - meta->sec = cpu_to_le32(ts.tv_sec); - meta->nsec = cpu_to_le32(ts.tv_nsec); + ktime_to_ceph_timespec(&meta->lat, sum); + ktime_to_ceph_timespec(&meta->avg, m->metric[METRIC_METADATA].latency_avg); + meta->sq_sum = cpu_to_le64(m->metric[METRIC_METADATA].latency_sq_sum); + meta->count = cpu_to_le64(m->metric[METRIC_METADATA].total); items++; /* encode the dentry lease metric */ @@ -250,6 +258,7 @@ int ceph_metric_init(struct ceph_client_metric *m) metric->size_max = 0; metric->total = 0; metric->latency_sum = 0; + metric->latency_avg = 0; metric->latency_sq_sum = 0; metric->latency_min = KTIME_MAX; metric->latency_max = 0; @@ -307,20 +316,19 @@ void ceph_metric_destroy(struct ceph_client_metric *m) max = new; \ } -static inline void __update_stdev(ktime_t total, ktime_t lsum, - ktime_t *sq_sump, ktime_t lat) +static inline void __update_mean_and_stdev(ktime_t total, ktime_t *lavg, + ktime_t *sq_sump, ktime_t lat) { - ktime_t avg, sq; + ktime_t avg; - if (unlikely(total == 1)) - return; - - /* the sq is (lat - old_avg) * (lat - new_avg) */ - avg = DIV64_U64_ROUND_CLOSEST((lsum - lat), (total - 1)); - sq = lat - avg; - avg = DIV64_U64_ROUND_CLOSEST(lsum, total); - sq = sq * (lat - avg); - *sq_sump += sq; + if (unlikely(total == 1)) { + *lavg = lat; + } else { + /* the sq is (lat - old_avg) * (lat - new_avg) */ + avg = *lavg + div64_s64(lat - *lavg, total); + *sq_sump += (lat - *lavg)*(lat - avg); + *lavg = avg; + } } void ceph_update_metrics(struct ceph_metric *m, @@ -339,6 +347,7 @@ void ceph_update_metrics(struct ceph_metric *m, METRIC_UPDATE_MIN_MAX(m->size_min, m->size_max, size); m->latency_sum += lat; METRIC_UPDATE_MIN_MAX(m->latency_min, m->latency_max, lat); - __update_stdev(total, m->latency_sum, &m->latency_sq_sum, lat); + __update_mean_and_stdev(total, &m->latency_avg, &m->latency_sq_sum, + lat); spin_unlock(&m->lock); } diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h index bb45608181e7..0d0c44bd3332 100644 --- a/fs/ceph/metric.h +++ b/fs/ceph/metric.h @@ -2,7 +2,7 @@ #ifndef _FS_CEPH_MDS_METRIC_H #define _FS_CEPH_MDS_METRIC_H -#include +#include #include #include @@ -19,27 +19,39 @@ enum ceph_metric_type { CLIENT_METRIC_TYPE_OPENED_INODES, CLIENT_METRIC_TYPE_READ_IO_SIZES, CLIENT_METRIC_TYPE_WRITE_IO_SIZES, + CLIENT_METRIC_TYPE_AVG_READ_LATENCY, + CLIENT_METRIC_TYPE_STDEV_READ_LATENCY, + CLIENT_METRIC_TYPE_AVG_WRITE_LATENCY, + CLIENT_METRIC_TYPE_STDEV_WRITE_LATENCY, + CLIENT_METRIC_TYPE_AVG_METADATA_LATENCY, + CLIENT_METRIC_TYPE_STDEV_METADATA_LATENCY, - CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_WRITE_IO_SIZES, + CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_STDEV_METADATA_LATENCY, }; /* * This will always have the highest metric bit value * as the last element of the array. */ -#define CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED { \ - CLIENT_METRIC_TYPE_CAP_INFO, \ - CLIENT_METRIC_TYPE_READ_LATENCY, \ - CLIENT_METRIC_TYPE_WRITE_LATENCY, \ - CLIENT_METRIC_TYPE_METADATA_LATENCY, \ - CLIENT_METRIC_TYPE_DENTRY_LEASE, \ - CLIENT_METRIC_TYPE_OPENED_FILES, \ - CLIENT_METRIC_TYPE_PINNED_ICAPS, \ - CLIENT_METRIC_TYPE_OPENED_INODES, \ - CLIENT_METRIC_TYPE_READ_IO_SIZES, \ - CLIENT_METRIC_TYPE_WRITE_IO_SIZES, \ - \ - CLIENT_METRIC_TYPE_MAX, \ +#define CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED { \ + CLIENT_METRIC_TYPE_CAP_INFO, \ + CLIENT_METRIC_TYPE_READ_LATENCY, \ + CLIENT_METRIC_TYPE_WRITE_LATENCY, \ + CLIENT_METRIC_TYPE_METADATA_LATENCY, \ + CLIENT_METRIC_TYPE_DENTRY_LEASE, \ + CLIENT_METRIC_TYPE_OPENED_FILES, \ + CLIENT_METRIC_TYPE_PINNED_ICAPS, \ + CLIENT_METRIC_TYPE_OPENED_INODES, \ + CLIENT_METRIC_TYPE_READ_IO_SIZES, \ + CLIENT_METRIC_TYPE_WRITE_IO_SIZES, \ + CLIENT_METRIC_TYPE_AVG_READ_LATENCY, \ + CLIENT_METRIC_TYPE_STDEV_READ_LATENCY, \ + CLIENT_METRIC_TYPE_AVG_WRITE_LATENCY, \ + CLIENT_METRIC_TYPE_STDEV_WRITE_LATENCY, \ + CLIENT_METRIC_TYPE_AVG_METADATA_LATENCY, \ + CLIENT_METRIC_TYPE_STDEV_METADATA_LATENCY, \ + \ + CLIENT_METRIC_TYPE_MAX, \ } struct ceph_metric_header { @@ -60,22 +72,28 @@ struct ceph_metric_cap { /* metric read latency header */ struct ceph_metric_read_latency { struct ceph_metric_header header; - __le32 sec; - __le32 nsec; + struct ceph_timespec lat; + struct ceph_timespec avg; + __le64 sq_sum; + __le64 count; } __packed; /* metric write latency header */ struct ceph_metric_write_latency { struct ceph_metric_header header; - __le32 sec; - __le32 nsec; + struct ceph_timespec lat; + struct ceph_timespec avg; + __le64 sq_sum; + __le64 count; } __packed; /* metric metadata latency header */ struct ceph_metric_metadata_latency { struct ceph_metric_header header; - __le32 sec; - __le32 nsec; + struct ceph_timespec lat; + struct ceph_timespec avg; + __le64 sq_sum; + __le64 count; } __packed; /* metric dentry lease header */ @@ -140,6 +158,7 @@ struct ceph_metric { u64 size_min; u64 size_max; ktime_t latency_sum; + ktime_t latency_avg; ktime_t latency_sq_sum; ktime_t latency_min; ktime_t latency_max; diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index b41e6724c591..322ee5add942 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -121,18 +121,23 @@ static struct ceph_snap_realm *ceph_create_snap_realm( if (!realm) return ERR_PTR(-ENOMEM); - atomic_set(&realm->nref, 1); /* for caller */ + /* Do not release the global dummy snaprealm until unmouting */ + if (ino == CEPH_INO_GLOBAL_SNAPREALM) + atomic_set(&realm->nref, 2); + else + atomic_set(&realm->nref, 1); realm->ino = ino; INIT_LIST_HEAD(&realm->children); INIT_LIST_HEAD(&realm->child_item); INIT_LIST_HEAD(&realm->empty_item); INIT_LIST_HEAD(&realm->dirty_item); + INIT_LIST_HEAD(&realm->rebuild_item); INIT_LIST_HEAD(&realm->inodes_with_caps); spin_lock_init(&realm->inodes_with_caps_lock); __insert_snap_realm(&mdsc->snap_realms, realm); mdsc->num_snap_realms++; - dout("create_snap_realm %llx %p\n", realm->ino, realm); + dout("%s %llx %p\n", __func__, realm->ino, realm); return realm; } @@ -156,7 +161,7 @@ static struct ceph_snap_realm *__lookup_snap_realm(struct ceph_mds_client *mdsc, else if (ino > r->ino) n = n->rb_right; else { - dout("lookup_snap_realm %llx %p\n", r->ino, r); + dout("%s %llx %p\n", __func__, r->ino, r); return r; } } @@ -184,7 +189,7 @@ static void __destroy_snap_realm(struct ceph_mds_client *mdsc, { lockdep_assert_held_write(&mdsc->snap_rwsem); - dout("__destroy_snap_realm %p %llx\n", realm, realm->ino); + dout("%s %p %llx\n", __func__, realm, realm->ino); rb_erase(&realm->node, &mdsc->snap_realms); mdsc->num_snap_realms--; @@ -260,9 +265,14 @@ static void __cleanup_empty_realms(struct ceph_mds_client *mdsc) spin_unlock(&mdsc->snap_empty_lock); } -void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc) +void ceph_cleanup_global_and_empty_realms(struct ceph_mds_client *mdsc) { + struct ceph_snap_realm *global_realm; + down_write(&mdsc->snap_rwsem); + global_realm = __lookup_snap_realm(mdsc, CEPH_INO_GLOBAL_SNAPREALM); + if (global_realm) + ceph_put_snap_realm(mdsc, global_realm); __cleanup_empty_realms(mdsc); up_write(&mdsc->snap_rwsem); } @@ -292,9 +302,8 @@ static int adjust_snap_realm_parent(struct ceph_mds_client *mdsc, if (IS_ERR(parent)) return PTR_ERR(parent); } - dout("adjust_snap_realm_parent %llx %p: %llx %p -> %llx %p\n", - realm->ino, realm, realm->parent_ino, realm->parent, - parentino, parent); + dout("%s %llx %p: %llx %p -> %llx %p\n", __func__, realm->ino, + realm, realm->parent_ino, realm->parent, parentino, parent); if (realm->parent) { list_del_init(&realm->child_item); ceph_put_snap_realm(mdsc, realm->parent); @@ -320,7 +329,8 @@ static int cmpu64_rev(const void *a, const void *b) * build the snap context for a given realm. */ static int build_snap_context(struct ceph_snap_realm *realm, - struct list_head* dirty_realms) + struct list_head *realm_queue, + struct list_head *dirty_realms) { struct ceph_snap_realm *parent = realm->parent; struct ceph_snap_context *snapc; @@ -334,9 +344,9 @@ static int build_snap_context(struct ceph_snap_realm *realm, */ if (parent) { if (!parent->cached_context) { - err = build_snap_context(parent, dirty_realms); - if (err) - goto fail; + /* add to the queue head */ + list_add(&parent->rebuild_item, realm_queue); + return 1; } num += parent->cached_context->num_snaps; } @@ -349,9 +359,8 @@ static int build_snap_context(struct ceph_snap_realm *realm, realm->cached_context->seq == realm->seq && (!parent || realm->cached_context->seq >= parent->cached_context->seq)) { - dout("build_snap_context %llx %p: %p seq %lld (%u snaps)" - " (unchanged)\n", - realm->ino, realm, realm->cached_context, + dout("%s %llx %p: %p seq %lld (%u snaps) (unchanged)\n", + __func__, realm->ino, realm, realm->cached_context, realm->cached_context->seq, (unsigned int)realm->cached_context->num_snaps); return 0; @@ -390,9 +399,8 @@ static int build_snap_context(struct ceph_snap_realm *realm, sort(snapc->snaps, num, sizeof(u64), cmpu64_rev, NULL); snapc->num_snaps = num; - dout("build_snap_context %llx %p: %p seq %lld (%u snaps)\n", - realm->ino, realm, snapc, snapc->seq, - (unsigned int) snapc->num_snaps); + dout("%s %llx %p: %p seq %lld (%u snaps)\n", __func__, realm->ino, + realm, snapc, snapc->seq, (unsigned int) snapc->num_snaps); ceph_put_snap_context(realm->cached_context); realm->cached_context = snapc; @@ -409,8 +417,7 @@ fail: ceph_put_snap_context(realm->cached_context); realm->cached_context = NULL; } - pr_err("build_snap_context %llx %p fail %d\n", realm->ino, - realm, err); + pr_err("%s %llx %p fail %d\n", __func__, realm->ino, realm, err); return err; } @@ -420,13 +427,50 @@ fail: static void rebuild_snap_realms(struct ceph_snap_realm *realm, struct list_head *dirty_realms) { - struct ceph_snap_realm *child; + LIST_HEAD(realm_queue); + int last = 0; + bool skip = false; - dout("rebuild_snap_realms %llx %p\n", realm->ino, realm); - build_snap_context(realm, dirty_realms); + list_add_tail(&realm->rebuild_item, &realm_queue); - list_for_each_entry(child, &realm->children, child_item) - rebuild_snap_realms(child, dirty_realms); + while (!list_empty(&realm_queue)) { + struct ceph_snap_realm *_realm, *child; + + _realm = list_first_entry(&realm_queue, + struct ceph_snap_realm, + rebuild_item); + + /* + * If the last building failed dues to memory + * issue, just empty the realm_queue and return + * to avoid infinite loop. + */ + if (last < 0) { + list_del_init(&_realm->rebuild_item); + continue; + } + + last = build_snap_context(_realm, &realm_queue, dirty_realms); + dout("%s %llx %p, %s\n", __func__, _realm->ino, _realm, + last > 0 ? "is deferred" : !last ? "succeeded" : "failed"); + + /* is any child in the list ? */ + list_for_each_entry(child, &_realm->children, child_item) { + if (!list_empty(&child->rebuild_item)) { + skip = true; + break; + } + } + + if (!skip) { + list_for_each_entry(child, &_realm->children, child_item) + list_add_tail(&child->rebuild_item, &realm_queue); + } + + /* last == 1 means need to build parent first */ + if (last <= 0) + list_del_init(&_realm->rebuild_item); + } } @@ -474,23 +518,15 @@ static bool has_new_snaps(struct ceph_snap_context *o, * Caller must hold snap_rwsem for read (i.e., the realm topology won't * change). */ -static void ceph_queue_cap_snap(struct ceph_inode_info *ci) +static void ceph_queue_cap_snap(struct ceph_inode_info *ci, + struct ceph_cap_snap **pcapsnap) { struct inode *inode = &ci->vfs_inode; - struct ceph_cap_snap *capsnap; struct ceph_snap_context *old_snapc, *new_snapc; + struct ceph_cap_snap *capsnap = *pcapsnap; struct ceph_buffer *old_blob = NULL; int used, dirty; - capsnap = kzalloc(sizeof(*capsnap), GFP_NOFS); - if (!capsnap) { - pr_err("ENOMEM allocating ceph_cap_snap on %p\n", inode); - return; - } - capsnap->cap_flush.is_capsnap = true; - INIT_LIST_HEAD(&capsnap->cap_flush.i_list); - INIT_LIST_HEAD(&capsnap->cap_flush.g_list); - spin_lock(&ci->i_ceph_lock); used = __ceph_caps_used(ci); dirty = __ceph_caps_dirty(ci); @@ -511,12 +547,14 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci) as no new writes are allowed to start when pending, so any writes in progress now were started before the previous cap_snap. lucky us. */ - dout("queue_cap_snap %p already pending\n", inode); + dout("%s %p %llx.%llx already pending\n", + __func__, inode, ceph_vinop(inode)); goto update_snapc; } if (ci->i_wrbuffer_ref_head == 0 && !(dirty & (CEPH_CAP_ANY_EXCL|CEPH_CAP_FILE_WR))) { - dout("queue_cap_snap %p nothing dirty|writing\n", inode); + dout("%s %p %llx.%llx nothing dirty|writing\n", + __func__, inode, ceph_vinop(inode)); goto update_snapc; } @@ -536,20 +574,17 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci) } else { if (!(used & CEPH_CAP_FILE_WR) && ci->i_wrbuffer_ref_head == 0) { - dout("queue_cap_snap %p " - "no new_snap|dirty_page|writing\n", inode); + dout("%s %p %llx.%llx no new_snap|dirty_page|writing\n", + __func__, inode, ceph_vinop(inode)); goto update_snapc; } } - dout("queue_cap_snap %p cap_snap %p queuing under %p %s %s\n", - inode, capsnap, old_snapc, ceph_cap_string(dirty), - capsnap->need_flush ? "" : "no_flush"); + dout("%s %p %llx.%llx cap_snap %p queuing under %p %s %s\n", + __func__, inode, ceph_vinop(inode), capsnap, old_snapc, + ceph_cap_string(dirty), capsnap->need_flush ? "" : "no_flush"); ihold(inode); - refcount_set(&capsnap->nref, 1); - INIT_LIST_HEAD(&capsnap->ci_item); - capsnap->follows = old_snapc->seq; capsnap->issued = __ceph_caps_issued(ci, NULL); capsnap->dirty = dirty; @@ -579,31 +614,30 @@ static void ceph_queue_cap_snap(struct ceph_inode_info *ci) list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps); if (used & CEPH_CAP_FILE_WR) { - dout("queue_cap_snap %p cap_snap %p snapc %p" - " seq %llu used WR, now pending\n", inode, + dout("%s %p %llx.%llx cap_snap %p snapc %p seq %llu used WR," + " now pending\n", __func__, inode, ceph_vinop(inode), capsnap, old_snapc, old_snapc->seq); capsnap->writing = 1; } else { /* note mtime, size NOW. */ __ceph_finish_cap_snap(ci, capsnap); } - capsnap = NULL; + *pcapsnap = NULL; old_snapc = NULL; update_snapc: - if (ci->i_wrbuffer_ref_head == 0 && - ci->i_wr_ref == 0 && - ci->i_dirty_caps == 0 && - ci->i_flushing_caps == 0) { - ci->i_head_snapc = NULL; - } else { + if (ci->i_wrbuffer_ref_head == 0 && + ci->i_wr_ref == 0 && + ci->i_dirty_caps == 0 && + ci->i_flushing_caps == 0) { + ci->i_head_snapc = NULL; + } else { ci->i_head_snapc = ceph_get_snap_context(new_snapc); dout(" new snapc is %p\n", new_snapc); } spin_unlock(&ci->i_ceph_lock); ceph_buffer_put(old_blob); - kfree(capsnap); ceph_put_snap_context(old_snapc); } @@ -632,27 +666,28 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, capsnap->truncate_size = ci->i_truncate_size; capsnap->truncate_seq = ci->i_truncate_seq; if (capsnap->dirty_pages) { - dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu " - "still has %d dirty pages\n", inode, capsnap, - capsnap->context, capsnap->context->seq, - ceph_cap_string(capsnap->dirty), capsnap->size, - capsnap->dirty_pages); + dout("%s %p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu " + "still has %d dirty pages\n", __func__, inode, + ceph_vinop(inode), capsnap, capsnap->context, + capsnap->context->seq, ceph_cap_string(capsnap->dirty), + capsnap->size, capsnap->dirty_pages); return 0; } /* Fb cap still in use, delay it */ if (ci->i_wb_ref) { - dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu " - "used WRBUFFER, delaying\n", inode, capsnap, - capsnap->context, capsnap->context->seq, - ceph_cap_string(capsnap->dirty), capsnap->size); + dout("%s %p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu " + "used WRBUFFER, delaying\n", __func__, inode, + ceph_vinop(inode), capsnap, capsnap->context, + capsnap->context->seq, ceph_cap_string(capsnap->dirty), + capsnap->size); capsnap->writing = 1; return 0; } ci->i_ceph_flags |= CEPH_I_FLUSH_SNAPS; - dout("finish_cap_snap %p cap_snap %p snapc %p %llu %s s=%llu\n", - inode, capsnap, capsnap->context, + dout("%s %p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu\n", + __func__, inode, ceph_vinop(inode), capsnap, capsnap->context, capsnap->context->seq, ceph_cap_string(capsnap->dirty), capsnap->size); @@ -671,8 +706,9 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm) { struct ceph_inode_info *ci; struct inode *lastinode = NULL; + struct ceph_cap_snap *capsnap = NULL; - dout("queue_realm_cap_snaps %p %llx inodes\n", realm, realm->ino); + dout("%s %p %llx inode\n", __func__, realm, realm->ino); spin_lock(&realm->inodes_with_caps_lock); list_for_each_entry(ci, &realm->inodes_with_caps, i_snap_realm_item) { @@ -682,13 +718,35 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm) spin_unlock(&realm->inodes_with_caps_lock); iput(lastinode); lastinode = inode; - ceph_queue_cap_snap(ci); + + /* + * Allocate the capsnap memory outside of ceph_queue_cap_snap() + * to reduce very possible but unnecessary frequently memory + * allocate/free in this loop. + */ + if (!capsnap) { + capsnap = kmem_cache_zalloc(ceph_cap_snap_cachep, GFP_NOFS); + if (!capsnap) { + pr_err("ENOMEM allocating ceph_cap_snap on %p\n", + inode); + return; + } + } + capsnap->cap_flush.is_capsnap = true; + refcount_set(&capsnap->nref, 1); + INIT_LIST_HEAD(&capsnap->cap_flush.i_list); + INIT_LIST_HEAD(&capsnap->cap_flush.g_list); + INIT_LIST_HEAD(&capsnap->ci_item); + + ceph_queue_cap_snap(ci, &capsnap); spin_lock(&realm->inodes_with_caps_lock); } spin_unlock(&realm->inodes_with_caps_lock); iput(lastinode); - dout("queue_realm_cap_snaps %p %llx done\n", realm, realm->ino); + if (capsnap) + kmem_cache_free(ceph_cap_snap_cachep, capsnap); + dout("%s %p %llx done\n", __func__, realm, realm->ino); } /* @@ -707,14 +765,16 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, __le64 *prior_parent_snaps; /* encoded */ struct ceph_snap_realm *realm = NULL; struct ceph_snap_realm *first_realm = NULL; - int invalidate = 0; + struct ceph_snap_realm *realm_to_rebuild = NULL; + int rebuild_snapcs; int err = -ENOMEM; LIST_HEAD(dirty_realms); lockdep_assert_held_write(&mdsc->snap_rwsem); - dout("update_snap_trace deletion=%d\n", deletion); + dout("%s deletion=%d\n", __func__, deletion); more: + rebuild_snapcs = 0; ceph_decode_need(&p, e, sizeof(*ri), bad); ri = p; p += sizeof(*ri); @@ -738,10 +798,10 @@ more: err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent)); if (err < 0) goto fail; - invalidate += err; + rebuild_snapcs += err; if (le64_to_cpu(ri->seq) > realm->seq) { - dout("update_snap_trace updating %llx %p %lld -> %lld\n", + dout("%s updating %llx %p %lld -> %lld\n", __func__, realm->ino, realm, realm->seq, le64_to_cpu(ri->seq)); /* update realm parameters, snap lists */ realm->seq = le64_to_cpu(ri->seq); @@ -763,22 +823,30 @@ more: if (realm->seq > mdsc->last_snap_seq) mdsc->last_snap_seq = realm->seq; - invalidate = 1; + rebuild_snapcs = 1; } else if (!realm->cached_context) { - dout("update_snap_trace %llx %p seq %lld new\n", + dout("%s %llx %p seq %lld new\n", __func__, realm->ino, realm, realm->seq); - invalidate = 1; + rebuild_snapcs = 1; } else { - dout("update_snap_trace %llx %p seq %lld unchanged\n", + dout("%s %llx %p seq %lld unchanged\n", __func__, realm->ino, realm, realm->seq); } - dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino, - realm, invalidate, p, e); + dout("done with %llx %p, rebuild_snapcs=%d, %p %p\n", realm->ino, + realm, rebuild_snapcs, p, e); - /* invalidate when we reach the _end_ (root) of the trace */ - if (invalidate && p >= e) - rebuild_snap_realms(realm, &dirty_realms); + /* + * this will always track the uppest parent realm from which + * we need to rebuild the snapshot contexts _downward_ in + * hierarchy. + */ + if (rebuild_snapcs) + realm_to_rebuild = realm; + + /* rebuild_snapcs when we reach the _end_ (root) of the trace */ + if (realm_to_rebuild && p >= e) + rebuild_snap_realms(realm_to_rebuild, &dirty_realms); if (!first_realm) first_realm = realm; @@ -814,7 +882,7 @@ fail: ceph_put_snap_realm(mdsc, realm); if (first_realm) ceph_put_snap_realm(mdsc, first_realm); - pr_err("update_snap_trace error %d\n", err); + pr_err("%s error %d\n", __func__, err); return err; } @@ -831,7 +899,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc) struct inode *inode; struct ceph_mds_session *session = NULL; - dout("flush_snaps\n"); + dout("%s\n", __func__); spin_lock(&mdsc->snap_flush_lock); while (!list_empty(&mdsc->snap_flush_list)) { ci = list_first_entry(&mdsc->snap_flush_list, @@ -846,7 +914,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc) spin_unlock(&mdsc->snap_flush_lock); ceph_put_mds_session(session); - dout("flush_snaps done\n"); + dout("%s done\n", __func__); } /** @@ -928,8 +996,8 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, trace_len = le32_to_cpu(h->trace_len); p += sizeof(*h); - dout("handle_snap from mds%d op %s split %llx tracelen %d\n", mds, - ceph_snap_op_name(op), split, trace_len); + dout("%s from mds%d op %s split %llx tracelen %d\n", __func__, + mds, ceph_snap_op_name(op), split, trace_len); mutex_lock(&session->s_mutex); inc_session_sequence(session); @@ -989,13 +1057,13 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, */ if (ci->i_snap_realm->created > le64_to_cpu(ri->created)) { - dout(" leaving %p in newer realm %llx %p\n", - inode, ci->i_snap_realm->ino, + dout(" leaving %p %llx.%llx in newer realm %llx %p\n", + inode, ceph_vinop(inode), ci->i_snap_realm->ino, ci->i_snap_realm); goto skip_inode; } - dout(" will move %p to split realm %llx %p\n", - inode, realm->ino, realm); + dout(" will move %p %llx.%llx to split realm %llx %p\n", + inode, ceph_vinop(inode), realm->ino, realm); ceph_get_snap_realm(mdsc, realm); ceph_change_snap_realm(inode, realm); @@ -1038,7 +1106,7 @@ skip_inode: return; bad: - pr_err("corrupt snap message from mds%d\n", mds); + pr_err("%s corrupt snap message from mds%d\n", __func__, mds); ceph_msg_dump(msg); out: if (locked_rwsem) @@ -1071,7 +1139,8 @@ struct ceph_snapid_map* ceph_get_snapid_map(struct ceph_mds_client *mdsc, } spin_unlock(&mdsc->snapid_map_lock); if (exist) { - dout("found snapid map %llx -> %x\n", exist->snap, exist->dev); + dout("%s found snapid map %llx -> %x\n", __func__, + exist->snap, exist->dev); return exist; } @@ -1115,11 +1184,13 @@ struct ceph_snapid_map* ceph_get_snapid_map(struct ceph_mds_client *mdsc, if (exist) { free_anon_bdev(sm->dev); kfree(sm); - dout("found snapid map %llx -> %x\n", exist->snap, exist->dev); + dout("%s found snapid map %llx -> %x\n", __func__, + exist->snap, exist->dev); return exist; } - dout("create snapid map %llx -> %x\n", sm->snap, sm->dev); + dout("%s create snapid map %llx -> %x\n", __func__, + sm->snap, sm->dev); return sm; } diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c index 573bb9556fb5..e36e8948e728 100644 --- a/fs/ceph/strings.c +++ b/fs/ceph/strings.c @@ -60,6 +60,7 @@ const char *ceph_mds_op_name(int op) case CEPH_MDS_OP_LOOKUPINO: return "lookupino"; case CEPH_MDS_OP_LOOKUPNAME: return "lookupname"; case CEPH_MDS_OP_GETATTR: return "getattr"; + case CEPH_MDS_OP_GETVXATTR: return "getvxattr"; case CEPH_MDS_OP_SETXATTR: return "setxattr"; case CEPH_MDS_OP_SETATTR: return "setattr"; case CEPH_MDS_OP_RMXATTR: return "rmxattr"; diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 4a3b77d049c7..e6987d295079 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -865,6 +865,7 @@ static void destroy_fs_client(struct ceph_fs_client *fsc) */ struct kmem_cache *ceph_inode_cachep; struct kmem_cache *ceph_cap_cachep; +struct kmem_cache *ceph_cap_snap_cachep; struct kmem_cache *ceph_cap_flush_cachep; struct kmem_cache *ceph_dentry_cachep; struct kmem_cache *ceph_file_cachep; @@ -893,6 +894,9 @@ static int __init init_caches(void) ceph_cap_cachep = KMEM_CACHE(ceph_cap, SLAB_MEM_SPREAD); if (!ceph_cap_cachep) goto bad_cap; + ceph_cap_snap_cachep = KMEM_CACHE(ceph_cap_snap, SLAB_MEM_SPREAD); + if (!ceph_cap_snap_cachep) + goto bad_cap_snap; ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); if (!ceph_cap_flush_cachep) @@ -932,6 +936,8 @@ bad_file: bad_dentry: kmem_cache_destroy(ceph_cap_flush_cachep); bad_cap_flush: + kmem_cache_destroy(ceph_cap_snap_cachep); +bad_cap_snap: kmem_cache_destroy(ceph_cap_cachep); bad_cap: kmem_cache_destroy(ceph_inode_cachep); @@ -948,6 +954,7 @@ static void destroy_caches(void) kmem_cache_destroy(ceph_inode_cachep); kmem_cache_destroy(ceph_cap_cachep); + kmem_cache_destroy(ceph_cap_snap_cachep); kmem_cache_destroy(ceph_cap_flush_cachep); kmem_cache_destroy(ceph_dentry_cachep); kmem_cache_destroy(ceph_file_cachep); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 0bd97aea2319..a1ecc410a495 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -231,7 +231,7 @@ static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap) if (refcount_dec_and_test(&capsnap->nref)) { if (capsnap->xattr_blob) ceph_buffer_put(capsnap->xattr_blob); - kfree(capsnap); + kmem_cache_free(ceph_cap_snap_cachep, capsnap); } } @@ -884,6 +884,8 @@ struct ceph_snap_realm { struct list_head dirty_item; /* if realm needs new context */ + struct list_head rebuild_item; /* rebuild snap realms _downward_ in hierarchy */ + /* the current set of snaps for this realm */ struct ceph_snap_context *cached_context; @@ -939,7 +941,7 @@ extern void ceph_handle_snap(struct ceph_mds_client *mdsc, struct ceph_msg *msg); extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci, struct ceph_cap_snap *capsnap); -extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc); +extern void ceph_cleanup_global_and_empty_realms(struct ceph_mds_client *mdsc); extern struct ceph_snapid_map *ceph_get_snapid_map(struct ceph_mds_client *mdsc, u64 snap); @@ -1049,6 +1051,7 @@ static inline bool ceph_inode_is_shutdown(struct inode *inode) /* xattr.c */ int __ceph_setxattr(struct inode *, const char *, const void *, size_t, int); +int ceph_do_getvxattr(struct inode *inode, const char *name, void *value, size_t size); ssize_t __ceph_getxattr(struct inode *, const char *, void *, size_t); extern ssize_t ceph_listxattr(struct dentry *, char *, size_t); extern struct ceph_buffer *__ceph_build_xattrs_blob(struct ceph_inode_info *ci); @@ -1214,7 +1217,7 @@ extern void __ceph_touch_fmode(struct ceph_inode_info *ci, /* addr.c */ extern const struct address_space_operations ceph_aops; extern int ceph_mmap(struct file *file, struct vm_area_struct *vma); -extern int ceph_uninline_data(struct file *filp, struct page *locked_page); +extern int ceph_uninline_data(struct file *file); extern int ceph_pool_perm_check(struct inode *inode, int need); extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc); int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invalidate); diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index fcf7dfdecf96..afec84088471 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -923,10 +923,13 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_xattr *xattr; - struct ceph_vxattr *vxattr = NULL; + struct ceph_vxattr *vxattr; int req_mask; ssize_t err; + if (strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN)) + goto handle_non_vxattrs; + /* let's see if a virtual xattr was requested */ vxattr = ceph_match_vxattr(inode, name); if (vxattr) { @@ -945,8 +948,14 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value, err = -ERANGE; } return err; + } else { + err = ceph_do_getvxattr(inode, name, value, size); + /* this would happen with a new client and old server combo */ + if (err == -EOPNOTSUPP) + err = -ENODATA; + return err; } - +handle_non_vxattrs: req_mask = __get_request_mask(inode); spin_lock(&ci->i_ceph_lock); diff --git a/fs/dax.c b/fs/dax.c index ab0978739eaa..67a08a32fccb 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -389,7 +389,7 @@ static struct page *dax_busy_page(void *entry) } /* - * dax_lock_mapping_entry - Lock the DAX entry corresponding to a page + * dax_lock_page - Lock the DAX entry corresponding to a page * @page: The page whose entry we want to lock * * Context: Process context. diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index 50546eadaae2..5f1e4799e8fa 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -19,7 +19,11 @@ #include "xfs_error.h" #include "xfs_trace.h" -struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR }; +const struct xfs_name xfs_name_dotdot = { + .name = (const unsigned char *)"..", + .len = 2, + .type = XFS_DIR3_FT_DIR, +}; /* * Convert inode mode to directory entry filetype @@ -54,10 +58,10 @@ xfs_mode_to_ftype( */ xfs_dahash_t xfs_ascii_ci_hashname( - struct xfs_name *name) + const struct xfs_name *name) { - xfs_dahash_t hash; - int i; + xfs_dahash_t hash; + int i; for (i = 0, hash = 0; i < name->len; i++) hash = tolower(name->name[i]) ^ rol32(hash, 7); @@ -243,7 +247,7 @@ int xfs_dir_createname( struct xfs_trans *tp, struct xfs_inode *dp, - struct xfs_name *name, + const struct xfs_name *name, xfs_ino_t inum, /* new entry inode number */ xfs_extlen_t total) /* bmap's total block count */ { @@ -337,16 +341,16 @@ xfs_dir_cilookup_result( int xfs_dir_lookup( - xfs_trans_t *tp, - xfs_inode_t *dp, - struct xfs_name *name, - xfs_ino_t *inum, /* out: inode number */ - struct xfs_name *ci_name) /* out: actual name if CI match */ + struct xfs_trans *tp, + struct xfs_inode *dp, + const struct xfs_name *name, + xfs_ino_t *inum, /* out: inode number */ + struct xfs_name *ci_name) /* out: actual name if CI match */ { - struct xfs_da_args *args; - int rval; - int v; /* type-checking value */ - int lock_mode; + struct xfs_da_args *args; + int rval; + int v; /* type-checking value */ + int lock_mode; ASSERT(S_ISDIR(VFS_I(dp)->i_mode)); XFS_STATS_INC(dp->i_mount, xs_dir_lookup); @@ -475,7 +479,7 @@ int xfs_dir_replace( struct xfs_trans *tp, struct xfs_inode *dp, - struct xfs_name *name, /* name of entry to replace */ + const struct xfs_name *name, /* name of entry to replace */ xfs_ino_t inum, /* new inode number */ xfs_extlen_t total) /* bmap's total block count */ { @@ -728,7 +732,7 @@ xfs_dir2_namecheck( xfs_dahash_t xfs_dir2_hashname( struct xfs_mount *mp, - struct xfs_name *name) + const struct xfs_name *name) { if (unlikely(xfs_has_asciici(mp))) return xfs_ascii_ci_hashname(name); diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index d03e6098ded9..b6df3c34b26a 100644 --- a/fs/xfs/libxfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h @@ -21,7 +21,7 @@ struct xfs_dir2_data_unused; struct xfs_dir3_icfree_hdr; struct xfs_dir3_icleaf_hdr; -extern struct xfs_name xfs_name_dotdot; +extern const struct xfs_name xfs_name_dotdot; /* * Convert inode mode to directory entry filetype @@ -39,16 +39,16 @@ extern int xfs_dir_isempty(struct xfs_inode *dp); extern int xfs_dir_init(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_inode *pdp); extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp, - struct xfs_name *name, xfs_ino_t inum, + const struct xfs_name *name, xfs_ino_t inum, xfs_extlen_t tot); extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp, - struct xfs_name *name, xfs_ino_t *inum, + const struct xfs_name *name, xfs_ino_t *inum, struct xfs_name *ci_name); extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_name *name, xfs_ino_t ino, xfs_extlen_t tot); extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp, - struct xfs_name *name, xfs_ino_t inum, + const struct xfs_name *name, xfs_ino_t inum, xfs_extlen_t tot); extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_name *name); diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index 711709a2aa53..7404a9ff1a92 100644 --- a/fs/xfs/libxfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h @@ -40,7 +40,7 @@ struct xfs_dir3_icfree_hdr { }; /* xfs_dir2.c */ -xfs_dahash_t xfs_ascii_ci_hashname(struct xfs_name *name); +xfs_dahash_t xfs_ascii_ci_hashname(const struct xfs_name *name); enum xfs_dacmp xfs_ascii_ci_compname(struct xfs_da_args *args, const unsigned char *name, int len); extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, @@ -201,7 +201,8 @@ xfs_dir2_data_entsize( return round_up(len, XFS_DIR2_DATA_ALIGN); } -xfs_dahash_t xfs_dir2_hashname(struct xfs_mount *mp, struct xfs_name *name); +xfs_dahash_t xfs_dir2_hashname(struct xfs_mount *mp, + const struct xfs_name *name); enum xfs_dacmp xfs_dir2_compname(struct xfs_da_args *args, const unsigned char *name, int len); diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index e1f4d7d5a011..761dde155099 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -463,7 +463,7 @@ xfs_bui_item_recover( struct xfs_bui_log_item *buip = BUI_ITEM(lip); struct xfs_trans *tp; struct xfs_inode *ip = NULL; - struct xfs_mount *mp = lip->li_mountp; + struct xfs_mount *mp = lip->li_log->l_mp; struct xfs_map_extent *bmap; struct xfs_bud_log_item *budp; xfs_filblks_t count; diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 44795018df1d..e1afb9e503e1 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -14,6 +14,7 @@ #include "xfs_trace.h" #include "xfs_log.h" #include "xfs_log_recover.h" +#include "xfs_log_priv.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_errortag.h" @@ -813,7 +814,15 @@ xfs_buf_read_map( * buffer. */ if (error) { - if (!xfs_is_shutdown(target->bt_mount)) + /* + * Check against log shutdown for error reporting because + * metadata writeback may require a read first and we need to + * report errors in metadata writeback until the log is shut + * down. High level transaction read functions already check + * against mount shutdown, anyway, so we only need to be + * concerned about low level IO interactions here. + */ + if (!xlog_is_shutdown(target->bt_mount->m_log)) xfs_buf_ioerror_alert(bp, fa); bp->b_flags &= ~XBF_DONE; @@ -1174,10 +1183,10 @@ xfs_buf_ioend_handle_error( struct xfs_error_cfg *cfg; /* - * If we've already decided to shutdown the filesystem because of I/O - * errors, there's no point in giving this a retry. + * If we've already shutdown the journal because of I/O errors, there's + * no point in giving this a retry. */ - if (xfs_is_shutdown(mp)) + if (xlog_is_shutdown(mp->m_log)) goto out_stale; xfs_buf_ioerror_alert_ratelimited(bp); @@ -1588,8 +1597,23 @@ __xfs_buf_submit( ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); - /* on shutdown we stale and complete the buffer immediately */ - if (xfs_is_shutdown(bp->b_mount)) { + /* + * On log shutdown we stale and complete the buffer immediately. We can + * be called to read the superblock before the log has been set up, so + * be careful checking the log state. + * + * Checking the mount shutdown state here can result in the log tail + * moving inappropriately on disk as the log may not yet be shut down. + * i.e. failing this buffer on mount shutdown can remove it from the AIL + * and move the tail of the log forwards without having written this + * buffer to disk. This corrupts the log tail state in memory, and + * because the log may not be shut down yet, it can then be propagated + * to disk before the log is shutdown. Hence we check log shutdown + * state here rather than mount state to avoid corrupting the log tail + * on shutdown. + */ + if (bp->b_mount->m_log && + xlog_is_shutdown(bp->b_mount->m_log)) { xfs_buf_ioend_fail(bp); return -EIO; } @@ -1803,10 +1827,10 @@ xfs_buftarg_drain( * If one or more failed buffers were freed, that means dirty metadata * was thrown away. This should only ever happen after I/O completion * handling has elevated I/O error(s) to permanent failures and shuts - * down the fs. + * down the journal. */ if (write_fail) { - ASSERT(xfs_is_shutdown(btp->bt_mount)); + ASSERT(xlog_is_shutdown(btp->bt_mount->m_log)); xfs_alert(btp->bt_mount, "Please run xfs_repair to determine the extent of the problem."); } @@ -2089,12 +2113,13 @@ xfs_buf_delwri_submit_buffers( blk_start_plug(&plug); list_for_each_entry_safe(bp, n, buffer_list, b_list) { if (!wait_list) { + if (!xfs_buf_trylock(bp)) + continue; if (xfs_buf_ispinned(bp)) { + xfs_buf_unlock(bp); pinned++; continue; } - if (!xfs_buf_trylock(bp)) - continue; } else { xfs_buf_lock(bp); } diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index a7a8e4528881..522d450a94b1 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -21,6 +21,7 @@ #include "xfs_dquot.h" #include "xfs_trace.h" #include "xfs_log.h" +#include "xfs_log_priv.h" struct kmem_cache *xfs_buf_item_cache; @@ -428,7 +429,7 @@ xfs_buf_item_format( * occurs during recovery. */ if (bip->bli_flags & XFS_BLI_INODE_BUF) { - if (xfs_has_v3inodes(lip->li_mountp) || + if (xfs_has_v3inodes(lip->li_log->l_mp) || !((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && xfs_log_item_in_current_chkpt(lip))) bip->__bli_format.blf_flags |= XFS_BLF_INODE_BUF; @@ -616,7 +617,7 @@ xfs_buf_item_put( * that case, the bli is freed on buffer writeback completion. */ aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags) || - xfs_is_shutdown(lip->li_mountp); + xlog_is_shutdown(lip->li_log); dirty = bip->bli_flags & XFS_BLI_DIRTY; if (dirty && !aborted) return false; diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 47ef9c9c5c17..0e50f2c9348e 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -604,7 +604,7 @@ xfs_efi_item_recover( struct list_head *capture_list) { struct xfs_efi_log_item *efip = EFI_ITEM(lip); - struct xfs_mount *mp = lip->li_mountp; + struct xfs_mount *mp = lip->li_log->l_mp; struct xfs_efd_log_item *efdp; struct xfs_trans *tp; struct xfs_extent *extp; diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 48287caad28b..10e1cb71439e 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -864,8 +864,8 @@ xfs_getfsmap( !xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1])) return -EINVAL; - use_rmap = capable(CAP_SYS_ADMIN) && - xfs_has_rmapbt(mp); + use_rmap = xfs_has_rmapbt(mp) && + has_capability_noaudit(current, CAP_SYS_ADMIN); head->fmh_entries = 0; /* Set up our device handlers. */ diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 9e434cb41e48..20186c584c7d 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -23,6 +23,7 @@ #include "xfs_reflink.h" #include "xfs_ialloc.h" #include "xfs_ag.h" +#include "xfs_log_priv.h" #include @@ -873,7 +874,14 @@ xfs_reclaim_inode( if (xfs_iflags_test_and_set(ip, XFS_IFLUSHING)) goto out_iunlock; - if (xfs_is_shutdown(ip->i_mount)) { + /* + * Check for log shutdown because aborting the inode can move the log + * tail and corrupt in memory state. This is fine if the log is shut + * down, but if the log is still active and only the mount is shut down + * then the in-memory log tail movement caused by the abort can be + * incorrectly propagated to disk. + */ + if (xlog_is_shutdown(ip->i_mount->m_log)) { xfs_iunpin_wait(ip); xfs_iflush_abort(ip); goto reclaim; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 04bf467b1090..26227d26f274 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -35,6 +35,7 @@ #include "xfs_bmap_btree.h" #include "xfs_reflink.h" #include "xfs_ag.h" +#include "xfs_log_priv.h" struct kmem_cache *xfs_inode_cache; @@ -658,9 +659,9 @@ xfs_ip2xflags( */ int xfs_lookup( - xfs_inode_t *dp, - struct xfs_name *name, - xfs_inode_t **ipp, + struct xfs_inode *dp, + const struct xfs_name *name, + struct xfs_inode **ipp, struct xfs_name *ci_name) { xfs_ino_t inum; @@ -1217,7 +1218,7 @@ xfs_link( { xfs_mount_t *mp = tdp->i_mount; xfs_trans_t *tp; - int error; + int error, nospace_error = 0; int resblks; trace_xfs_link(tdp, target_name); @@ -1236,19 +1237,11 @@ xfs_link( goto std_return; resblks = XFS_LINK_SPACE_RES(mp, target_name->len); - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, resblks, 0, 0, &tp); - if (error == -ENOSPC) { - resblks = 0; - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, 0, 0, 0, &tp); - } + error = xfs_trans_alloc_dir(tdp, &M_RES(mp)->tr_link, sip, &resblks, + &tp, &nospace_error); if (error) goto std_return; - xfs_lock_two_inodes(sip, XFS_ILOCK_EXCL, tdp, XFS_ILOCK_EXCL); - - xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL); - error = xfs_iext_count_may_overflow(tdp, XFS_DATA_FORK, XFS_IEXT_DIR_MANIP_CNT(mp)); if (error) @@ -1306,6 +1299,8 @@ xfs_link( error_return: xfs_trans_cancel(tp); std_return: + if (error == -ENOSPC && nospace_error) + error = nospace_error; return error; } @@ -2755,6 +2750,7 @@ xfs_remove( xfs_mount_t *mp = dp->i_mount; xfs_trans_t *tp = NULL; int is_dir = S_ISDIR(VFS_I(ip)->i_mode); + int dontcare; int error = 0; uint resblks; @@ -2772,31 +2768,24 @@ xfs_remove( goto std_return; /* - * We try to get the real space reservation first, - * allowing for directory btree deletion(s) implying - * possible bmap insert(s). If we can't get the space - * reservation then we use 0 instead, and avoid the bmap - * btree insert(s) in the directory code by, if the bmap - * insert tries to happen, instead trimming the LAST - * block from the directory. + * We try to get the real space reservation first, allowing for + * directory btree deletion(s) implying possible bmap insert(s). If we + * can't get the space reservation then we use 0 instead, and avoid the + * bmap btree insert(s) in the directory code by, if the bmap insert + * tries to happen, instead trimming the LAST block from the directory. + * + * Ignore EDQUOT and ENOSPC being returned via nospace_error because + * the directory code can handle a reservationless update and we don't + * want to prevent a user from trying to free space by deleting things. */ resblks = XFS_REMOVE_SPACE_RES(mp); - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, resblks, 0, 0, &tp); - if (error == -ENOSPC) { - resblks = 0; - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_remove, 0, 0, 0, - &tp); - } + error = xfs_trans_alloc_dir(dp, &M_RES(mp)->tr_remove, ip, &resblks, + &tp, &dontcare); if (error) { ASSERT(error != -ENOSPC); goto std_return; } - xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL); - - xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - /* * If we're removing a directory perform some additional validation. */ @@ -3109,7 +3098,8 @@ xfs_rename( bool new_parent = (src_dp != target_dp); bool src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode); int spaceres; - int error; + bool retried = false; + int error, nospace_error = 0; trace_xfs_rename(src_dp, target_dp, src_name, target_name); @@ -3133,9 +3123,12 @@ xfs_rename( xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip, inodes, &num_inodes); +retry: + nospace_error = 0; spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, spaceres, 0, 0, &tp); if (error == -ENOSPC) { + nospace_error = error; spaceres = 0; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_rename, 0, 0, 0, &tp); @@ -3189,6 +3182,31 @@ xfs_rename( target_dp, target_name, target_ip, spaceres); + /* + * Try to reserve quota to handle an expansion of the target directory. + * We'll allow the rename to continue in reservationless mode if we hit + * a space usage constraint. If we trigger reservationless mode, save + * the errno if there isn't any free space in the target directory. + */ + if (spaceres != 0) { + error = xfs_trans_reserve_quota_nblks(tp, target_dp, spaceres, + 0, false); + if (error == -EDQUOT || error == -ENOSPC) { + if (!retried) { + xfs_trans_cancel(tp); + xfs_blockgc_free_quota(target_dp, 0); + retried = true; + goto retry; + } + + nospace_error = error; + spaceres = 0; + error = 0; + } + if (error) + goto out_trans_cancel; + } + /* * Check for expected errors before we dirty the transaction * so we can return an error without a transaction abort. @@ -3435,6 +3453,8 @@ out_trans_cancel: out_release_wip: if (wip) xfs_irele(wip); + if (error == -ENOSPC && nospace_error) + error = nospace_error; return error; } @@ -3659,7 +3679,7 @@ xfs_iflush_cluster( * AIL, leaving a dirty/unpinned inode attached to the buffer * that otherwise looks like it should be flushed. */ - if (xfs_is_shutdown(mp)) { + if (xlog_is_shutdown(mp->m_log)) { xfs_iunpin_wait(ip); xfs_iflush_abort(ip); xfs_iunlock(ip, XFS_ILOCK_SHARED); @@ -3685,9 +3705,19 @@ xfs_iflush_cluster( } if (error) { + /* + * Shutdown first so we kill the log before we release this + * buffer. If it is an INODE_ALLOC buffer and pins the tail + * of the log, failing it before the _log_ is shut down can + * result in the log tail being moved forward in the journal + * on disk because log writes can still be taking place. Hence + * unpinning the tail will allow the ICREATE intent to be + * removed from the log an recovery will fail with uninitialised + * inode cluster buffers. + */ + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); bp->b_flags |= XBF_ASYNC; xfs_buf_ioend_fail(bp); - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); return error; } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index b7e8f14d9fca..740ab13d1aa2 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -402,7 +402,7 @@ enum layout_break_reason { int xfs_release(struct xfs_inode *ip); void xfs_inactive(struct xfs_inode *ip); -int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, +int xfs_lookup(struct xfs_inode *dp, const struct xfs_name *name, struct xfs_inode **ipp, struct xfs_name *ci_name); int xfs_create(struct user_namespace *mnt_userns, struct xfs_inode *dp, struct xfs_name *name, diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 90d8e591baf8..11158fa81a09 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -17,6 +17,7 @@ #include "xfs_trans_priv.h" #include "xfs_buf_item.h" #include "xfs_log.h" +#include "xfs_log_priv.h" #include "xfs_error.h" #include @@ -720,6 +721,17 @@ xfs_iflush_ail_updates( if (INODE_ITEM(lip)->ili_flush_lsn != lip->li_lsn) continue; + /* + * dgc: Not sure how this happens, but it happens very + * occassionaly via generic/388. xfs_iflush_abort() also + * silently handles this same "under writeback but not in AIL at + * shutdown" condition via xfs_trans_ail_delete(). + */ + if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) { + ASSERT(xlog_is_shutdown(lip->li_log)); + continue; + } + lsn = xfs_ail_delete_one(ailp, lip); if (!tail_lsn && lsn) tail_lsn = lsn; diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 2515fe8299e1..83481005317a 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1189,7 +1189,7 @@ xfs_ioctl_setattr_get_trans( goto out_error; error = xfs_trans_alloc_ichange(ip, NULL, NULL, pdqp, - capable(CAP_FOWNER), &tp); + has_capability_noaudit(current, CAP_FOWNER), &tp); if (error) goto out_error; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index b79b3846e71b..b34e8e4344a8 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -613,37 +613,6 @@ xfs_vn_getattr( return 0; } -static void -xfs_setattr_mode( - struct xfs_inode *ip, - struct iattr *iattr) -{ - struct inode *inode = VFS_I(ip); - umode_t mode = iattr->ia_mode; - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - - inode->i_mode &= S_IFMT; - inode->i_mode |= mode & ~S_IFMT; -} - -void -xfs_setattr_time( - struct xfs_inode *ip, - struct iattr *iattr) -{ - struct inode *inode = VFS_I(ip); - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - - if (iattr->ia_valid & ATTR_ATIME) - inode->i_atime = iattr->ia_atime; - if (iattr->ia_valid & ATTR_CTIME) - inode->i_ctime = iattr->ia_ctime; - if (iattr->ia_valid & ATTR_MTIME) - inode->i_mtime = iattr->ia_mtime; -} - static int xfs_vn_change_ok( struct user_namespace *mnt_userns, @@ -678,10 +647,10 @@ xfs_setattr_nonsize( int mask = iattr->ia_valid; xfs_trans_t *tp; int error; - kuid_t uid = GLOBAL_ROOT_UID, iuid = GLOBAL_ROOT_UID; - kgid_t gid = GLOBAL_ROOT_GID, igid = GLOBAL_ROOT_GID; + kuid_t uid = GLOBAL_ROOT_UID; + kgid_t gid = GLOBAL_ROOT_GID; struct xfs_dquot *udqp = NULL, *gdqp = NULL; - struct xfs_dquot *olddquot1 = NULL, *olddquot2 = NULL; + struct xfs_dquot *old_udqp = NULL, *old_gdqp = NULL; ASSERT((mask & ATTR_SIZE) == 0); @@ -723,66 +692,30 @@ xfs_setattr_nonsize( } error = xfs_trans_alloc_ichange(ip, udqp, gdqp, NULL, - capable(CAP_FOWNER), &tp); + has_capability_noaudit(current, CAP_FOWNER), &tp); if (error) goto out_dqrele; /* - * Change file ownership. Must be the owner or privileged. + * Register quota modifications in the transaction. Must be the owner + * or privileged. These IDs could have changed since we last looked at + * them. But, we're assured that if the ownership did change while we + * didn't have the inode locked, inode's dquot(s) would have changed + * also. */ - if (mask & (ATTR_UID|ATTR_GID)) { - /* - * These IDs could have changed since we last looked at them. - * But, we're assured that if the ownership did change - * while we didn't have the inode locked, inode's dquot(s) - * would have changed also. - */ - iuid = inode->i_uid; - igid = inode->i_gid; - gid = (mask & ATTR_GID) ? iattr->ia_gid : igid; - uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid; - - /* - * CAP_FSETID overrides the following restrictions: - * - * The set-user-ID and set-group-ID bits of a file will be - * cleared upon successful return from chown() - */ - if ((inode->i_mode & (S_ISUID|S_ISGID)) && - !capable(CAP_FSETID)) - inode->i_mode &= ~(S_ISUID|S_ISGID); - - /* - * Change the ownerships and register quota modifications - * in the transaction. - */ - if (!uid_eq(iuid, uid)) { - if (XFS_IS_UQUOTA_ON(mp)) { - ASSERT(mask & ATTR_UID); - ASSERT(udqp); - olddquot1 = xfs_qm_vop_chown(tp, ip, - &ip->i_udquot, udqp); - } - inode->i_uid = uid; - } - if (!gid_eq(igid, gid)) { - if (XFS_IS_GQUOTA_ON(mp)) { - ASSERT(xfs_has_pquotino(mp) || - !XFS_IS_PQUOTA_ON(mp)); - ASSERT(mask & ATTR_GID); - ASSERT(gdqp); - olddquot2 = xfs_qm_vop_chown(tp, ip, - &ip->i_gdquot, gdqp); - } - inode->i_gid = gid; - } + if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp) && + !uid_eq(inode->i_uid, iattr->ia_uid)) { + ASSERT(udqp); + old_udqp = xfs_qm_vop_chown(tp, ip, &ip->i_udquot, udqp); + } + if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp) && + !gid_eq(inode->i_gid, iattr->ia_gid)) { + ASSERT(xfs_has_pquotino(mp) || !XFS_IS_PQUOTA_ON(mp)); + ASSERT(gdqp); + old_gdqp = xfs_qm_vop_chown(tp, ip, &ip->i_gdquot, gdqp); } - if (mask & ATTR_MODE) - xfs_setattr_mode(ip, iattr); - if (mask & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) - xfs_setattr_time(ip, iattr); - + setattr_copy(mnt_userns, inode, iattr); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); XFS_STATS_INC(mp, xs_ig_attrchg); @@ -794,8 +727,8 @@ xfs_setattr_nonsize( /* * Release any dquot(s) the inode had kept before chown. */ - xfs_qm_dqrele(olddquot1); - xfs_qm_dqrele(olddquot2); + xfs_qm_dqrele(old_udqp); + xfs_qm_dqrele(old_gdqp); xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); @@ -1006,11 +939,8 @@ xfs_setattr_size( xfs_inode_clear_eofblocks_tag(ip); } - if (iattr->ia_valid & ATTR_MODE) - xfs_setattr_mode(ip, iattr); - if (iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) - xfs_setattr_time(ip, iattr); - + ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID))); + setattr_copy(mnt_userns, inode, iattr); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); XFS_STATS_INC(mp, xs_ig_attrchg); diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 16f9edbda4eb..a8034c0afdf2 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -812,10 +812,9 @@ xfs_log_mount_finish( * mount failure occurs. */ mp->m_super->s_flags |= SB_ACTIVE; + xfs_log_work_queue(mp); if (xlog_recovery_needed(log)) error = xlog_recover_finish(log); - if (!error) - xfs_log_work_queue(mp); mp->m_super->s_flags &= ~SB_ACTIVE; evict_inodes(mp->m_super); @@ -1102,7 +1101,7 @@ xfs_log_item_init( int type, const struct xfs_item_ops *ops) { - item->li_mountp = mp; + item->li_log = mp->m_log; item->li_ailp = mp->m_ail; item->li_type = type; item->li_ops = ops; diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 83a039762b81..796e4464f809 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -1243,18 +1243,27 @@ xlog_cil_push_now( if (!async) flush_workqueue(cil->xc_push_wq); + spin_lock(&cil->xc_push_lock); + + /* + * If this is an async flush request, we always need to set the + * xc_push_commit_stable flag even if something else has already queued + * a push. The flush caller is asking for the CIL to be on stable + * storage when the next push completes, so regardless of who has queued + * the push, the flush requires stable semantics from it. + */ + cil->xc_push_commit_stable = async; + /* * If the CIL is empty or we've already pushed the sequence then - * there's no work we need to do. + * there's no more work that we need to do. */ - spin_lock(&cil->xc_push_lock); if (list_empty(&cil->xc_cil) || push_seq <= cil->xc_push_seq) { spin_unlock(&cil->xc_push_lock); return; } cil->xc_push_seq = push_seq; - cil->xc_push_commit_stable = async; queue_work(cil->xc_push_wq, &cil->xc_ctx->push_work); spin_unlock(&cil->xc_push_lock); } @@ -1352,6 +1361,13 @@ xlog_cil_flush( trace_xfs_log_force(log->l_mp, seq, _RET_IP_); xlog_cil_push_now(log, seq, true); + + /* + * If the CIL is empty, make sure that any previous checkpoint that may + * still be in an active iclog is pushed to stable storage. + */ + if (list_empty(&log->l_cilp->xc_cil)) + xfs_log_force(log->l_mp, 0); } /* @@ -1468,7 +1484,7 @@ bool xfs_log_item_in_current_chkpt( struct xfs_log_item *lip) { - struct xfs_cil *cil = lip->li_mountp->m_log->l_cilp; + struct xfs_cil *cil = lip->li_log->l_cilp; if (list_empty(&lip->li_cil)) return false; diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 4abe17312c2b..37a24f0f7cd4 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -319,7 +319,8 @@ xfs_fs_commit_blocks( xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - xfs_setattr_time(ip, iattr); + ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID))); + setattr_copy(&init_user_ns, inode, iattr); if (update_isize) { i_size_write(inode, iattr->ia_size); ip->i_disk_size = iattr->ia_size; diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 32ac8d9c8940..f165d1a3de1d 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -25,6 +25,7 @@ #include "xfs_error.h" #include "xfs_ag.h" #include "xfs_ialloc.h" +#include "xfs_log_priv.h" /* * The global quota manager. There is only one of these for the entire @@ -121,8 +122,7 @@ xfs_qm_dqpurge( struct xfs_dquot *dqp, void *data) { - struct xfs_mount *mp = dqp->q_mount; - struct xfs_quotainfo *qi = mp->m_quotainfo; + struct xfs_quotainfo *qi = dqp->q_mount->m_quotainfo; int error = -EAGAIN; xfs_dqlock(dqp); @@ -157,7 +157,7 @@ xfs_qm_dqpurge( } ASSERT(atomic_read(&dqp->q_pincount) == 0); - ASSERT(xfs_is_shutdown(mp) || + ASSERT(xlog_is_shutdown(dqp->q_logitem.qli_item.li_log) || !test_bit(XFS_LI_IN_AIL, &dqp->q_logitem.qli_item.li_flags)); xfs_dqfunlock(dqp); @@ -172,7 +172,7 @@ xfs_qm_dqpurge( */ ASSERT(!list_empty(&dqp->q_lru)); list_lru_del(&qi->qi_lru, &dqp->q_lru); - XFS_STATS_DEC(mp, xs_qm_dquot_unused); + XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot_unused); xfs_qm_dqdestroy(dqp); return 0; diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index d3da67772d57..0d868c93144d 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -457,7 +457,7 @@ xfs_cui_item_recover( struct xfs_cud_log_item *cudp; struct xfs_trans *tp; struct xfs_btree_cur *rcur = NULL; - struct xfs_mount *mp = lip->li_mountp; + struct xfs_mount *mp = lip->li_log->l_mp; xfs_fsblock_t new_fsb; xfs_extlen_t new_len; unsigned int refc_type; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index db70060e7bf6..54e68e5693fd 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -425,7 +425,10 @@ convert: if (!convert_now || cmap->br_state == XFS_EXT_NORM) return 0; trace_xfs_reflink_convert_cow(ip, cmap); - return xfs_reflink_convert_cow_locked(ip, offset_fsb, count_fsb); + error = xfs_reflink_convert_cow_locked(ip, offset_fsb, count_fsb); + if (!error) + cmap->br_state = XFS_EXT_NORM; + return error; out_trans_cancel: xfs_trans_cancel(tp); diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index c3966b4c58ef..a22b2d19ef91 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -510,7 +510,7 @@ xfs_rui_item_recover( struct xfs_rud_log_item *rudp; struct xfs_trans *tp; struct xfs_btree_cur *rcur = NULL; - struct xfs_mount *mp = lip->li_mountp; + struct xfs_mount *mp = lip->li_log->l_mp; enum xfs_rmap_intent_type type; xfs_exntst_t state; int i; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 4a8076ef8cb4..b141ef78c755 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -933,7 +933,7 @@ DEFINE_IREF_EVENT(xfs_inode_unpin); DEFINE_IREF_EVENT(xfs_inode_unpin_nowait); DECLARE_EVENT_CLASS(xfs_namespace_class, - TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), + TP_PROTO(struct xfs_inode *dp, const struct xfs_name *name), TP_ARGS(dp, name), TP_STRUCT__entry( __field(dev_t, dev) @@ -956,7 +956,7 @@ DECLARE_EVENT_CLASS(xfs_namespace_class, #define DEFINE_NAMESPACE_EVENT(name) \ DEFINE_EVENT(xfs_namespace_class, name, \ - TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), \ + TP_PROTO(struct xfs_inode *dp, const struct xfs_name *name), \ TP_ARGS(dp, name)) DEFINE_NAMESPACE_EVENT(xfs_remove); DEFINE_NAMESPACE_EVENT(xfs_link); @@ -1308,7 +1308,7 @@ DECLARE_EVENT_CLASS(xfs_log_item_class, __field(xfs_lsn_t, lsn) ), TP_fast_assign( - __entry->dev = lip->li_mountp->m_super->s_dev; + __entry->dev = lip->li_log->l_mp->m_super->s_dev; __entry->lip = lip; __entry->type = lip->li_type; __entry->flags = lip->li_flags; @@ -1361,7 +1361,7 @@ DECLARE_EVENT_CLASS(xfs_ail_class, __field(xfs_lsn_t, new_lsn) ), TP_fast_assign( - __entry->dev = lip->li_mountp->m_super->s_dev; + __entry->dev = lip->li_log->l_mp->m_super->s_dev; __entry->lip = lip; __entry->type = lip->li_type; __entry->flags = lip->li_flags; diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 59e2f9031b9f..917a69f0a6ff 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -648,7 +648,7 @@ xfs_trans_add_item( struct xfs_trans *tp, struct xfs_log_item *lip) { - ASSERT(lip->li_mountp == tp->t_mountp); + ASSERT(lip->li_log == tp->t_mountp->m_log); ASSERT(lip->li_ailp == tp->t_mountp->m_ail); ASSERT(list_empty(&lip->li_trans)); ASSERT(!test_bit(XFS_LI_DIRTY, &lip->li_flags)); @@ -775,7 +775,7 @@ xfs_trans_committed_bulk( * object into the AIL as we are in a shutdown situation. */ if (aborted) { - ASSERT(xfs_is_shutdown(ailp->ail_mount)); + ASSERT(xlog_is_shutdown(ailp->ail_log)); if (lip->li_ops->iop_unpin) lip->li_ops->iop_unpin(lip, 1); continue; @@ -1210,3 +1210,89 @@ out_cancel: xfs_trans_cancel(tp); return error; } + +/* + * Allocate an transaction, lock and join the directory and child inodes to it, + * and reserve quota for a directory update. If there isn't sufficient space, + * @dblocks will be set to zero for a reservationless directory update and + * @nospace_error will be set to a negative errno describing the space + * constraint we hit. + * + * The caller must ensure that the on-disk dquots attached to this inode have + * already been allocated and initialized. The ILOCKs will be dropped when the + * transaction is committed or cancelled. + */ +int +xfs_trans_alloc_dir( + struct xfs_inode *dp, + struct xfs_trans_res *resv, + struct xfs_inode *ip, + unsigned int *dblocks, + struct xfs_trans **tpp, + int *nospace_error) +{ + struct xfs_trans *tp; + struct xfs_mount *mp = ip->i_mount; + unsigned int resblks; + bool retried = false; + int error; + +retry: + *nospace_error = 0; + resblks = *dblocks; + error = xfs_trans_alloc(mp, resv, resblks, 0, 0, &tp); + if (error == -ENOSPC) { + *nospace_error = error; + resblks = 0; + error = xfs_trans_alloc(mp, resv, resblks, 0, 0, &tp); + } + if (error) + return error; + + xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL); + + xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + + error = xfs_qm_dqattach_locked(dp, false); + if (error) { + /* Caller should have allocated the dquots! */ + ASSERT(error != -ENOENT); + goto out_cancel; + } + + error = xfs_qm_dqattach_locked(ip, false); + if (error) { + /* Caller should have allocated the dquots! */ + ASSERT(error != -ENOENT); + goto out_cancel; + } + + if (resblks == 0) + goto done; + + error = xfs_trans_reserve_quota_nblks(tp, dp, resblks, 0, false); + if (error == -EDQUOT || error == -ENOSPC) { + if (!retried) { + xfs_trans_cancel(tp); + xfs_blockgc_free_quota(dp, 0); + retried = true; + goto retry; + } + + *nospace_error = error; + resblks = 0; + error = 0; + } + if (error) + goto out_cancel; + +done: + *tpp = tp; + *dblocks = resblks; + return 0; + +out_cancel: + xfs_trans_cancel(tp); + return error; +} diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index a487b264a9eb..de177842b951 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -8,6 +8,7 @@ /* kernel only transaction subsystem defines */ +struct xlog; struct xfs_buf; struct xfs_buftarg; struct xfs_efd_log_item; @@ -31,7 +32,7 @@ struct xfs_log_item { struct list_head li_ail; /* AIL pointers */ struct list_head li_trans; /* transaction list */ xfs_lsn_t li_lsn; /* last on-disk lsn */ - struct xfs_mount *li_mountp; /* ptr to fs mount */ + struct xlog *li_log; struct xfs_ail *li_ailp; /* ptr to AIL */ uint li_type; /* item type */ unsigned long li_flags; /* misc flags */ @@ -259,6 +260,9 @@ int xfs_trans_alloc_icreate(struct xfs_mount *mp, struct xfs_trans_res *resv, int xfs_trans_alloc_ichange(struct xfs_inode *ip, struct xfs_dquot *udqp, struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, bool force, struct xfs_trans **tpp); +int xfs_trans_alloc_dir(struct xfs_inode *dp, struct xfs_trans_res *resv, + struct xfs_inode *ip, unsigned int *dblocks, + struct xfs_trans **tpp, int *nospace_error); static inline void xfs_trans_set_context( diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 2a8c8dc54c95..c2ccb98c7bcd 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -398,7 +398,7 @@ xfsaild_push_item( * If log item pinning is enabled, skip the push and track the item as * pinned. This can help induce head-behind-tail conditions. */ - if (XFS_TEST_ERROR(false, ailp->ail_mount, XFS_ERRTAG_LOG_ITEM_PIN)) + if (XFS_TEST_ERROR(false, ailp->ail_log->l_mp, XFS_ERRTAG_LOG_ITEM_PIN)) return XFS_ITEM_PINNED; /* @@ -418,7 +418,7 @@ static long xfsaild_push( struct xfs_ail *ailp) { - xfs_mount_t *mp = ailp->ail_mount; + struct xfs_mount *mp = ailp->ail_log->l_mp; struct xfs_ail_cursor cur; struct xfs_log_item *lip; xfs_lsn_t lsn; @@ -443,15 +443,27 @@ xfsaild_push( ailp->ail_log_flush = 0; XFS_STATS_INC(mp, xs_push_ail_flush); - xlog_cil_flush(mp->m_log); + xlog_cil_flush(ailp->ail_log); } spin_lock(&ailp->ail_lock); - /* barrier matches the ail_target update in xfs_ail_push() */ - smp_rmb(); - target = ailp->ail_target; - ailp->ail_target_prev = target; + /* + * If we have a sync push waiter, we always have to push till the AIL is + * empty. Update the target to point to the end of the AIL so that + * capture updates that occur after the sync push waiter has gone to + * sleep. + */ + if (waitqueue_active(&ailp->ail_empty)) { + lip = xfs_ail_max(ailp); + if (lip) + target = lip->li_lsn; + } else { + /* barrier matches the ail_target update in xfs_ail_push() */ + smp_rmb(); + target = ailp->ail_target; + ailp->ail_target_prev = target; + } /* we're done if the AIL is empty or our push has reached the end */ lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->ail_last_pushed_lsn); @@ -620,7 +632,7 @@ xfsaild( * opportunity to release such buffers from the queue. */ ASSERT(list_empty(&ailp->ail_buf_list) || - xfs_is_shutdown(ailp->ail_mount)); + xlog_is_shutdown(ailp->ail_log)); xfs_buf_delwri_cancel(&ailp->ail_buf_list); break; } @@ -683,7 +695,7 @@ xfs_ail_push( struct xfs_log_item *lip; lip = xfs_ail_min(ailp); - if (!lip || xfs_is_shutdown(ailp->ail_mount) || + if (!lip || xlog_is_shutdown(ailp->ail_log) || XFS_LSN_CMP(threshold_lsn, ailp->ail_target) <= 0) return; @@ -724,7 +736,6 @@ xfs_ail_push_all_sync( spin_lock(&ailp->ail_lock); while ((lip = xfs_ail_max(ailp)) != NULL) { prepare_to_wait(&ailp->ail_empty, &wait, TASK_UNINTERRUPTIBLE); - ailp->ail_target = lip->li_lsn; wake_up_process(ailp->ail_task); spin_unlock(&ailp->ail_lock); schedule(); @@ -740,7 +751,7 @@ xfs_ail_update_finish( struct xfs_ail *ailp, xfs_lsn_t old_lsn) __releases(ailp->ail_lock) { - struct xfs_mount *mp = ailp->ail_mount; + struct xlog *log = ailp->ail_log; /* if the tail lsn hasn't changed, don't do updates or wakeups. */ if (!old_lsn || old_lsn == __xfs_ail_min_lsn(ailp)) { @@ -748,13 +759,13 @@ xfs_ail_update_finish( return; } - if (!xfs_is_shutdown(mp)) - xlog_assign_tail_lsn_locked(mp); + if (!xlog_is_shutdown(log)) + xlog_assign_tail_lsn_locked(log->l_mp); if (list_empty(&ailp->ail_head)) wake_up_all(&ailp->ail_empty); spin_unlock(&ailp->ail_lock); - xfs_log_space_wake(mp); + xfs_log_space_wake(log->l_mp); } /* @@ -862,13 +873,13 @@ xfs_trans_ail_delete( int shutdown_type) { struct xfs_ail *ailp = lip->li_ailp; - struct xfs_mount *mp = ailp->ail_mount; + struct xfs_mount *mp = ailp->ail_log->l_mp; xfs_lsn_t tail_lsn; spin_lock(&ailp->ail_lock); if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) { spin_unlock(&ailp->ail_lock); - if (shutdown_type && !xfs_is_shutdown(mp)) { + if (shutdown_type && !xlog_is_shutdown(ailp->ail_log)) { xfs_alert_tag(mp, XFS_PTAG_AILDELETE, "%s: attempting to delete a log item that is not in the AIL", __func__); @@ -893,7 +904,7 @@ xfs_trans_ail_init( if (!ailp) return -ENOMEM; - ailp->ail_mount = mp; + ailp->ail_log = mp->m_log; INIT_LIST_HEAD(&ailp->ail_head); INIT_LIST_HEAD(&ailp->ail_cursors); spin_lock_init(&ailp->ail_lock); @@ -901,7 +912,7 @@ xfs_trans_ail_init( init_waitqueue_head(&ailp->ail_empty); ailp->ail_task = kthread_run(xfsaild, ailp, "xfsaild/%s", - ailp->ail_mount->m_super->s_id); + mp->m_super->s_id); if (IS_ERR(ailp->ail_task)) goto out_free_ailp; diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index 3004aeac9110..f0d79a9050ba 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -6,6 +6,7 @@ #ifndef __XFS_TRANS_PRIV_H__ #define __XFS_TRANS_PRIV_H__ +struct xlog; struct xfs_log_item; struct xfs_mount; struct xfs_trans; @@ -50,7 +51,7 @@ struct xfs_ail_cursor { * Eventually we need to drive the locking in here as well. */ struct xfs_ail { - struct xfs_mount *ail_mount; + struct xlog *ail_log; struct task_struct *ail_task; struct list_head ail_head; xfs_lsn_t ail_target; diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 7ad6c3d0db7d..86bf82dbd8b8 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -28,8 +28,8 @@ #define CEPH_INO_ROOT 1 -#define CEPH_INO_CEPH 2 /* hidden .ceph dir */ -#define CEPH_INO_DOTDOT 3 /* used by ceph fuse for parent (..) */ +#define CEPH_INO_CEPH 2 /* hidden .ceph dir */ +#define CEPH_INO_GLOBAL_SNAPREALM 3 /* global dummy snaprealm */ /* arbitrary limit on max # of monitors (cluster of 3 is typical) */ #define CEPH_MAX_MON 31 @@ -328,6 +328,7 @@ enum { CEPH_MDS_OP_LOOKUPPARENT = 0x00103, CEPH_MDS_OP_LOOKUPINO = 0x00104, CEPH_MDS_OP_LOOKUPNAME = 0x00105, + CEPH_MDS_OP_GETVXATTR = 0x00106, CEPH_MDS_OP_SETXATTR = 0x01105, CEPH_MDS_OP_RMXATTR = 0x01106, diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index edf62eaa6285..00af2c98da75 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -284,6 +284,7 @@ DEFINE_RB_LOOKUP_FUNC(name, type, keyfld, nodefld) extern struct kmem_cache *ceph_inode_cachep; extern struct kmem_cache *ceph_cap_cachep; +extern struct kmem_cache *ceph_cap_snap_cachep; extern struct kmem_cache *ceph_cap_flush_cachep; extern struct kmem_cache *ceph_dentry_cachep; extern struct kmem_cache *ceph_file_cachep; diff --git a/kernel/capability.c b/kernel/capability.c index 46a361dde042..765194f5d678 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -360,6 +360,7 @@ bool has_capability_noaudit(struct task_struct *t, int cap) { return has_ns_capability_noaudit(t, &init_user_ns, cap); } +EXPORT_SYMBOL(has_capability_noaudit); static bool ns_capable_common(struct user_namespace *ns, int cap, diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index cfe3b092c31d..075cd25363ac 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1544,6 +1544,29 @@ config CSD_LOCK_WAIT_DEBUG include the IPI handler function currently executing (if any) and relevant stack traces. +choice + prompt "Lock debugging: prove subsystem device_lock() correctness" + depends on PROVE_LOCKING + help + For subsystems that have instrumented their usage of the device_lock() + with nested annotations, enable lock dependency checking. The locking + hierarchy 'subclass' identifiers are not compatible across + sub-systems, so only one can be enabled at a time. + +config PROVE_NVDIMM_LOCKING + bool "NVDIMM" + depends on LIBNVDIMM + help + Enable lockdep to validate nd_device_lock() usage. + +config PROVE_CXL_LOCKING + bool "CXL" + depends on CXL_BUS + help + Enable lockdep to validate cxl_device_lock() usage. + +endchoice + endmenu # lock debugging config TRACE_IRQFLAGS diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index c81379f93ad5..c6e5bfc717d5 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -1773,10 +1773,8 @@ static int prepare_read_data(struct ceph_connection *con) bv.bv_page = con->bounce_page; bv.bv_offset = 0; - set_in_bvec(con, &bv); - } else { - set_in_bvec(con, &bv); } + set_in_bvec(con, &bv); con->v2.in_state = IN_S_PREPARE_READ_DATA_CONT; return 0; } @@ -1807,10 +1805,8 @@ static void prepare_read_data_cont(struct ceph_connection *con) if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) { bv.bv_page = con->bounce_page; bv.bv_offset = 0; - set_in_bvec(con, &bv); - } else { - set_in_bvec(con, &bv); } + set_in_bvec(con, &bv); WARN_ON(con->v2.in_state != IN_S_PREPARE_READ_DATA_CONT); return; } diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 1acdf2fc31c5..82e49ab0937d 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -3,8 +3,11 @@ ldflags-y += --wrap=acpi_table_parse_cedt ldflags-y += --wrap=is_acpi_device_node ldflags-y += --wrap=acpi_evaluate_integer ldflags-y += --wrap=acpi_pci_find_root -ldflags-y += --wrap=pci_walk_bus ldflags-y += --wrap=nvdimm_bus_register +ldflags-y += --wrap=devm_cxl_port_enumerate_dports +ldflags-y += --wrap=devm_cxl_setup_hdm +ldflags-y += --wrap=devm_cxl_add_passthrough_decoder +ldflags-y += --wrap=devm_cxl_enumerate_decoders DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl @@ -23,15 +26,26 @@ obj-m += cxl_pmem.o cxl_pmem-y := $(CXL_SRC)/pmem.o cxl_pmem-y += config_check.o +obj-m += cxl_port.o + +cxl_port-y := $(CXL_SRC)/port.o +cxl_port-y += config_check.o + +obj-m += cxl_mem.o + +cxl_mem-y := $(CXL_SRC)/mem.o +cxl_mem-y += mock_mem.o +cxl_mem-y += config_check.o + obj-m += cxl_core.o -cxl_core-y := $(CXL_CORE_SRC)/bus.o +cxl_core-y := $(CXL_CORE_SRC)/port.o cxl_core-y += $(CXL_CORE_SRC)/pmem.o cxl_core-y += $(CXL_CORE_SRC)/regs.o cxl_core-y += $(CXL_CORE_SRC)/memdev.o cxl_core-y += $(CXL_CORE_SRC)/mbox.o +cxl_core-y += $(CXL_CORE_SRC)/pci.o +cxl_core-y += $(CXL_CORE_SRC)/hdm.o cxl_core-y += config_check.o -cxl_core-y += mock_pmem.o - obj-m += test/ diff --git a/tools/testing/cxl/mock_acpi.c b/tools/testing/cxl/mock_acpi.c index 4c8a493ace56..55813de26d46 100644 --- a/tools/testing/cxl/mock_acpi.c +++ b/tools/testing/cxl/mock_acpi.c @@ -4,7 +4,6 @@ #include #include #include -#include #include #include "test/mock.h" @@ -34,76 +33,3 @@ out: put_cxl_mock_ops(index); return found; } - -static int match_add_root_port(struct pci_dev *pdev, void *data) -{ - struct cxl_walk_context *ctx = data; - struct pci_bus *root_bus = ctx->root; - struct cxl_port *port = ctx->port; - int type = pci_pcie_type(pdev); - struct device *dev = ctx->dev; - u32 lnkcap, port_num; - int rc; - - if (pdev->bus != root_bus) - return 0; - if (!pci_is_pcie(pdev)) - return 0; - if (type != PCI_EXP_TYPE_ROOT_PORT) - return 0; - if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP, - &lnkcap) != PCIBIOS_SUCCESSFUL) - return 0; - - /* TODO walk DVSEC to find component register base */ - port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap); - rc = cxl_add_dport(port, &pdev->dev, port_num, CXL_RESOURCE_NONE); - if (rc) { - dev_err(dev, "failed to add dport: %s (%d)\n", - dev_name(&pdev->dev), rc); - ctx->error = rc; - return rc; - } - ctx->count++; - - dev_dbg(dev, "add dport%d: %s\n", port_num, dev_name(&pdev->dev)); - - return 0; -} - -static int mock_add_root_port(struct platform_device *pdev, void *data) -{ - struct cxl_walk_context *ctx = data; - struct cxl_port *port = ctx->port; - struct device *dev = ctx->dev; - int rc; - - rc = cxl_add_dport(port, &pdev->dev, pdev->id, CXL_RESOURCE_NONE); - if (rc) { - dev_err(dev, "failed to add dport: %s (%d)\n", - dev_name(&pdev->dev), rc); - ctx->error = rc; - return rc; - } - ctx->count++; - - dev_dbg(dev, "add dport%d: %s\n", pdev->id, dev_name(&pdev->dev)); - - return 0; -} - -int match_add_root_ports(struct pci_dev *dev, void *data) -{ - int index, rc; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - struct platform_device *pdev = (struct platform_device *) dev; - - if (ops && ops->is_mock_port(pdev)) - rc = mock_add_root_port(pdev, data); - else - rc = match_add_root_port(dev, data); - - put_cxl_mock_ops(index); - - return rc; -} diff --git a/tools/testing/cxl/mock_mem.c b/tools/testing/cxl/mock_mem.c new file mode 100644 index 000000000000..d1dec5845139 --- /dev/null +++ b/tools/testing/cxl/mock_mem.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */ + +#include + +struct cxl_dev_state; +bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds) +{ + return true; +} diff --git a/tools/testing/cxl/mock_pmem.c b/tools/testing/cxl/mock_pmem.c deleted file mode 100644 index f7315e6f52c0..000000000000 --- a/tools/testing/cxl/mock_pmem.c +++ /dev/null @@ -1,24 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Copyright(c) 2021 Intel Corporation. All rights reserved. */ -#include -#include "test/mock.h" -#include - -int match_nvdimm_bridge(struct device *dev, const void *data) -{ - int index, rc = 0; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - const struct cxl_nvdimm *cxl_nvd = data; - - if (ops) { - if (dev->type == &cxl_nvdimm_bridge_type && - (ops->is_mock_dev(dev->parent->parent) == - ops->is_mock_dev(cxl_nvd->dev.parent->parent))) - rc = 1; - } else - rc = dev->type == &cxl_nvdimm_bridge_type; - - put_cxl_mock_ops(index); - - return rc; -} diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 736d99006fb7..431f2bddf6c8 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -8,16 +8,25 @@ #include #include #include +#include #include "mock.h" -#define NR_CXL_HOST_BRIDGES 4 +#define NR_CXL_HOST_BRIDGES 2 #define NR_CXL_ROOT_PORTS 2 +#define NR_CXL_SWITCH_PORTS 2 +#define NR_CXL_PORT_DECODERS 2 static struct platform_device *cxl_acpi; static struct platform_device *cxl_host_bridge[NR_CXL_HOST_BRIDGES]; static struct platform_device *cxl_root_port[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS]; -struct platform_device *cxl_mem[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS]; +static struct platform_device + *cxl_switch_uport[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS]; +static struct platform_device + *cxl_switch_dport[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS * + NR_CXL_SWITCH_PORTS]; +struct platform_device + *cxl_mem[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS * NR_CXL_SWITCH_PORTS]; static struct acpi_device acpi0017_mock; static struct acpi_device host_bridge[NR_CXL_HOST_BRIDGES] = { @@ -27,12 +36,6 @@ static struct acpi_device host_bridge[NR_CXL_HOST_BRIDGES] = { [1] = { .handle = &host_bridge[1], }, - [2] = { - .handle = &host_bridge[2], - }, - [3] = { - .handle = &host_bridge[3], - }, }; static bool is_mock_dev(struct device *dev) @@ -70,7 +73,7 @@ static struct { } cfmws0; struct { struct acpi_cedt_cfmws cfmws; - u32 target[4]; + u32 target[2]; } cfmws1; struct { struct acpi_cedt_cfmws cfmws; @@ -78,7 +81,7 @@ static struct { } cfmws2; struct { struct acpi_cedt_cfmws cfmws; - u32 target[4]; + u32 target[2]; } cfmws3; } __packed mock_cedt = { .cedt = { @@ -104,22 +107,6 @@ static struct { .uid = 1, .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20, }, - .chbs[2] = { - .header = { - .type = ACPI_CEDT_TYPE_CHBS, - .length = sizeof(mock_cedt.chbs[0]), - }, - .uid = 2, - .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20, - }, - .chbs[3] = { - .header = { - .type = ACPI_CEDT_TYPE_CHBS, - .length = sizeof(mock_cedt.chbs[0]), - }, - .uid = 3, - .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20, - }, .cfmws0 = { .cfmws = { .header = { @@ -141,14 +128,14 @@ static struct { .type = ACPI_CEDT_TYPE_CFMWS, .length = sizeof(mock_cedt.cfmws1), }, - .interleave_ways = 2, + .interleave_ways = 1, .granularity = 4, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_VOLATILE, .qtg_id = 1, - .window_size = SZ_256M * 4, + .window_size = SZ_256M * 2, }, - .target = { 0, 1, 2, 3 }, + .target = { 0, 1, }, }, .cfmws2 = { .cfmws = { @@ -171,14 +158,14 @@ static struct { .type = ACPI_CEDT_TYPE_CFMWS, .length = sizeof(mock_cedt.cfmws3), }, - .interleave_ways = 2, + .interleave_ways = 1, .granularity = 4, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = 3, - .window_size = SZ_256M * 4, + .window_size = SZ_256M * 2, }, - .target = { 0, 1, 2, 3 }, + .target = { 0, 1, }, }, }; @@ -317,6 +304,30 @@ static bool is_mock_bridge(struct device *dev) for (i = 0; i < ARRAY_SIZE(cxl_host_bridge); i++) if (dev == &cxl_host_bridge[i]->dev) return true; + return false; +} + +static bool is_mock_port(struct device *dev) +{ + int i; + + if (is_mock_bridge(dev)) + return true; + + for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++) + if (dev == &cxl_root_port[i]->dev) + return true; + + for (i = 0; i < ARRAY_SIZE(cxl_switch_uport); i++) + if (dev == &cxl_switch_uport[i]->dev) + return true; + + for (i = 0; i < ARRAY_SIZE(cxl_switch_dport); i++) + if (dev == &cxl_switch_dport[i]->dev) + return true; + + if (is_cxl_memdev(dev)) + return is_mock_dev(dev->parent); return false; } @@ -358,34 +369,8 @@ static struct acpi_pci_root mock_pci_root[NR_CXL_HOST_BRIDGES] = { [1] = { .bus = &mock_pci_bus[1], }, - [2] = { - .bus = &mock_pci_bus[2], - }, - [3] = { - .bus = &mock_pci_bus[3], - }, }; -static struct platform_device *mock_cxl_root_port(struct pci_bus *bus, int index) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(mock_pci_bus); i++) - if (bus == &mock_pci_bus[i]) - return cxl_root_port[index + i * NR_CXL_ROOT_PORTS]; - return NULL; -} - -static bool is_mock_port(struct platform_device *pdev) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++) - if (pdev == cxl_root_port[i]) - return true; - return false; -} - static bool is_mock_bus(struct pci_bus *bus) { int i; @@ -405,16 +390,166 @@ static struct acpi_pci_root *mock_acpi_pci_find_root(acpi_handle handle) return &mock_pci_root[host_bridge_index(adev)]; } +static struct cxl_hdm *mock_cxl_setup_hdm(struct cxl_port *port) +{ + struct cxl_hdm *cxlhdm = devm_kzalloc(&port->dev, sizeof(*cxlhdm), GFP_KERNEL); + + if (!cxlhdm) + return ERR_PTR(-ENOMEM); + + cxlhdm->port = port; + return cxlhdm; +} + +static int mock_cxl_add_passthrough_decoder(struct cxl_port *port) +{ + dev_err(&port->dev, "unexpected passthrough decoder for cxl_test\n"); + return -EOPNOTSUPP; +} + + +struct target_map_ctx { + int *target_map; + int index; + int target_count; +}; + +static int map_targets(struct device *dev, void *data) +{ + struct platform_device *pdev = to_platform_device(dev); + struct target_map_ctx *ctx = data; + + ctx->target_map[ctx->index++] = pdev->id; + + if (ctx->index > ctx->target_count) { + dev_WARN_ONCE(dev, 1, "too many targets found?\n"); + return -ENXIO; + } + + return 0; +} + +static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) +{ + struct cxl_port *port = cxlhdm->port; + struct cxl_port *parent_port = to_cxl_port(port->dev.parent); + int target_count, i; + + if (is_cxl_endpoint(port)) + target_count = 0; + else if (is_cxl_root(parent_port)) + target_count = NR_CXL_ROOT_PORTS; + else + target_count = NR_CXL_SWITCH_PORTS; + + for (i = 0; i < NR_CXL_PORT_DECODERS; i++) { + int target_map[CXL_DECODER_MAX_INTERLEAVE] = { 0 }; + struct target_map_ctx ctx = { + .target_map = target_map, + .target_count = target_count, + }; + struct cxl_decoder *cxld; + int rc; + + if (target_count) + cxld = cxl_switch_decoder_alloc(port, target_count); + else + cxld = cxl_endpoint_decoder_alloc(port); + if (IS_ERR(cxld)) { + dev_warn(&port->dev, + "Failed to allocate the decoder\n"); + return PTR_ERR(cxld); + } + + cxld->decoder_range = (struct range) { + .start = 0, + .end = -1, + }; + + cxld->flags = CXL_DECODER_F_ENABLE; + cxld->interleave_ways = min_not_zero(target_count, 1); + cxld->interleave_granularity = SZ_4K; + cxld->target_type = CXL_DECODER_EXPANDER; + + if (target_count) { + rc = device_for_each_child(port->uport, &ctx, + map_targets); + if (rc) { + put_device(&cxld->dev); + return rc; + } + } + + rc = cxl_decoder_add_locked(cxld, target_map); + if (rc) { + put_device(&cxld->dev); + dev_err(&port->dev, "Failed to add decoder\n"); + return rc; + } + + rc = cxl_decoder_autoremove(&port->dev, cxld); + if (rc) + return rc; + dev_dbg(&cxld->dev, "Added to port %s\n", dev_name(&port->dev)); + } + + return 0; +} + +static int mock_cxl_port_enumerate_dports(struct cxl_port *port) +{ + struct device *dev = &port->dev; + struct platform_device **array; + int i, array_size; + + if (port->depth == 1) { + array_size = ARRAY_SIZE(cxl_root_port); + array = cxl_root_port; + } else if (port->depth == 2) { + array_size = ARRAY_SIZE(cxl_switch_dport); + array = cxl_switch_dport; + } else { + dev_WARN_ONCE(&port->dev, 1, "unexpected depth %d\n", + port->depth); + return -ENXIO; + } + + for (i = 0; i < array_size; i++) { + struct platform_device *pdev = array[i]; + struct cxl_dport *dport; + + if (pdev->dev.parent != port->uport) + continue; + + dport = devm_cxl_add_dport(port, &pdev->dev, pdev->id, + CXL_RESOURCE_NONE); + + if (IS_ERR(dport)) { + dev_err(dev, "failed to add dport: %s (%ld)\n", + dev_name(&pdev->dev), PTR_ERR(dport)); + return PTR_ERR(dport); + } + + dev_dbg(dev, "add dport%d: %s\n", pdev->id, + dev_name(&pdev->dev)); + } + + return 0; +} + static struct cxl_mock_ops cxl_mock_ops = { .is_mock_adev = is_mock_adev, .is_mock_bridge = is_mock_bridge, .is_mock_bus = is_mock_bus, .is_mock_port = is_mock_port, .is_mock_dev = is_mock_dev, - .mock_port = mock_cxl_root_port, .acpi_table_parse_cedt = mock_acpi_table_parse_cedt, .acpi_evaluate_integer = mock_acpi_evaluate_integer, .acpi_pci_find_root = mock_acpi_pci_find_root, + .devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports, + .devm_cxl_setup_hdm = mock_cxl_setup_hdm, + .devm_cxl_add_passthrough_decoder = mock_cxl_add_passthrough_decoder, + .devm_cxl_enumerate_decoders = mock_cxl_enumerate_decoders, .list = LIST_HEAD_INIT(cxl_mock_ops.list), }; @@ -506,12 +641,17 @@ static __init int cxl_test_init(void) platform_device_put(pdev); goto err_bridge; } + cxl_host_bridge[i] = pdev; + rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj, + "physical_node"); + if (rc) + goto err_bridge; } for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++) { struct platform_device *bridge = - cxl_host_bridge[i / NR_CXL_ROOT_PORTS]; + cxl_host_bridge[i % ARRAY_SIZE(cxl_host_bridge)]; struct platform_device *pdev; pdev = platform_device_alloc("cxl_root_port", i); @@ -527,15 +667,52 @@ static __init int cxl_test_init(void) cxl_root_port[i] = pdev; } - BUILD_BUG_ON(ARRAY_SIZE(cxl_mem) != ARRAY_SIZE(cxl_root_port)); + BUILD_BUG_ON(ARRAY_SIZE(cxl_switch_uport) != ARRAY_SIZE(cxl_root_port)); + for (i = 0; i < ARRAY_SIZE(cxl_switch_uport); i++) { + struct platform_device *root_port = cxl_root_port[i]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_switch_uport", i); + if (!pdev) + goto err_port; + pdev->dev.parent = &root_port->dev; + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_uport; + } + cxl_switch_uport[i] = pdev; + } + + for (i = 0; i < ARRAY_SIZE(cxl_switch_dport); i++) { + struct platform_device *uport = + cxl_switch_uport[i % ARRAY_SIZE(cxl_switch_uport)]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_switch_dport", i); + if (!pdev) + goto err_port; + pdev->dev.parent = &uport->dev; + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_dport; + } + cxl_switch_dport[i] = pdev; + } + + BUILD_BUG_ON(ARRAY_SIZE(cxl_mem) != ARRAY_SIZE(cxl_switch_dport)); for (i = 0; i < ARRAY_SIZE(cxl_mem); i++) { - struct platform_device *port = cxl_root_port[i]; + struct platform_device *dport = cxl_switch_dport[i]; struct platform_device *pdev; pdev = alloc_memdev(i); if (!pdev) goto err_mem; - pdev->dev.parent = &port->dev; + pdev->dev.parent = &dport->dev; + set_dev_node(&pdev->dev, i % 2); rc = platform_device_add(pdev); if (rc) { @@ -563,12 +740,24 @@ err_add: err_mem: for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--) platform_device_unregister(cxl_mem[i]); +err_dport: + for (i = ARRAY_SIZE(cxl_switch_dport) - 1; i >= 0; i--) + platform_device_unregister(cxl_switch_dport[i]); +err_uport: + for (i = ARRAY_SIZE(cxl_switch_uport) - 1; i >= 0; i--) + platform_device_unregister(cxl_switch_uport[i]); err_port: for (i = ARRAY_SIZE(cxl_root_port) - 1; i >= 0; i--) platform_device_unregister(cxl_root_port[i]); err_bridge: - for (i = ARRAY_SIZE(cxl_host_bridge) - 1; i >= 0; i--) + for (i = ARRAY_SIZE(cxl_host_bridge) - 1; i >= 0; i--) { + struct platform_device *pdev = cxl_host_bridge[i]; + + if (!pdev) + continue; + sysfs_remove_link(&pdev->dev.kobj, "physical_node"); platform_device_unregister(cxl_host_bridge[i]); + } err_populate: depopulate_all_mock_resources(); err_gen_pool_add: @@ -585,10 +774,20 @@ static __exit void cxl_test_exit(void) platform_device_unregister(cxl_acpi); for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--) platform_device_unregister(cxl_mem[i]); + for (i = ARRAY_SIZE(cxl_switch_dport) - 1; i >= 0; i--) + platform_device_unregister(cxl_switch_dport[i]); + for (i = ARRAY_SIZE(cxl_switch_uport) - 1; i >= 0; i--) + platform_device_unregister(cxl_switch_uport[i]); for (i = ARRAY_SIZE(cxl_root_port) - 1; i >= 0; i--) platform_device_unregister(cxl_root_port[i]); - for (i = ARRAY_SIZE(cxl_host_bridge) - 1; i >= 0; i--) + for (i = ARRAY_SIZE(cxl_host_bridge) - 1; i >= 0; i--) { + struct platform_device *pdev = cxl_host_bridge[i]; + + if (!pdev) + continue; + sysfs_remove_link(&pdev->dev.kobj, "physical_node"); platform_device_unregister(cxl_host_bridge[i]); + } depopulate_all_mock_resources(); gen_pool_destroy(cxl_mock_pool); unregister_cxl_mock_ops(&cxl_mock_ops); @@ -598,3 +797,4 @@ module_init(cxl_test_init); module_exit(cxl_test_exit); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); +MODULE_IMPORT_NS(CXL); diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 8c2086c4caef..b6b726eff3e2 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -236,11 +237,25 @@ static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd * return rc; } +static int cxl_mock_wait_media_ready(struct cxl_dev_state *cxlds) +{ + msleep(100); + return 0; +} + static void label_area_release(void *lsa) { vfree(lsa); } +static void mock_validate_dvsec_ranges(struct cxl_dev_state *cxlds) +{ + struct cxl_endpoint_dvsec_info *info; + + info = &cxlds->info; + info->mem_enabled = true; +} + static int cxl_mock_mem_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -261,7 +276,9 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (IS_ERR(cxlds)) return PTR_ERR(cxlds); + cxlds->serial = pdev->id; cxlds->mbox_send = cxl_mock_mbox_send; + cxlds->wait_media_ready = cxl_mock_wait_media_ready; cxlds->payload_size = SZ_4K; rc = cxl_enumerate_cmds(cxlds); @@ -276,6 +293,8 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; + mock_validate_dvsec_ranges(cxlds); + cxlmd = devm_cxl_add_memdev(cxlds); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 17408f892df4..6e8c9d63c92d 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include "mock.h" static LIST_HEAD(mock); @@ -114,32 +116,6 @@ struct acpi_pci_root *__wrap_acpi_pci_find_root(acpi_handle handle) } EXPORT_SYMBOL_GPL(__wrap_acpi_pci_find_root); -void __wrap_pci_walk_bus(struct pci_bus *bus, - int (*cb)(struct pci_dev *, void *), void *userdata) -{ - int index; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - - if (ops && ops->is_mock_bus(bus)) { - int rc, i; - - /* - * Simulate 2 root ports per host-bridge and no - * depth recursion. - */ - for (i = 0; i < 2; i++) { - rc = cb((struct pci_dev *) ops->mock_port(bus, i), - userdata); - if (rc) - break; - } - } else - pci_walk_bus(bus, cb, userdata); - - put_cxl_mock_ops(index); -} -EXPORT_SYMBOL_GPL(__wrap_pci_walk_bus); - struct nvdimm_bus * __wrap_nvdimm_bus_register(struct device *dev, struct nvdimm_bus_descriptor *nd_desc) @@ -155,5 +131,68 @@ __wrap_nvdimm_bus_register(struct device *dev, } EXPORT_SYMBOL_GPL(__wrap_nvdimm_bus_register); +struct cxl_hdm *__wrap_devm_cxl_setup_hdm(struct cxl_port *port) +{ + int index; + struct cxl_hdm *cxlhdm; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops && ops->is_mock_port(port->uport)) + cxlhdm = ops->devm_cxl_setup_hdm(port); + else + cxlhdm = devm_cxl_setup_hdm(port); + put_cxl_mock_ops(index); + + return cxlhdm; +} +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_setup_hdm, CXL); + +int __wrap_devm_cxl_add_passthrough_decoder(struct cxl_port *port) +{ + int rc, index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops && ops->is_mock_port(port->uport)) + rc = ops->devm_cxl_add_passthrough_decoder(port); + else + rc = devm_cxl_add_passthrough_decoder(port); + put_cxl_mock_ops(index); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_passthrough_decoder, CXL); + +int __wrap_devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) +{ + int rc, index; + struct cxl_port *port = cxlhdm->port; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops && ops->is_mock_port(port->uport)) + rc = ops->devm_cxl_enumerate_decoders(cxlhdm); + else + rc = devm_cxl_enumerate_decoders(cxlhdm); + put_cxl_mock_ops(index); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_enumerate_decoders, CXL); + +int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port) +{ + int rc, index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops && ops->is_mock_port(port->uport)) + rc = ops->devm_cxl_port_enumerate_dports(port); + else + rc = devm_cxl_port_enumerate_dports(port); + put_cxl_mock_ops(index); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_port_enumerate_dports, CXL); + MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); +MODULE_IMPORT_NS(CXL); diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h index 15ed0fd877e4..738f24e3988a 100644 --- a/tools/testing/cxl/test/mock.h +++ b/tools/testing/cxl/test/mock.h @@ -2,6 +2,7 @@ #include #include +#include struct cxl_mock_ops { struct list_head list; @@ -15,10 +16,13 @@ struct cxl_mock_ops { struct acpi_object_list *arguments, unsigned long long *data); struct acpi_pci_root *(*acpi_pci_find_root)(acpi_handle handle); - struct platform_device *(*mock_port)(struct pci_bus *bus, int index); bool (*is_mock_bus)(struct pci_bus *bus); - bool (*is_mock_port)(struct platform_device *pdev); + bool (*is_mock_port)(struct device *dev); bool (*is_mock_dev)(struct device *dev); + int (*devm_cxl_port_enumerate_dports)(struct cxl_port *port); + struct cxl_hdm *(*devm_cxl_setup_hdm)(struct cxl_port *port); + int (*devm_cxl_add_passthrough_decoder)(struct cxl_port *port); + int (*devm_cxl_enumerate_decoders)(struct cxl_hdm *hdm); }; void register_cxl_mock_ops(struct cxl_mock_ops *ops);