diff --git a/.clang-format b/.clang-format index 6cbd6ee51610..10dc5a9a61b3 100644 --- a/.clang-format +++ b/.clang-format @@ -429,6 +429,7 @@ ForEachMacros: - 'rbtree_postorder_for_each_entry_safe' - 'rdma_for_each_block' - 'rdma_for_each_port' + - 'rdma_umem_for_each_dma_block' - 'resource_list_for_each_entry' - 'resource_list_for_each_entry_safe' - 'rhl_for_each_entry_rcu' diff --git a/Documentation/ABI/stable/sysfs-class-infiniband b/Documentation/ABI/stable/sysfs-class-infiniband index 96dfe1926b76..87b11f91b425 100644 --- a/Documentation/ABI/stable/sysfs-class-infiniband +++ b/Documentation/ABI/stable/sysfs-class-infiniband @@ -258,23 +258,6 @@ Description: userspace ABI compatibility of umad & issm devices. -What: /sys/class/infiniband_cm/ucmN/ibdev -Date: Oct, 2005 -KernelVersion: v2.6.14 -Contact: linux-rdma@vger.kernel.org -Description: - (RO) Display Infiniband (IB) device name - - -What: /sys/class/infiniband_cm/abi_version -Date: Oct, 2005 -KernelVersion: v2.6.14 -Contact: linux-rdma@vger.kernel.org -Description: - (RO) Value is incremented if any changes are made that break - userspace ABI compatibility of ucm devices. - - What: /sys/class/infiniband_verbs/uverbsN/ibdev What: /sys/class/infiniband_verbs/uverbsN/abi_version Date: Sept, 2005 diff --git a/Documentation/devicetree/bindings/mtd/nand-controller.yaml b/Documentation/devicetree/bindings/mtd/nand-controller.yaml index 274bbe6a365e..b29050fd7470 100644 --- a/Documentation/devicetree/bindings/mtd/nand-controller.yaml +++ b/Documentation/devicetree/bindings/mtd/nand-controller.yaml @@ -55,6 +55,37 @@ patternProperties: $ref: /schemas/types.yaml#/definitions/string enum: [none, soft, hw, hw_syndrome, hw_oob_first, on-die] + nand-ecc-engine: + allOf: + - $ref: /schemas/types.yaml#/definitions/phandle + description: | + A phandle on the hardware ECC engine if any. There are + basically three possibilities: + 1/ The ECC engine is part of the NAND controller, in this + case the phandle should reference the parent node. + 2/ The ECC engine is part of the NAND part (on-die), in this + case the phandle should reference the node itself. + 3/ The ECC engine is external, in this case the phandle should + reference the specific ECC engine node. + + nand-use-soft-ecc-engine: + type: boolean + description: Use a software ECC engine. + + nand-no-ecc-engine: + type: boolean + description: Do not use any ECC correction. + + nand-ecc-placement: + allOf: + - $ref: /schemas/types.yaml#/definitions/string + - enum: [ oob, interleaved ] + description: + Location of the ECC bytes. This location is unknown by default + but can be explicitly set to "oob", if all ECC bytes are + known to be stored in the OOB area, or "interleaved" if ECC + bytes will be interleaved with regular data in the main area. + nand-ecc-algo: description: Desired ECC algorithm. diff --git a/Documentation/devicetree/bindings/thermal/allwinner,sun8i-a83t-ths.yaml b/Documentation/devicetree/bindings/thermal/allwinner,sun8i-a83t-ths.yaml index 44ba6765697d..31edd051295a 100644 --- a/Documentation/devicetree/bindings/thermal/allwinner,sun8i-a83t-ths.yaml +++ b/Documentation/devicetree/bindings/thermal/allwinner,sun8i-a83t-ths.yaml @@ -17,6 +17,7 @@ properties: - allwinner,sun8i-h3-ths - allwinner,sun8i-r40-ths - allwinner,sun50i-a64-ths + - allwinner,sun50i-a100-ths - allwinner,sun50i-h5-ths - allwinner,sun50i-h6-ths @@ -61,7 +62,9 @@ allOf: properties: compatible: contains: - const: allwinner,sun50i-h6-ths + enum: + - allwinner,sun50i-a100-ths + - allwinner,sun50i-h6-ths then: properties: @@ -103,6 +106,7 @@ allOf: - const: allwinner,sun8i-h3-ths - const: allwinner,sun8i-r40-ths - const: allwinner,sun50i-a64-ths + - const: allwinner,sun50i-a100-ths - const: allwinner,sun50i-h5-ths - const: allwinner,sun50i-h6-ths diff --git a/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml b/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml index b1a55ae497de..f386f2a7c06c 100644 --- a/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml +++ b/Documentation/devicetree/bindings/thermal/rcar-gen3-thermal.yaml @@ -20,6 +20,7 @@ properties: enum: - renesas,r8a774a1-thermal # RZ/G2M - renesas,r8a774b1-thermal # RZ/G2N + - renesas,r8a774e1-thermal # RZ/G2H - renesas,r8a7795-thermal # R-Car H3 - renesas,r8a7796-thermal # R-Car M3-W - renesas,r8a77961-thermal # R-Car M3-W+ diff --git a/MAINTAINERS b/MAINTAINERS index f71621ff059e..79024af61202 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4256,7 +4256,6 @@ F: drivers/net/ethernet/cisco/enic/ CISCO VIC LOW LATENCY NIC DRIVER M: Christian Benvenuti M: Nelson Escobar -M: Parvi Kaustubhi S: Supported F: drivers/infiniband/hw/usnic/ @@ -7793,8 +7792,8 @@ F: include/linux/cciss*.h F: include/uapi/linux/cciss*.h HFI1 DRIVER -M: Mike Marciniszyn -M: Dennis Dalessandro +M: Mike Marciniszyn +M: Dennis Dalessandro L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/hw/hfi1 @@ -13012,8 +13011,8 @@ S: Maintained F: drivers/char/hw_random/optee-rng.c OPA-VNIC DRIVER -M: Dennis Dalessandro -M: Niranjana Vishwanathapura +M: Dennis Dalessandro +M: Mike Marciniszyn L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/ulp/opa_vnic @@ -14314,8 +14313,8 @@ F: drivers/firmware/qemu_fw_cfg.c F: include/uapi/linux/qemu_fw_cfg.h QIB DRIVER -M: Dennis Dalessandro -M: Mike Marciniszyn +M: Dennis Dalessandro +M: Mike Marciniszyn L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/hw/qib/ @@ -14740,8 +14739,8 @@ S: Maintained F: drivers/net/ethernet/rdc/r6040.c RDMAVT - RDMA verbs software -M: Dennis Dalessandro -M: Mike Marciniszyn +M: Dennis Dalessandro +M: Mike Marciniszyn L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/sw/rdmavt diff --git a/arch/arm/mach-davinci/board-da830-evm.c b/arch/arm/mach-davinci/board-da830-evm.c index 1076886938b6..a20ba12d876c 100644 --- a/arch/arm/mach-davinci/board-da830-evm.c +++ b/arch/arm/mach-davinci/board-da830-evm.c @@ -306,7 +306,7 @@ static struct davinci_nand_pdata da830_evm_nand_pdata = { .core_chipsel = 1, .parts = da830_evm_nand_partitions, .nr_parts = ARRAY_SIZE(da830_evm_nand_partitions), - .ecc_mode = NAND_ECC_HW, + .engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST, .ecc_bits = 4, .bbt_options = NAND_BBT_USE_FLASH, .bbt_td = &da830_evm_nand_bbt_main_descr, diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c index 6751292e5f8f..428012687a80 100644 --- a/arch/arm/mach-davinci/board-da850-evm.c +++ b/arch/arm/mach-davinci/board-da850-evm.c @@ -239,7 +239,7 @@ static struct davinci_nand_pdata da850_evm_nandflash_data = { .core_chipsel = 1, .parts = da850_evm_nandflash_partition, .nr_parts = ARRAY_SIZE(da850_evm_nandflash_partition), - .ecc_mode = NAND_ECC_HW, + .engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST, .ecc_bits = 4, .bbt_options = NAND_BBT_USE_FLASH, .timing = &da850_evm_nandflash_timing, diff --git a/arch/arm/mach-davinci/board-dm355-evm.c b/arch/arm/mach-davinci/board-dm355-evm.c index 5113273fda69..3c5a9e3c128a 100644 --- a/arch/arm/mach-davinci/board-dm355-evm.c +++ b/arch/arm/mach-davinci/board-dm355-evm.c @@ -82,7 +82,7 @@ static struct davinci_nand_pdata davinci_nand_data = { .mask_chipsel = BIT(14), .parts = davinci_nand_partitions, .nr_parts = ARRAY_SIZE(davinci_nand_partitions), - .ecc_mode = NAND_ECC_HW, + .engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST, .bbt_options = NAND_BBT_USE_FLASH, .ecc_bits = 4, }; diff --git a/arch/arm/mach-davinci/board-dm355-leopard.c b/arch/arm/mach-davinci/board-dm355-leopard.c index b9e9950dd300..e475b2113e70 100644 --- a/arch/arm/mach-davinci/board-dm355-leopard.c +++ b/arch/arm/mach-davinci/board-dm355-leopard.c @@ -76,7 +76,8 @@ static struct davinci_nand_pdata davinci_nand_data = { .mask_chipsel = BIT(14), .parts = davinci_nand_partitions, .nr_parts = ARRAY_SIZE(davinci_nand_partitions), - .ecc_mode = NAND_ECC_HW_SYNDROME, + .engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST, + .ecc_placement = NAND_ECC_PLACEMENT_INTERLEAVED, .ecc_bits = 4, .bbt_options = NAND_BBT_USE_FLASH, }; diff --git a/arch/arm/mach-davinci/board-dm365-evm.c b/arch/arm/mach-davinci/board-dm365-evm.c index 2328b15ac067..bdf31eb77620 100644 --- a/arch/arm/mach-davinci/board-dm365-evm.c +++ b/arch/arm/mach-davinci/board-dm365-evm.c @@ -146,7 +146,7 @@ static struct davinci_nand_pdata davinci_nand_data = { .mask_chipsel = BIT(14), .parts = davinci_nand_partitions, .nr_parts = ARRAY_SIZE(davinci_nand_partitions), - .ecc_mode = NAND_ECC_HW, + .engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST, .bbt_options = NAND_BBT_USE_FLASH, .ecc_bits = 4, }; diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c index a5d3708fedf6..bcb3c4070945 100644 --- a/arch/arm/mach-davinci/board-dm644x-evm.c +++ b/arch/arm/mach-davinci/board-dm644x-evm.c @@ -162,7 +162,7 @@ static struct davinci_nand_pdata davinci_evm_nandflash_data = { .core_chipsel = 0, .parts = davinci_evm_nandflash_partition, .nr_parts = ARRAY_SIZE(davinci_evm_nandflash_partition), - .ecc_mode = NAND_ECC_HW, + .engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST, .ecc_bits = 1, .bbt_options = NAND_BBT_USE_FLASH, .timing = &davinci_evm_nandflash_timing, diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c index dd7d60f4139a..8319a6067a68 100644 --- a/arch/arm/mach-davinci/board-dm646x-evm.c +++ b/arch/arm/mach-davinci/board-dm646x-evm.c @@ -91,7 +91,7 @@ static struct davinci_nand_pdata davinci_nand_data = { .mask_ale = 0x40000, .parts = davinci_nand_partitions, .nr_parts = ARRAY_SIZE(davinci_nand_partitions), - .ecc_mode = NAND_ECC_HW, + .engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST, .ecc_bits = 1, .options = 0, }; diff --git a/arch/arm/mach-davinci/board-mityomapl138.c b/arch/arm/mach-davinci/board-mityomapl138.c index 3382b93d9a2a..5205008c8061 100644 --- a/arch/arm/mach-davinci/board-mityomapl138.c +++ b/arch/arm/mach-davinci/board-mityomapl138.c @@ -432,7 +432,7 @@ static struct davinci_nand_pdata mityomapl138_nandflash_data = { .core_chipsel = 1, .parts = mityomapl138_nandflash_partition, .nr_parts = ARRAY_SIZE(mityomapl138_nandflash_partition), - .ecc_mode = NAND_ECC_HW, + .engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST, .bbt_options = NAND_BBT_USE_FLASH, .options = NAND_BUSWIDTH_16, .ecc_bits = 1, /* 4 bit mode is not supported with 16 bit NAND */ diff --git a/arch/arm/mach-davinci/board-neuros-osd2.c b/arch/arm/mach-davinci/board-neuros-osd2.c index 6cf46bbc7e1d..b4843f68bb57 100644 --- a/arch/arm/mach-davinci/board-neuros-osd2.c +++ b/arch/arm/mach-davinci/board-neuros-osd2.c @@ -90,7 +90,7 @@ static struct davinci_nand_pdata davinci_ntosd2_nandflash_data = { .core_chipsel = 0, .parts = davinci_ntosd2_nandflash_partition, .nr_parts = ARRAY_SIZE(davinci_ntosd2_nandflash_partition), - .ecc_mode = NAND_ECC_HW, + .engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST, .ecc_bits = 1, .bbt_options = NAND_BBT_USE_FLASH, }; diff --git a/arch/arm/mach-davinci/board-omapl138-hawk.c b/arch/arm/mach-davinci/board-omapl138-hawk.c index 6c79039002c9..88df8011a4e6 100644 --- a/arch/arm/mach-davinci/board-omapl138-hawk.c +++ b/arch/arm/mach-davinci/board-omapl138-hawk.c @@ -206,7 +206,7 @@ static struct davinci_nand_pdata omapl138_hawk_nandflash_data = { .core_chipsel = 1, .parts = omapl138_hawk_nandflash_partition, .nr_parts = ARRAY_SIZE(omapl138_hawk_nandflash_partition), - .ecc_mode = NAND_ECC_HW, + .engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST, .ecc_bits = 4, .bbt_options = NAND_BBT_USE_FLASH, .options = NAND_BUSWIDTH_16, diff --git a/arch/arm/mach-s3c24xx/common-smdk.c b/arch/arm/mach-s3c24xx/common-smdk.c index 75064dfaceb1..121646ad1bb1 100644 --- a/arch/arm/mach-s3c24xx/common-smdk.c +++ b/arch/arm/mach-s3c24xx/common-smdk.c @@ -191,7 +191,7 @@ static struct s3c2410_platform_nand smdk_nand_info = { .twrph1 = 20, .nr_sets = ARRAY_SIZE(smdk_nand_sets), .sets = smdk_nand_sets, - .ecc_mode = NAND_ECC_SOFT, + .engine_type = NAND_ECC_ENGINE_TYPE_SOFT, }; /* devices we initialise */ diff --git a/arch/arm/mach-s3c24xx/mach-anubis.c b/arch/arm/mach-s3c24xx/mach-anubis.c index 072966dcad78..28326241e360 100644 --- a/arch/arm/mach-s3c24xx/mach-anubis.c +++ b/arch/arm/mach-s3c24xx/mach-anubis.c @@ -218,7 +218,7 @@ static struct s3c2410_platform_nand __initdata anubis_nand_info = { .nr_sets = ARRAY_SIZE(anubis_nand_sets), .sets = anubis_nand_sets, .select_chip = anubis_nand_select, - .ecc_mode = NAND_ECC_SOFT, + .engine_type = NAND_ECC_ENGINE_TYPE_SOFT, }; /* IDE channels */ diff --git a/arch/arm/mach-s3c24xx/mach-at2440evb.c b/arch/arm/mach-s3c24xx/mach-at2440evb.c index 58c5ef3cf1d7..04dedebdb57c 100644 --- a/arch/arm/mach-s3c24xx/mach-at2440evb.c +++ b/arch/arm/mach-s3c24xx/mach-at2440evb.c @@ -109,7 +109,7 @@ static struct s3c2410_platform_nand __initdata at2440evb_nand_info = { .twrph1 = 40, .nr_sets = ARRAY_SIZE(at2440evb_nand_sets), .sets = at2440evb_nand_sets, - .ecc_mode = NAND_ECC_SOFT, + .engine_type = NAND_ECC_ENGINE_TYPE_SOFT, }; /* DM9000AEP 10/100 ethernet controller */ diff --git a/arch/arm/mach-s3c24xx/mach-bast.c b/arch/arm/mach-s3c24xx/mach-bast.c index a7c3955ae8f6..6465eab0ab3a 100644 --- a/arch/arm/mach-s3c24xx/mach-bast.c +++ b/arch/arm/mach-s3c24xx/mach-bast.c @@ -294,7 +294,7 @@ static struct s3c2410_platform_nand __initdata bast_nand_info = { .nr_sets = ARRAY_SIZE(bast_nand_sets), .sets = bast_nand_sets, .select_chip = bast_nand_select, - .ecc_mode = NAND_ECC_SOFT, + .engine_type = NAND_ECC_ENGINE_TYPE_SOFT, }; /* DM9000 */ diff --git a/arch/arm/mach-s3c24xx/mach-gta02.c b/arch/arm/mach-s3c24xx/mach-gta02.c index 526fd0933289..732748170751 100644 --- a/arch/arm/mach-s3c24xx/mach-gta02.c +++ b/arch/arm/mach-s3c24xx/mach-gta02.c @@ -417,7 +417,7 @@ static struct s3c2410_platform_nand __initdata gta02_nand_info = { .twrph1 = 15, .nr_sets = ARRAY_SIZE(gta02_nand_sets), .sets = gta02_nand_sets, - .ecc_mode = NAND_ECC_SOFT, + .engine_type = NAND_ECC_ENGINE_TYPE_SOFT, }; diff --git a/arch/arm/mach-s3c24xx/mach-jive.c b/arch/arm/mach-s3c24xx/mach-jive.c index 885e8f12e4b9..8233dcff19e7 100644 --- a/arch/arm/mach-s3c24xx/mach-jive.c +++ b/arch/arm/mach-s3c24xx/mach-jive.c @@ -228,7 +228,7 @@ static struct s3c2410_platform_nand __initdata jive_nand_info = { .twrph1 = 40, .sets = jive_nand_sets, .nr_sets = ARRAY_SIZE(jive_nand_sets), - .ecc_mode = NAND_ECC_SOFT, + .engine_type = NAND_ECC_ENGINE_TYPE_SOFT, }; static int __init jive_mtdset(char *options) diff --git a/arch/arm/mach-s3c24xx/mach-mini2440.c b/arch/arm/mach-s3c24xx/mach-mini2440.c index 235749448311..057dcbaf1b22 100644 --- a/arch/arm/mach-s3c24xx/mach-mini2440.c +++ b/arch/arm/mach-s3c24xx/mach-mini2440.c @@ -296,7 +296,7 @@ static struct s3c2410_platform_nand mini2440_nand_info __initdata = { .nr_sets = ARRAY_SIZE(mini2440_nand_sets), .sets = mini2440_nand_sets, .ignore_unset_ecc = 1, - .ecc_mode = NAND_ECC_HW, + .engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST, }; /* DM9000AEP 10/100 ethernet controller */ diff --git a/arch/arm/mach-s3c24xx/mach-osiris.c b/arch/arm/mach-s3c24xx/mach-osiris.c index ee3630cb236a..157448827f61 100644 --- a/arch/arm/mach-s3c24xx/mach-osiris.c +++ b/arch/arm/mach-s3c24xx/mach-osiris.c @@ -234,7 +234,7 @@ static struct s3c2410_platform_nand __initdata osiris_nand_info = { .nr_sets = ARRAY_SIZE(osiris_nand_sets), .sets = osiris_nand_sets, .select_chip = osiris_nand_select, - .ecc_mode = NAND_ECC_SOFT, + .engine_type = NAND_ECC_ENGINE_TYPE_SOFT, }; /* PCMCIA control and configuration */ diff --git a/arch/arm/mach-s3c24xx/mach-qt2410.c b/arch/arm/mach-s3c24xx/mach-qt2410.c index ff9e3197309b..f3131d94e90b 100644 --- a/arch/arm/mach-s3c24xx/mach-qt2410.c +++ b/arch/arm/mach-s3c24xx/mach-qt2410.c @@ -287,7 +287,7 @@ static struct s3c2410_platform_nand __initdata qt2410_nand_info = { .twrph1 = 20, .nr_sets = ARRAY_SIZE(qt2410_nand_sets), .sets = qt2410_nand_sets, - .ecc_mode = NAND_ECC_SOFT, + .engine_type = NAND_ECC_ENGINE_TYPE_SOFT, }; /* UDC */ diff --git a/arch/arm/mach-s3c24xx/mach-rx1950.c b/arch/arm/mach-s3c24xx/mach-rx1950.c index e9806bf654b4..3645b9c838d9 100644 --- a/arch/arm/mach-s3c24xx/mach-rx1950.c +++ b/arch/arm/mach-s3c24xx/mach-rx1950.c @@ -620,7 +620,7 @@ static struct s3c2410_platform_nand rx1950_nand_info = { .twrph1 = 15, .nr_sets = ARRAY_SIZE(rx1950_nand_sets), .sets = rx1950_nand_sets, - .ecc_mode = NAND_ECC_SOFT, + .engine_type = NAND_ECC_ENGINE_TYPE_SOFT, }; static struct s3c2410_udc_mach_info rx1950_udc_cfg __initdata = { diff --git a/arch/arm/mach-s3c24xx/mach-rx3715.c b/arch/arm/mach-s3c24xx/mach-rx3715.c index 995f1ff34a1b..017010d67e01 100644 --- a/arch/arm/mach-s3c24xx/mach-rx3715.c +++ b/arch/arm/mach-s3c24xx/mach-rx3715.c @@ -158,7 +158,7 @@ static struct s3c2410_platform_nand __initdata rx3715_nand_info = { .twrph1 = 15, .nr_sets = ARRAY_SIZE(rx3715_nand_sets), .sets = rx3715_nand_sets, - .ecc_mode = NAND_ECC_SOFT, + .engine_type = NAND_ECC_ENGINE_TYPE_SOFT, }; static struct platform_device *rx3715_devices[] __initdata = { diff --git a/arch/arm/mach-s3c24xx/mach-vstms.c b/arch/arm/mach-s3c24xx/mach-vstms.c index d76b28b65e65..c5fa215a527e 100644 --- a/arch/arm/mach-s3c24xx/mach-vstms.c +++ b/arch/arm/mach-s3c24xx/mach-vstms.c @@ -112,7 +112,7 @@ static struct s3c2410_platform_nand __initdata vstms_nand_info = { .twrph1 = 20, .nr_sets = ARRAY_SIZE(vstms_nand_sets), .sets = vstms_nand_sets, - .ecc_mode = NAND_ECC_SOFT, + .engine_type = NAND_ECC_ENGINE_TYPE_SOFT, }; static struct platform_device *vstms_devices[] __initdata = { diff --git a/arch/arm/mach-s3c64xx/mach-hmt.c b/arch/arm/mach-s3c64xx/mach-hmt.c index e7080215c624..0d9acaf91701 100644 --- a/arch/arm/mach-s3c64xx/mach-hmt.c +++ b/arch/arm/mach-s3c64xx/mach-hmt.c @@ -199,7 +199,7 @@ static struct s3c2410_platform_nand hmt_nand_info = { .twrph1 = 40, .nr_sets = ARRAY_SIZE(hmt_nand_sets), .sets = hmt_nand_sets, - .ecc_mode = NAND_ECC_SOFT, + .engine_type = NAND_ECC_ENGINE_TYPE_SOFT, }; static struct gpio_led hmt_leds[] = { diff --git a/arch/arm/mach-s3c64xx/mach-mini6410.c b/arch/arm/mach-s3c64xx/mach-mini6410.c index 0dd36ae49e6a..6fbb57878746 100644 --- a/arch/arm/mach-s3c64xx/mach-mini6410.c +++ b/arch/arm/mach-s3c64xx/mach-mini6410.c @@ -136,7 +136,7 @@ static struct s3c2410_platform_nand mini6410_nand_info = { .twrph1 = 40, .nr_sets = ARRAY_SIZE(mini6410_nand_sets), .sets = mini6410_nand_sets, - .ecc_mode = NAND_ECC_SOFT, + .engine_type = NAND_ECC_ENGINE_TYPE_SOFT, }; static struct s3c_fb_pd_win mini6410_lcd_type0_fb_win = { diff --git a/arch/arm/mach-s3c64xx/mach-real6410.c b/arch/arm/mach-s3c64xx/mach-real6410.c index 0ff88b6859c4..1e98e530a6aa 100644 --- a/arch/arm/mach-s3c64xx/mach-real6410.c +++ b/arch/arm/mach-s3c64xx/mach-real6410.c @@ -188,7 +188,7 @@ static struct s3c2410_platform_nand real6410_nand_info = { .twrph1 = 40, .nr_sets = ARRAY_SIZE(real6410_nand_sets), .sets = real6410_nand_sets, - .ecc_mode = NAND_ECC_SOFT, + .engine_type = NAND_ECC_ENGINE_TYPE_SOFT, }; static struct platform_device *real6410_devices[] __initdata = { diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 541dbe980d9c..701b1ac80330 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -829,30 +829,27 @@ static const struct dma_buf_ops drm_gem_prime_dmabuf_ops = { struct sg_table *drm_prime_pages_to_sg(struct drm_device *dev, struct page **pages, unsigned int nr_pages) { - struct sg_table *sg = NULL; + struct sg_table *sg; + struct scatterlist *sge; size_t max_segment = 0; - int ret; sg = kmalloc(sizeof(struct sg_table), GFP_KERNEL); - if (!sg) { - ret = -ENOMEM; - goto out; - } + if (!sg) + return ERR_PTR(-ENOMEM); if (dev) max_segment = dma_max_mapping_size(dev->dev); if (max_segment == 0 || max_segment > SCATTERLIST_MAX_SEGMENT) max_segment = SCATTERLIST_MAX_SEGMENT; - ret = __sg_alloc_table_from_pages(sg, pages, nr_pages, 0, + sge = __sg_alloc_table_from_pages(sg, pages, nr_pages, 0, nr_pages << PAGE_SHIFT, - max_segment, GFP_KERNEL); - if (ret) - goto out; - + max_segment, + NULL, 0, GFP_KERNEL); + if (IS_ERR(sge)) { + kfree(sg); + sg = ERR_CAST(sge); + } return sg; -out: - kfree(sg); - return ERR_PTR(ret); } EXPORT_SYMBOL(drm_prime_pages_to_sg); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 12b30075134a..f2eaed6aca3d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -403,6 +403,7 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj, unsigned int max_segment = i915_sg_segment_size(); struct sg_table *st; unsigned int sg_page_sizes; + struct scatterlist *sg; int ret; st = kmalloc(sizeof(*st), GFP_KERNEL); @@ -410,13 +411,12 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj, return ERR_PTR(-ENOMEM); alloc_table: - ret = __sg_alloc_table_from_pages(st, pvec, num_pages, - 0, num_pages << PAGE_SHIFT, - max_segment, - GFP_KERNEL); - if (ret) { + sg = __sg_alloc_table_from_pages(st, pvec, num_pages, 0, + num_pages << PAGE_SHIFT, max_segment, + NULL, 0, GFP_KERNEL); + if (IS_ERR(sg)) { kfree(st); - return ERR_PTR(ret); + return ERR_CAST(sg); } ret = i915_gem_gtt_prepare_pages(obj, st); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c index 7f0310441da1..73116ec70ba5 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c @@ -432,6 +432,7 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt) int ret = 0; static size_t sgl_size; static size_t sgt_size; + struct scatterlist *sg; if (vmw_tt->mapped) return 0; @@ -454,13 +455,15 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt) if (unlikely(ret != 0)) return ret; - ret = __sg_alloc_table_from_pages - (&vmw_tt->sgt, vsgt->pages, vsgt->num_pages, 0, - (unsigned long) vsgt->num_pages << PAGE_SHIFT, - dma_get_max_seg_size(dev_priv->dev->dev), - GFP_KERNEL); - if (unlikely(ret != 0)) + sg = __sg_alloc_table_from_pages(&vmw_tt->sgt, vsgt->pages, + vsgt->num_pages, 0, + (unsigned long) vsgt->num_pages << PAGE_SHIFT, + dma_get_max_seg_size(dev_priv->dev->dev), + NULL, 0, GFP_KERNEL); + if (IS_ERR(sg)) { + ret = PTR_ERR(sg); goto out_sg_alloc_fail; + } if (vsgt->num_pages > vmw_tt->sgt.orig_nents) { uint64_t over_alloc = diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index 97f2e29265da..1c6b78ad5ade 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -1373,7 +1373,9 @@ static int i3c_master_reattach_i3c_dev(struct i3c_dev_desc *dev, enum i3c_addr_slot_status status; int ret; - if (dev->info.dyn_addr != old_dyn_addr) { + if (dev->info.dyn_addr != old_dyn_addr && + (!dev->boardinfo || + dev->info.dyn_addr != dev->boardinfo->init_dyn_addr)) { status = i3c_bus_get_addr_slot_status(&master->bus, dev->info.dyn_addr); if (status != I3C_ADDR_SLOT_FREE) @@ -1432,33 +1434,49 @@ static void i3c_master_detach_i2c_dev(struct i2c_dev_desc *dev) master->ops->detach_i2c_dev(dev); } -static void i3c_master_pre_assign_dyn_addr(struct i3c_dev_desc *dev) +static int i3c_master_early_i3c_dev_add(struct i3c_master_controller *master, + struct i3c_dev_boardinfo *boardinfo) { - struct i3c_master_controller *master = i3c_dev_get_master(dev); + struct i3c_device_info info = { + .static_addr = boardinfo->static_addr, + }; + struct i3c_dev_desc *i3cdev; int ret; - if (!dev->boardinfo || !dev->boardinfo->init_dyn_addr || - !dev->boardinfo->static_addr) - return; + i3cdev = i3c_master_alloc_i3c_dev(master, &info); + if (IS_ERR(i3cdev)) + return -ENOMEM; - ret = i3c_master_setdasa_locked(master, dev->info.static_addr, - dev->boardinfo->init_dyn_addr); + i3cdev->boardinfo = boardinfo; + + ret = i3c_master_attach_i3c_dev(master, i3cdev); if (ret) - return; + goto err_free_dev; - dev->info.dyn_addr = dev->boardinfo->init_dyn_addr; - ret = i3c_master_reattach_i3c_dev(dev, 0); + ret = i3c_master_setdasa_locked(master, i3cdev->info.static_addr, + i3cdev->boardinfo->init_dyn_addr); + if (ret) + goto err_detach_dev; + + i3cdev->info.dyn_addr = i3cdev->boardinfo->init_dyn_addr; + ret = i3c_master_reattach_i3c_dev(i3cdev, 0); if (ret) goto err_rstdaa; - ret = i3c_master_retrieve_dev_info(dev); + ret = i3c_master_retrieve_dev_info(i3cdev); if (ret) goto err_rstdaa; - return; + return 0; err_rstdaa: - i3c_master_rstdaa_locked(master, dev->boardinfo->init_dyn_addr); + i3c_master_rstdaa_locked(master, i3cdev->boardinfo->init_dyn_addr); +err_detach_dev: + i3c_master_detach_i3c_dev(i3cdev); +err_free_dev: + i3c_master_free_i3c_dev(i3cdev); + + return ret; } static void @@ -1625,8 +1643,8 @@ static void i3c_master_detach_free_devs(struct i3c_master_controller *master) * This function is following all initialisation steps described in the I3C * specification: * - * 1. Attach I2C and statically defined I3C devs to the master so that the - * master can fill its internal device table appropriately + * 1. Attach I2C devs to the master so that the master can fill its internal + * device table appropriately * * 2. Call &i3c_master_controller_ops->bus_init() method to initialize * the master controller. That's usually where the bus mode is selected @@ -1638,8 +1656,10 @@ static void i3c_master_detach_free_devs(struct i3c_master_controller *master) * * 4. Disable all slave events. * - * 5. Pre-assign dynamic addresses requested by the FW with SETDASA for I3C - * devices that have a static address + * 5. Reserve address slots for I3C devices with init_dyn_addr. And if devices + * also have static_addr, try to pre-assign dynamic addresses requested by + * the FW with SETDASA and attach corresponding statically defined I3C + * devices to the master. * * 6. Do a DAA (Dynamic Address Assignment) to assign dynamic addresses to all * remaining I3C devices @@ -1653,7 +1673,6 @@ static int i3c_master_bus_init(struct i3c_master_controller *master) enum i3c_addr_slot_status status; struct i2c_dev_boardinfo *i2cboardinfo; struct i3c_dev_boardinfo *i3cboardinfo; - struct i3c_dev_desc *i3cdev; struct i2c_dev_desc *i2cdev; int ret; @@ -1685,34 +1704,6 @@ static int i3c_master_bus_init(struct i3c_master_controller *master) goto err_detach_devs; } } - list_for_each_entry(i3cboardinfo, &master->boardinfo.i3c, node) { - struct i3c_device_info info = { - .static_addr = i3cboardinfo->static_addr, - }; - - if (i3cboardinfo->init_dyn_addr) { - status = i3c_bus_get_addr_slot_status(&master->bus, - i3cboardinfo->init_dyn_addr); - if (status != I3C_ADDR_SLOT_FREE) { - ret = -EBUSY; - goto err_detach_devs; - } - } - - i3cdev = i3c_master_alloc_i3c_dev(master, &info); - if (IS_ERR(i3cdev)) { - ret = PTR_ERR(i3cdev); - goto err_detach_devs; - } - - i3cdev->boardinfo = i3cboardinfo; - - ret = i3c_master_attach_i3c_dev(master, i3cdev); - if (ret) { - i3c_master_free_i3c_dev(i3cdev); - goto err_detach_devs; - } - } /* * Now execute the controller specific ->bus_init() routine, which @@ -1749,11 +1740,43 @@ static int i3c_master_bus_init(struct i3c_master_controller *master) goto err_bus_cleanup; /* - * Pre-assign dynamic address and retrieve device information if - * needed. + * Reserve init_dyn_addr first, and then try to pre-assign dynamic + * address and retrieve device information if needed. + * In case pre-assign dynamic address fails, setting dynamic address to + * the requested init_dyn_addr is retried after DAA is done in + * i3c_master_add_i3c_dev_locked(). */ - i3c_bus_for_each_i3cdev(&master->bus, i3cdev) - i3c_master_pre_assign_dyn_addr(i3cdev); + list_for_each_entry(i3cboardinfo, &master->boardinfo.i3c, node) { + + /* + * We don't reserve a dynamic address for devices that + * don't explicitly request one. + */ + if (!i3cboardinfo->init_dyn_addr) + continue; + + ret = i3c_bus_get_addr_slot_status(&master->bus, + i3cboardinfo->init_dyn_addr); + if (ret != I3C_ADDR_SLOT_FREE) { + ret = -EBUSY; + goto err_rstdaa; + } + + i3c_bus_set_addr_slot_status(&master->bus, + i3cboardinfo->init_dyn_addr, + I3C_ADDR_SLOT_I3C_DEV); + + /* + * Only try to create/attach devices that have a static + * address. Other devices will be created/attached when + * DAA happens, and the requested dynamic address will + * be set using SETNEWDA once those devices become + * addressable. + */ + + if (i3cboardinfo->static_addr) + i3c_master_early_i3c_dev_add(master, i3cboardinfo); + } ret = i3c_master_do_daa(master); if (ret) @@ -1782,6 +1805,21 @@ static void i3c_master_bus_cleanup(struct i3c_master_controller *master) i3c_master_detach_free_devs(master); } +static void i3c_master_attach_boardinfo(struct i3c_dev_desc *i3cdev) +{ + struct i3c_master_controller *master = i3cdev->common.master; + struct i3c_dev_boardinfo *i3cboardinfo; + + list_for_each_entry(i3cboardinfo, &master->boardinfo.i3c, node) { + if (i3cdev->info.pid != i3cboardinfo->pid) + continue; + + i3cdev->boardinfo = i3cboardinfo; + i3cdev->info.static_addr = i3cboardinfo->static_addr; + return; + } +} + static struct i3c_dev_desc * i3c_master_search_i3c_dev_duplicate(struct i3c_dev_desc *refdev) { @@ -1837,10 +1875,10 @@ int i3c_master_add_i3c_dev_locked(struct i3c_master_controller *master, if (ret) goto err_detach_dev; + i3c_master_attach_boardinfo(newdev); + olddev = i3c_master_search_i3c_dev_duplicate(newdev); if (olddev) { - newdev->boardinfo = olddev->boardinfo; - newdev->info.static_addr = olddev->info.static_addr; newdev->dev = olddev->dev; if (newdev->dev) newdev->dev->desc = newdev; diff --git a/drivers/i3c/master/i3c-master-cdns.c b/drivers/i3c/master/i3c-master-cdns.c index 3fee8bd7fe20..3f2226928fe0 100644 --- a/drivers/i3c/master/i3c-master-cdns.c +++ b/drivers/i3c/master/i3c-master-cdns.c @@ -1635,8 +1635,10 @@ static int cdns_i3c_master_probe(struct platform_device *pdev) master->ibi.slots = devm_kcalloc(&pdev->dev, master->ibi.num_slots, sizeof(*master->ibi.slots), GFP_KERNEL); - if (!master->ibi.slots) + if (!master->ibi.slots) { + ret = -ENOMEM; goto err_disable_sysclk; + } writel(IBIR_THR(1), master->regs + CMD_IBI_THR_CTRL); writel(MST_INT_IBIR_THR, master->regs + MST_IER); diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 91b023341b77..32a51432ec4f 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -48,6 +48,7 @@ config INFINIBAND_ON_DEMAND_PAGING depends on INFINIBAND_USER_MEM select MMU_NOTIFIER select INTERVAL_TREE + select HMM_MIRROR default y help On demand paging support for the InfiniBand subsystem. diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile index 24cb71a16a28..ccf2670ef45e 100644 --- a/drivers/infiniband/core/Makefile +++ b/drivers/infiniband/core/Makefile @@ -17,7 +17,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \ ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o -ib_cm-y := cm.o +ib_cm-y := cm.o cm_trace.o iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 3a98439bba83..0abce004a959 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -647,13 +647,12 @@ static void process_one_req(struct work_struct *_work) req->callback = NULL; spin_lock_bh(&lock); + /* + * Although the work will normally have been canceled by the workqueue, + * it can still be requeued as long as it is on the req_list. + */ + cancel_delayed_work(&req->work); if (!list_empty(&req->list)) { - /* - * Although the work will normally have been canceled by the - * workqueue, it can still be requeued as long as it is on the - * req_list. - */ - cancel_delayed_work(&req->work); list_del_init(&req->list); kfree(req); } diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 5a76611e684a..8017c40dd110 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -133,7 +133,11 @@ static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port) } static const char * const gid_type_str[] = { + /* IB/RoCE v1 value is set for IB_GID_TYPE_IB and IB_GID_TYPE_ROCE for + * user space compatibility reasons. + */ [IB_GID_TYPE_IB] = "IB/RoCE v1", + [IB_GID_TYPE_ROCE] = "IB/RoCE v1", [IB_GID_TYPE_ROCE_UDP_ENCAP] = "RoCE v2", }; @@ -1220,7 +1224,7 @@ EXPORT_SYMBOL(ib_get_cached_port_state); const struct ib_gid_attr * rdma_get_gid_attr(struct ib_device *device, u8 port_num, int index) { - const struct ib_gid_attr *attr = ERR_PTR(-EINVAL); + const struct ib_gid_attr *attr = ERR_PTR(-ENODATA); struct ib_gid_table *table; unsigned long flags; @@ -1243,6 +1247,67 @@ done: } EXPORT_SYMBOL(rdma_get_gid_attr); +/** + * rdma_query_gid_table - Reads GID table entries of all the ports of a device up to max_entries. + * @device: The device to query. + * @entries: Entries where GID entries are returned. + * @max_entries: Maximum number of entries that can be returned. + * Entries array must be allocated to hold max_entries number of entries. + * @num_entries: Updated to the number of entries that were successfully read. + * + * Returns number of entries on success or appropriate error code. + */ +ssize_t rdma_query_gid_table(struct ib_device *device, + struct ib_uverbs_gid_entry *entries, + size_t max_entries) +{ + const struct ib_gid_attr *gid_attr; + ssize_t num_entries = 0, ret; + struct ib_gid_table *table; + unsigned int port_num, i; + struct net_device *ndev; + unsigned long flags; + + rdma_for_each_port(device, port_num) { + if (!rdma_ib_or_roce(device, port_num)) + continue; + + table = rdma_gid_table(device, port_num); + read_lock_irqsave(&table->rwlock, flags); + for (i = 0; i < table->sz; i++) { + if (!is_gid_entry_valid(table->data_vec[i])) + continue; + if (num_entries >= max_entries) { + ret = -EINVAL; + goto err; + } + + gid_attr = &table->data_vec[i]->attr; + + memcpy(&entries->gid, &gid_attr->gid, + sizeof(gid_attr->gid)); + entries->gid_index = gid_attr->index; + entries->port_num = gid_attr->port_num; + entries->gid_type = gid_attr->gid_type; + ndev = rcu_dereference_protected( + gid_attr->ndev, + lockdep_is_held(&table->rwlock)); + if (ndev) + entries->netdev_ifindex = ndev->ifindex; + + num_entries++; + entries++; + } + read_unlock_irqrestore(&table->rwlock, flags); + } + + return num_entries; +err: + read_unlock_irqrestore(&table->rwlock, flags); + return ret; +} +EXPORT_SYMBOL(rdma_query_gid_table); + /** * rdma_put_gid_attr - Release reference to the GID attribute * @attr: Pointer to the GID attribute whose reference @@ -1299,7 +1364,7 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr) struct ib_gid_table_entry *entry = container_of(attr, struct ib_gid_table_entry, attr); struct ib_device *device = entry->attr.device; - struct net_device *ndev = ERR_PTR(-ENODEV); + struct net_device *ndev = ERR_PTR(-EINVAL); u8 port_num = entry->attr.port_num; struct ib_gid_table *table; unsigned long flags; @@ -1311,8 +1376,7 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr) valid = is_gid_entry_valid(table->data_vec[attr->index]); if (valid) { ndev = rcu_dereference(attr->ndev); - if (!ndev || - (ndev && ((READ_ONCE(ndev->flags) & IFF_UP) == 0))) + if (!ndev) ndev = ERR_PTR(-ENODEV); } read_unlock_irqrestore(&table->rwlock, flags); diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index fbc28f1a8b92..5740d1ba3568 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -27,6 +27,7 @@ #include #include "cm_msgs.h" #include "core_priv.h" +#include "cm_trace.h" MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("InfiniBand CM"); @@ -201,7 +202,6 @@ static struct attribute *cm_counter_default_attrs[] = { struct cm_port { struct cm_device *cm_dev; struct ib_mad_agent *mad_agent; - struct kobject port_obj; u8 port_num; struct list_head cm_priv_prim_list; struct list_head cm_priv_altr_list; @@ -1563,6 +1563,7 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg)); cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg)); + trace_icm_send_req(&cm_id_priv->id); spin_lock_irqsave(&cm_id_priv->lock, flags); ret = ib_post_send_mad(cm_id_priv->msg, NULL); if (ret) { @@ -1610,6 +1611,9 @@ static int cm_issue_rej(struct cm_port *port, IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length); } + trace_icm_issue_rej( + IBA_GET(CM_REJ_LOCAL_COMM_ID, rcv_msg), + IBA_GET(CM_REJ_REMOTE_COMM_ID, rcv_msg)); ret = ib_post_send_mad(msg, NULL); if (ret) cm_free_msg(msg); @@ -1961,6 +1965,7 @@ static void cm_dup_req_handler(struct cm_work *work, } spin_unlock_irq(&cm_id_priv->lock); + trace_icm_send_dup_req(&cm_id_priv->id); ret = ib_post_send_mad(msg, NULL); if (ret) goto free; @@ -2124,8 +2129,7 @@ static int cm_req_handler(struct cm_work *work) listen_cm_id_priv = cm_match_req(work, cm_id_priv); if (!listen_cm_id_priv) { - pr_debug("%s: local_id %d, no listen_cm_id_priv\n", __func__, - be32_to_cpu(cm_id_priv->id.local_id)); + trace_icm_no_listener_err(&cm_id_priv->id); cm_id_priv->id.state = IB_CM_IDLE; ret = -EINVAL; goto destroy; @@ -2274,8 +2278,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_REQ_RCVD && cm_id->state != IB_CM_MRA_REQ_SENT) { - pr_debug("%s: local_comm_id %d, cm_id->state: %d\n", __func__, - be32_to_cpu(cm_id_priv->id.local_id), cm_id->state); + trace_icm_send_rep_err(cm_id_priv->id.local_id, cm_id->state); ret = -EINVAL; goto out; } @@ -2289,6 +2292,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id, msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT; + trace_icm_send_rep(cm_id); ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -2348,8 +2352,7 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id, spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id->state != IB_CM_REP_RCVD && cm_id->state != IB_CM_MRA_REP_SENT) { - pr_debug("%s: local_id %d, cm_id->state %d\n", __func__, - be32_to_cpu(cm_id->local_id), cm_id->state); + trace_icm_send_cm_rtu_err(cm_id); ret = -EINVAL; goto error; } @@ -2361,6 +2364,7 @@ int ib_send_cm_rtu(struct ib_cm_id *cm_id, cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv, private_data, private_data_len); + trace_icm_send_rtu(cm_id); ret = ib_post_send_mad(msg, NULL); if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -2442,6 +2446,7 @@ static void cm_dup_rep_handler(struct cm_work *work) goto unlock; spin_unlock_irq(&cm_id_priv->lock); + trace_icm_send_dup_rep(&cm_id_priv->id); ret = ib_post_send_mad(msg, NULL); if (ret) goto free; @@ -2465,7 +2470,7 @@ static int cm_rep_handler(struct cm_work *work) cpu_to_be32(IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)), 0); if (!cm_id_priv) { cm_dup_rep_handler(work); - pr_debug("%s: remote_comm_id %d, no cm_id_priv\n", __func__, + trace_icm_remote_no_priv_err( IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)); return -EINVAL; } @@ -2479,11 +2484,10 @@ static int cm_rep_handler(struct cm_work *work) break; default: ret = -EINVAL; - pr_debug( - "%s: cm_id_priv->id.state: %d, local_comm_id %d, remote_comm_id %d\n", - __func__, cm_id_priv->id.state, + trace_icm_rep_unknown_err( IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg), - IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)); + IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg), + cm_id_priv->id.state); spin_unlock_irq(&cm_id_priv->lock); goto error; } @@ -2500,7 +2504,7 @@ static int cm_rep_handler(struct cm_work *work) spin_unlock(&cm.lock); spin_unlock_irq(&cm_id_priv->lock); ret = -EINVAL; - pr_debug("%s: Failed to insert remote id %d\n", __func__, + trace_icm_insert_failed_err( IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)); goto error; } @@ -2517,9 +2521,8 @@ static int cm_rep_handler(struct cm_work *work) IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP, NULL, 0); ret = -EINVAL; - pr_debug( - "%s: Stale connection. local_comm_id %d, remote_comm_id %d\n", - __func__, IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg), + trace_icm_staleconn_err( + IBA_GET(CM_REP_LOCAL_COMM_ID, rep_msg), IBA_GET(CM_REP_REMOTE_COMM_ID, rep_msg)); if (cur_cm_id_priv) { @@ -2646,9 +2649,7 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv, return -EINVAL; if (cm_id_priv->id.state != IB_CM_ESTABLISHED) { - pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__, - be32_to_cpu(cm_id_priv->id.local_id), - cm_id_priv->id.state); + trace_icm_dreq_skipped(&cm_id_priv->id); return -EINVAL; } @@ -2667,6 +2668,7 @@ static int cm_send_dreq_locked(struct cm_id_private *cm_id_priv, msg->timeout_ms = cm_id_priv->timeout_ms; msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT; + trace_icm_send_dreq(&cm_id_priv->id); ret = ib_post_send_mad(msg, NULL); if (ret) { cm_enter_timewait(cm_id_priv); @@ -2722,10 +2724,7 @@ static int cm_send_drep_locked(struct cm_id_private *cm_id_priv, return -EINVAL; if (cm_id_priv->id.state != IB_CM_DREQ_RCVD) { - pr_debug( - "%s: local_id %d, cm_idcm_id->state(%d) != IB_CM_DREQ_RCVD\n", - __func__, be32_to_cpu(cm_id_priv->id.local_id), - cm_id_priv->id.state); + trace_icm_send_drep_err(&cm_id_priv->id); kfree(private_data); return -EINVAL; } @@ -2740,6 +2739,7 @@ static int cm_send_drep_locked(struct cm_id_private *cm_id_priv, cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv, private_data, private_data_len); + trace_icm_send_drep(&cm_id_priv->id); ret = ib_post_send_mad(msg, NULL); if (ret) { cm_free_msg(msg); @@ -2789,6 +2789,9 @@ static int cm_issue_drep(struct cm_port *port, IBA_SET(CM_DREP_LOCAL_COMM_ID, drep_msg, IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)); + trace_icm_issue_drep( + IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg), + IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)); ret = ib_post_send_mad(msg, NULL); if (ret) cm_free_msg(msg); @@ -2810,9 +2813,8 @@ static int cm_dreq_handler(struct cm_work *work) atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES]. counter[CM_DREQ_COUNTER]); cm_issue_drep(work->port, work->mad_recv_wc); - pr_debug( - "%s: no cm_id_priv, local_comm_id %d, remote_comm_id %d\n", - __func__, IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg), + trace_icm_no_priv_err( + IBA_GET(CM_DREQ_LOCAL_COMM_ID, dreq_msg), IBA_GET(CM_DREQ_REMOTE_COMM_ID, dreq_msg)); return -EINVAL; } @@ -2858,9 +2860,7 @@ static int cm_dreq_handler(struct cm_work *work) counter[CM_DREQ_COUNTER]); goto unlock; default: - pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n", - __func__, be32_to_cpu(cm_id_priv->id.local_id), - cm_id_priv->id.state); + trace_icm_dreq_unknown_err(&cm_id_priv->id); goto unlock; } cm_id_priv->id.state = IB_CM_DREQ_RCVD; @@ -2945,12 +2945,11 @@ static int cm_send_rej_locked(struct cm_id_private *cm_id_priv, state); break; default: - pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__, - be32_to_cpu(cm_id_priv->id.local_id), - cm_id_priv->id.state); + trace_icm_send_unknown_rej_err(&cm_id_priv->id); return -EINVAL; } + trace_icm_send_rej(&cm_id_priv->id, reason); ret = ib_post_send_mad(msg, NULL); if (ret) { cm_free_msg(msg); @@ -3060,9 +3059,7 @@ static int cm_rej_handler(struct cm_work *work) } fallthrough; default: - pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n", - __func__, be32_to_cpu(cm_id_priv->id.local_id), - cm_id_priv->id.state); + trace_icm_rej_unknown_err(&cm_id_priv->id); spin_unlock_irq(&cm_id_priv->lock); goto out; } @@ -3118,9 +3115,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, } fallthrough; default: - pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n", - __func__, be32_to_cpu(cm_id_priv->id.local_id), - cm_id_priv->id.state); + trace_icm_send_mra_unknown_err(&cm_id_priv->id); ret = -EINVAL; goto error1; } @@ -3133,6 +3128,7 @@ int ib_send_cm_mra(struct ib_cm_id *cm_id, cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv, msg_response, service_timeout, private_data, private_data_len); + trace_icm_send_mra(cm_id); ret = ib_post_send_mad(msg, NULL); if (ret) goto error2; @@ -3229,9 +3225,7 @@ static int cm_mra_handler(struct cm_work *work) counter[CM_MRA_COUNTER]); fallthrough; default: - pr_debug("%s local_id %d, cm_id_priv->id.state: %d\n", - __func__, be32_to_cpu(cm_id_priv->id.local_id), - cm_id_priv->id.state); + trace_icm_mra_unknown_err(&cm_id_priv->id); goto out; } @@ -3505,10 +3499,12 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT; spin_lock_irqsave(&cm_id_priv->lock, flags); - if (cm_id->state == IB_CM_IDLE) + if (cm_id->state == IB_CM_IDLE) { + trace_icm_send_sidr_req(&cm_id_priv->id); ret = ib_post_send_mad(msg, NULL); - else + } else { ret = -EINVAL; + } if (ret) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); @@ -3670,6 +3666,7 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv, cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv, param); + trace_icm_send_sidr_rep(&cm_id_priv->id); ret = ib_post_send_mad(msg, NULL); if (ret) { cm_free_msg(msg); @@ -3767,8 +3764,7 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg, if (msg != cm_id_priv->msg || state != cm_id_priv->id.state) goto discard; - pr_debug_ratelimited("CM: failed sending MAD in state %d. (%s)\n", - state, ib_wc_status_msg(wc_status)); + trace_icm_mad_send_err(state, wc_status); switch (state) { case IB_CM_REQ_SENT: case IB_CM_MRA_REQ_RCVD: @@ -3891,7 +3887,7 @@ static void cm_work_handler(struct work_struct *_work) ret = cm_timewait_handler(work); break; default: - pr_debug("cm_event.event: 0x%x\n", work->cm_event.event); + trace_icm_handler_err(work->cm_event.event); ret = -EINVAL; break; } @@ -3927,8 +3923,7 @@ static int cm_establish(struct ib_cm_id *cm_id) ret = -EISCONN; break; default: - pr_debug("%s: local_id %d, cm_id->state: %d\n", __func__, - be32_to_cpu(cm_id->local_id), cm_id->state); + trace_icm_establish_err(cm_id); ret = -EINVAL; break; } @@ -4125,9 +4120,7 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, ret = 0; break; default: - pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n", - __func__, be32_to_cpu(cm_id_priv->id.local_id), - cm_id_priv->id.state); + trace_icm_qp_init_err(&cm_id_priv->id); ret = -EINVAL; break; } @@ -4175,9 +4168,7 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, ret = 0; break; default: - pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n", - __func__, be32_to_cpu(cm_id_priv->id.local_id), - cm_id_priv->id.state); + trace_icm_qp_rtr_err(&cm_id_priv->id); ret = -EINVAL; break; } @@ -4237,9 +4228,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv, ret = 0; break; default: - pr_debug("%s: local_id %d, cm_id_priv->id.state: %d\n", - __func__, be32_to_cpu(cm_id_priv->id.local_id), - cm_id_priv->id.state); + trace_icm_qp_rts_err(&cm_id_priv->id); ret = -EINVAL; break; } @@ -4295,20 +4284,6 @@ static struct kobj_type cm_counter_obj_type = { .default_attrs = cm_counter_default_attrs }; -static char *cm_devnode(struct device *dev, umode_t *mode) -{ - if (mode) - *mode = 0666; - return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); -} - -struct class cm_class = { - .owner = THIS_MODULE, - .name = "infiniband_cm", - .devnode = cm_devnode, -}; -EXPORT_SYMBOL(cm_class); - static int cm_create_port_fs(struct cm_port *port) { int i, ret; @@ -4511,12 +4486,6 @@ static int __init ib_cm_init(void) get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand); INIT_LIST_HEAD(&cm.timewait_list); - ret = class_register(&cm_class); - if (ret) { - ret = -ENOMEM; - goto error1; - } - cm.wq = alloc_workqueue("ib_cm", 0, 1); if (!cm.wq) { ret = -ENOMEM; @@ -4531,8 +4500,6 @@ static int __init ib_cm_init(void) error3: destroy_workqueue(cm.wq); error2: - class_unregister(&cm_class); -error1: return ret; } @@ -4553,7 +4520,6 @@ static void __exit ib_cm_cleanup(void) kfree(timewait_info); } - class_unregister(&cm_class); WARN_ON(!xa_empty(&cm.local_id_table)); } diff --git a/drivers/infiniband/core/cm_trace.c b/drivers/infiniband/core/cm_trace.c new file mode 100644 index 000000000000..8f3482f66338 --- /dev/null +++ b/drivers/infiniband/core/cm_trace.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Trace points for the IB Connection Manager. + * + * Author: Chuck Lever + * + * Copyright (c) 2020, Oracle and/or its affiliates. + */ + +#include +#include "cma_priv.h" + +#define CREATE_TRACE_POINTS + +#include "cm_trace.h" diff --git a/drivers/infiniband/core/cm_trace.h b/drivers/infiniband/core/cm_trace.h new file mode 100644 index 000000000000..e9d282679ef1 --- /dev/null +++ b/drivers/infiniband/core/cm_trace.h @@ -0,0 +1,414 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Trace point definitions for the RDMA Connect Manager. + * + * Author: Chuck Lever + * + * Copyright (c) 2020 Oracle and/or its affiliates. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ib_cma + +#if !defined(_TRACE_IB_CMA_H) || defined(TRACE_HEADER_MULTI_READ) + +#define _TRACE_IB_CMA_H + +#include +#include +#include + +/* + * enum ib_cm_state, from include/rdma/ib_cm.h + */ +#define IB_CM_STATE_LIST \ + ib_cm_state(IDLE) \ + ib_cm_state(LISTEN) \ + ib_cm_state(REQ_SENT) \ + ib_cm_state(REQ_RCVD) \ + ib_cm_state(MRA_REQ_SENT) \ + ib_cm_state(MRA_REQ_RCVD) \ + ib_cm_state(REP_SENT) \ + ib_cm_state(REP_RCVD) \ + ib_cm_state(MRA_REP_SENT) \ + ib_cm_state(MRA_REP_RCVD) \ + ib_cm_state(ESTABLISHED) \ + ib_cm_state(DREQ_SENT) \ + ib_cm_state(DREQ_RCVD) \ + ib_cm_state(TIMEWAIT) \ + ib_cm_state(SIDR_REQ_SENT) \ + ib_cm_state_end(SIDR_REQ_RCVD) + +#undef ib_cm_state +#undef ib_cm_state_end +#define ib_cm_state(x) TRACE_DEFINE_ENUM(IB_CM_##x); +#define ib_cm_state_end(x) TRACE_DEFINE_ENUM(IB_CM_##x); + +IB_CM_STATE_LIST + +#undef ib_cm_state +#undef ib_cm_state_end +#define ib_cm_state(x) { IB_CM_##x, #x }, +#define ib_cm_state_end(x) { IB_CM_##x, #x } + +#define show_ib_cm_state(x) \ + __print_symbolic(x, IB_CM_STATE_LIST) + +/* + * enum ib_cm_lap_state, from include/rdma/ib_cm.h + */ +#define IB_CM_LAP_STATE_LIST \ + ib_cm_lap_state(LAP_UNINIT) \ + ib_cm_lap_state(LAP_IDLE) \ + ib_cm_lap_state(LAP_SENT) \ + ib_cm_lap_state(LAP_RCVD) \ + ib_cm_lap_state(MRA_LAP_SENT) \ + ib_cm_lap_state_end(MRA_LAP_RCVD) + +#undef ib_cm_lap_state +#undef ib_cm_lap_state_end +#define ib_cm_lap_state(x) TRACE_DEFINE_ENUM(IB_CM_##x); +#define ib_cm_lap_state_end(x) TRACE_DEFINE_ENUM(IB_CM_##x); + +IB_CM_LAP_STATE_LIST + +#undef ib_cm_lap_state +#undef ib_cm_lap_state_end +#define ib_cm_lap_state(x) { IB_CM_##x, #x }, +#define ib_cm_lap_state_end(x) { IB_CM_##x, #x } + +#define show_ib_cm_lap_state(x) \ + __print_symbolic(x, IB_CM_LAP_STATE_LIST) + +/* + * enum ib_cm_rej_reason, from include/rdma/ib_cm.h + */ +#define IB_CM_REJ_REASON_LIST \ + ib_cm_rej_reason(REJ_NO_QP) \ + ib_cm_rej_reason(REJ_NO_EEC) \ + ib_cm_rej_reason(REJ_NO_RESOURCES) \ + ib_cm_rej_reason(REJ_TIMEOUT) \ + ib_cm_rej_reason(REJ_UNSUPPORTED) \ + ib_cm_rej_reason(REJ_INVALID_COMM_ID) \ + ib_cm_rej_reason(REJ_INVALID_COMM_INSTANCE) \ + ib_cm_rej_reason(REJ_INVALID_SERVICE_ID) \ + ib_cm_rej_reason(REJ_INVALID_TRANSPORT_TYPE) \ + ib_cm_rej_reason(REJ_STALE_CONN) \ + ib_cm_rej_reason(REJ_RDC_NOT_EXIST) \ + ib_cm_rej_reason(REJ_INVALID_GID) \ + ib_cm_rej_reason(REJ_INVALID_LID) \ + ib_cm_rej_reason(REJ_INVALID_SL) \ + ib_cm_rej_reason(REJ_INVALID_TRAFFIC_CLASS) \ + ib_cm_rej_reason(REJ_INVALID_HOP_LIMIT) \ + ib_cm_rej_reason(REJ_INVALID_PACKET_RATE) \ + ib_cm_rej_reason(REJ_INVALID_ALT_GID) \ + ib_cm_rej_reason(REJ_INVALID_ALT_LID) \ + ib_cm_rej_reason(REJ_INVALID_ALT_SL) \ + ib_cm_rej_reason(REJ_INVALID_ALT_TRAFFIC_CLASS) \ + ib_cm_rej_reason(REJ_INVALID_ALT_HOP_LIMIT) \ + ib_cm_rej_reason(REJ_INVALID_ALT_PACKET_RATE) \ + ib_cm_rej_reason(REJ_PORT_CM_REDIRECT) \ + ib_cm_rej_reason(REJ_PORT_REDIRECT) \ + ib_cm_rej_reason(REJ_INVALID_MTU) \ + ib_cm_rej_reason(REJ_INSUFFICIENT_RESP_RESOURCES) \ + ib_cm_rej_reason(REJ_CONSUMER_DEFINED) \ + ib_cm_rej_reason(REJ_INVALID_RNR_RETRY) \ + ib_cm_rej_reason(REJ_DUPLICATE_LOCAL_COMM_ID) \ + ib_cm_rej_reason(REJ_INVALID_CLASS_VERSION) \ + ib_cm_rej_reason(REJ_INVALID_FLOW_LABEL) \ + ib_cm_rej_reason(REJ_INVALID_ALT_FLOW_LABEL) \ + ib_cm_rej_reason_end(REJ_VENDOR_OPTION_NOT_SUPPORTED) + +#undef ib_cm_rej_reason +#undef ib_cm_rej_reason_end +#define ib_cm_rej_reason(x) TRACE_DEFINE_ENUM(IB_CM_##x); +#define ib_cm_rej_reason_end(x) TRACE_DEFINE_ENUM(IB_CM_##x); + +IB_CM_REJ_REASON_LIST + +#undef ib_cm_rej_reason +#undef ib_cm_rej_reason_end +#define ib_cm_rej_reason(x) { IB_CM_##x, #x }, +#define ib_cm_rej_reason_end(x) { IB_CM_##x, #x } + +#define show_ib_cm_rej_reason(x) \ + __print_symbolic(x, IB_CM_REJ_REASON_LIST) + +DECLARE_EVENT_CLASS(icm_id_class, + TP_PROTO( + const struct ib_cm_id *cm_id + ), + + TP_ARGS(cm_id), + + TP_STRUCT__entry( + __field(const void *, cm_id) /* for eBPF scripts */ + __field(unsigned int, local_id) + __field(unsigned int, remote_id) + __field(unsigned long, state) + __field(unsigned long, lap_state) + ), + + TP_fast_assign( + __entry->cm_id = cm_id; + __entry->local_id = be32_to_cpu(cm_id->local_id); + __entry->remote_id = be32_to_cpu(cm_id->remote_id); + __entry->state = cm_id->state; + __entry->lap_state = cm_id->lap_state; + ), + + TP_printk("local_id=%u remote_id=%u state=%s lap_state=%s", + __entry->local_id, __entry->remote_id, + show_ib_cm_state(__entry->state), + show_ib_cm_lap_state(__entry->lap_state) + ) +); + +#define DEFINE_CM_SEND_EVENT(name) \ + DEFINE_EVENT(icm_id_class, \ + icm_send_##name, \ + TP_PROTO( \ + const struct ib_cm_id *cm_id \ + ), \ + TP_ARGS(cm_id)) + +DEFINE_CM_SEND_EVENT(req); +DEFINE_CM_SEND_EVENT(rep); +DEFINE_CM_SEND_EVENT(dup_req); +DEFINE_CM_SEND_EVENT(dup_rep); +DEFINE_CM_SEND_EVENT(rtu); +DEFINE_CM_SEND_EVENT(mra); +DEFINE_CM_SEND_EVENT(sidr_req); +DEFINE_CM_SEND_EVENT(sidr_rep); +DEFINE_CM_SEND_EVENT(dreq); +DEFINE_CM_SEND_EVENT(drep); + +TRACE_EVENT(icm_send_rej, + TP_PROTO( + const struct ib_cm_id *cm_id, + enum ib_cm_rej_reason reason + ), + + TP_ARGS(cm_id, reason), + + TP_STRUCT__entry( + __field(const void *, cm_id) + __field(u32, local_id) + __field(u32, remote_id) + __field(unsigned long, state) + __field(unsigned long, reason) + ), + + TP_fast_assign( + __entry->cm_id = cm_id; + __entry->local_id = be32_to_cpu(cm_id->local_id); + __entry->remote_id = be32_to_cpu(cm_id->remote_id); + __entry->state = cm_id->state; + __entry->reason = reason; + ), + + TP_printk("local_id=%u remote_id=%u state=%s reason=%s", + __entry->local_id, __entry->remote_id, + show_ib_cm_state(__entry->state), + show_ib_cm_rej_reason(__entry->reason) + ) +); + +#define DEFINE_CM_ERR_EVENT(name) \ + DEFINE_EVENT(icm_id_class, \ + icm_##name##_err, \ + TP_PROTO( \ + const struct ib_cm_id *cm_id \ + ), \ + TP_ARGS(cm_id)) + +DEFINE_CM_ERR_EVENT(send_cm_rtu); +DEFINE_CM_ERR_EVENT(establish); +DEFINE_CM_ERR_EVENT(no_listener); +DEFINE_CM_ERR_EVENT(send_drep); +DEFINE_CM_ERR_EVENT(dreq_unknown); +DEFINE_CM_ERR_EVENT(send_unknown_rej); +DEFINE_CM_ERR_EVENT(rej_unknown); +DEFINE_CM_ERR_EVENT(send_mra_unknown); +DEFINE_CM_ERR_EVENT(mra_unknown); +DEFINE_CM_ERR_EVENT(qp_init); +DEFINE_CM_ERR_EVENT(qp_rtr); +DEFINE_CM_ERR_EVENT(qp_rts); + +DEFINE_EVENT(icm_id_class, \ + icm_dreq_skipped, \ + TP_PROTO( \ + const struct ib_cm_id *cm_id \ + ), \ + TP_ARGS(cm_id) \ +); + +DECLARE_EVENT_CLASS(icm_local_class, + TP_PROTO( + unsigned int local_id, + unsigned int remote_id + ), + + TP_ARGS(local_id, remote_id), + + TP_STRUCT__entry( + __field(unsigned int, local_id) + __field(unsigned int, remote_id) + ), + + TP_fast_assign( + __entry->local_id = local_id; + __entry->remote_id = remote_id; + ), + + TP_printk("local_id=%u remote_id=%u", + __entry->local_id, __entry->remote_id + ) +); + +#define DEFINE_CM_LOCAL_EVENT(name) \ + DEFINE_EVENT(icm_local_class, \ + icm_##name, \ + TP_PROTO( \ + unsigned int local_id, \ + unsigned int remote_id \ + ), \ + TP_ARGS(local_id, remote_id)) + +DEFINE_CM_LOCAL_EVENT(issue_rej); +DEFINE_CM_LOCAL_EVENT(issue_drep); +DEFINE_CM_LOCAL_EVENT(staleconn_err); +DEFINE_CM_LOCAL_EVENT(no_priv_err); + +DECLARE_EVENT_CLASS(icm_remote_class, + TP_PROTO( + u32 remote_id + ), + + TP_ARGS(remote_id), + + TP_STRUCT__entry( + __field(u32, remote_id) + ), + + TP_fast_assign( + __entry->remote_id = remote_id; + ), + + TP_printk("remote_id=%u", + __entry->remote_id + ) +); + +#define DEFINE_CM_REMOTE_EVENT(name) \ + DEFINE_EVENT(icm_remote_class, \ + icm_##name, \ + TP_PROTO( \ + u32 remote_id \ + ), \ + TP_ARGS(remote_id)) + +DEFINE_CM_REMOTE_EVENT(remote_no_priv_err); +DEFINE_CM_REMOTE_EVENT(insert_failed_err); + +TRACE_EVENT(icm_send_rep_err, + TP_PROTO( + __be32 local_id, + enum ib_cm_state state + ), + + TP_ARGS(local_id, state), + + TP_STRUCT__entry( + __field(unsigned int, local_id) + __field(unsigned long, state) + ), + + TP_fast_assign( + __entry->local_id = be32_to_cpu(local_id); + __entry->state = state; + ), + + TP_printk("local_id=%u state=%s", + __entry->local_id, show_ib_cm_state(__entry->state) + ) +); + +TRACE_EVENT(icm_rep_unknown_err, + TP_PROTO( + unsigned int local_id, + unsigned int remote_id, + enum ib_cm_state state + ), + + TP_ARGS(local_id, remote_id, state), + + TP_STRUCT__entry( + __field(unsigned int, local_id) + __field(unsigned int, remote_id) + __field(unsigned long, state) + ), + + TP_fast_assign( + __entry->local_id = local_id; + __entry->remote_id = remote_id; + __entry->state = state; + ), + + TP_printk("local_id=%u remote_id=%u state=%s", + __entry->local_id, __entry->remote_id, + show_ib_cm_state(__entry->state) + ) +); + +TRACE_EVENT(icm_handler_err, + TP_PROTO( + enum ib_cm_event_type event + ), + + TP_ARGS(event), + + TP_STRUCT__entry( + __field(unsigned long, event) + ), + + TP_fast_assign( + __entry->event = event; + ), + + TP_printk("unhandled event=%s", + rdma_show_ib_cm_event(__entry->event) + ) +); + +TRACE_EVENT(icm_mad_send_err, + TP_PROTO( + enum ib_cm_state state, + enum ib_wc_status wc_status + ), + + TP_ARGS(state, wc_status), + + TP_STRUCT__entry( + __field(unsigned long, state) + __field(unsigned long, wc_status) + ), + + TP_fast_assign( + __entry->state = state; + __entry->wc_status = wc_status; + ), + + TP_printk("state=%s completion status=%s", + show_ib_cm_state(__entry->state), + rdma_show_wc_status(__entry->wc_status) + ) +); + +#endif /* _TRACE_IB_CMA_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../drivers/infiniband/core +#define TRACE_INCLUDE_FILE cm_trace + +#include diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 5888311b2119..7c2ab1f2fbea 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -68,6 +68,9 @@ static const char * const cma_events[] = { [RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit", }; +static void cma_set_mgid(struct rdma_id_private *id_priv, struct sockaddr *addr, + union ib_gid *mgid); + const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event) { size_t index = event; @@ -301,6 +304,10 @@ int cma_set_default_gid_type(struct cma_device *cma_dev, if (!rdma_is_port_valid(cma_dev->device, port)) return -EINVAL; + if (default_gid_type == IB_GID_TYPE_IB && + rdma_protocol_roce_eth_encap(cma_dev->device, port)) + default_gid_type = IB_GID_TYPE_ROCE; + supported_gids = roce_gid_type_mask_support(cma_dev->device, port); if (!(supported_gids & 1 << default_gid_type)) @@ -345,13 +352,10 @@ struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev) struct cma_multicast { struct rdma_id_private *id_priv; - union { - struct ib_sa_multicast *ib; - } multicast; + struct ib_sa_multicast *sa_mc; struct list_head list; void *context; struct sockaddr_storage addr; - struct kref mcref; u8 join_state; }; @@ -363,18 +367,6 @@ struct cma_work { struct rdma_cm_event event; }; -struct cma_ndev_work { - struct work_struct work; - struct rdma_id_private *id; - struct rdma_cm_event event; -}; - -struct iboe_mcast_work { - struct work_struct work; - struct rdma_id_private *id; - struct cma_multicast *mc; -}; - union cma_ip_addr { struct in6_addr ip6; struct { @@ -404,23 +396,21 @@ struct cma_req_info { u16 pkey; }; -static int cma_comp(struct rdma_id_private *id_priv, enum rdma_cm_state comp) -{ - unsigned long flags; - int ret; - - spin_lock_irqsave(&id_priv->lock, flags); - ret = (id_priv->state == comp); - spin_unlock_irqrestore(&id_priv->lock, flags); - return ret; -} - static int cma_comp_exch(struct rdma_id_private *id_priv, enum rdma_cm_state comp, enum rdma_cm_state exch) { unsigned long flags; int ret; + /* + * The FSM uses a funny double locking where state is protected by both + * the handler_mutex and the spinlock. State is not allowed to change + * away from a handler_mutex protected value without also holding + * handler_mutex. + */ + if (comp == RDMA_CM_CONNECT) + lockdep_assert_held(&id_priv->handler_mutex); + spin_lock_irqsave(&id_priv->lock, flags); if ((ret = (id_priv->state == comp))) id_priv->state = exch; @@ -467,10 +457,8 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv, id_priv->id.route.addr.dev_addr.transport = rdma_node_get_transport(cma_dev->device->node_type); list_add_tail(&id_priv->list, &cma_dev->id_list); - if (id_priv->res.kern_name) - rdma_restrack_kadd(&id_priv->res); - else - rdma_restrack_uadd(&id_priv->res); + rdma_restrack_add(&id_priv->res); + trace_cm_id_attach(id_priv, cma_dev->device); } @@ -483,14 +471,6 @@ static void cma_attach_to_dev(struct rdma_id_private *id_priv, rdma_start_port(cma_dev->device)]; } -static inline void release_mc(struct kref *kref) -{ - struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref); - - kfree(mc->multicast.ib); - kfree(mc); -} - static void cma_release_dev(struct rdma_id_private *id_priv) { mutex_lock(&lock); @@ -844,10 +824,10 @@ static void cma_id_put(struct rdma_id_private *id_priv) complete(&id_priv->comp); } -struct rdma_cm_id *__rdma_create_id(struct net *net, - rdma_cm_event_handler event_handler, - void *context, enum rdma_ucm_port_space ps, - enum ib_qp_type qp_type, const char *caller) +static struct rdma_id_private * +__rdma_create_id(struct net *net, rdma_cm_event_handler event_handler, + void *context, enum rdma_ucm_port_space ps, + enum ib_qp_type qp_type, const struct rdma_id_private *parent) { struct rdma_id_private *id_priv; @@ -855,8 +835,6 @@ struct rdma_cm_id *__rdma_create_id(struct net *net, if (!id_priv) return ERR_PTR(-ENOMEM); - rdma_restrack_set_task(&id_priv->res, caller); - id_priv->res.type = RDMA_RESTRACK_CM_ID; id_priv->state = RDMA_CM_IDLE; id_priv->id.context = context; id_priv->id.event_handler = event_handler; @@ -876,9 +854,45 @@ struct rdma_cm_id *__rdma_create_id(struct net *net, id_priv->id.route.addr.dev_addr.net = get_net(net); id_priv->seq_num &= 0x00ffffff; - return &id_priv->id; + rdma_restrack_new(&id_priv->res, RDMA_RESTRACK_CM_ID); + if (parent) + rdma_restrack_parent_name(&id_priv->res, &parent->res); + + return id_priv; } -EXPORT_SYMBOL(__rdma_create_id); + +struct rdma_cm_id * +__rdma_create_kernel_id(struct net *net, rdma_cm_event_handler event_handler, + void *context, enum rdma_ucm_port_space ps, + enum ib_qp_type qp_type, const char *caller) +{ + struct rdma_id_private *ret; + + ret = __rdma_create_id(net, event_handler, context, ps, qp_type, NULL); + if (IS_ERR(ret)) + return ERR_CAST(ret); + + rdma_restrack_set_name(&ret->res, caller); + return &ret->id; +} +EXPORT_SYMBOL(__rdma_create_kernel_id); + +struct rdma_cm_id *rdma_create_user_id(rdma_cm_event_handler event_handler, + void *context, + enum rdma_ucm_port_space ps, + enum ib_qp_type qp_type) +{ + struct rdma_id_private *ret; + + ret = __rdma_create_id(current->nsproxy->net_ns, event_handler, context, + ps, qp_type, NULL); + if (IS_ERR(ret)) + return ERR_CAST(ret); + + rdma_restrack_set_name(&ret->res, NULL); + return &ret->id; +} +EXPORT_SYMBOL(rdma_create_user_id); static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) { @@ -1783,19 +1797,30 @@ static void cma_release_port(struct rdma_id_private *id_priv) mutex_unlock(&lock); } -static void cma_leave_roce_mc_group(struct rdma_id_private *id_priv, - struct cma_multicast *mc) +static void destroy_mc(struct rdma_id_private *id_priv, + struct cma_multicast *mc) { - struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; - struct net_device *ndev = NULL; + if (rdma_cap_ib_mcast(id_priv->id.device, id_priv->id.port_num)) + ib_sa_free_multicast(mc->sa_mc); - if (dev_addr->bound_dev_if) - ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); - if (ndev) { - cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false); - dev_put(ndev); + if (rdma_protocol_roce(id_priv->id.device, id_priv->id.port_num)) { + struct rdma_dev_addr *dev_addr = + &id_priv->id.route.addr.dev_addr; + struct net_device *ndev = NULL; + + if (dev_addr->bound_dev_if) + ndev = dev_get_by_index(dev_addr->net, + dev_addr->bound_dev_if); + if (ndev) { + union ib_gid mgid; + + cma_set_mgid(id_priv, (struct sockaddr *)&mc->addr, + &mgid); + cma_igmp_send(ndev, &mgid, false); + dev_put(ndev); + } } - kref_put(&mc->mcref, release_mc); + kfree(mc); } static void cma_leave_mc_groups(struct rdma_id_private *id_priv) @@ -1803,16 +1828,10 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv) struct cma_multicast *mc; while (!list_empty(&id_priv->mc_list)) { - mc = container_of(id_priv->mc_list.next, - struct cma_multicast, list); + mc = list_first_entry(&id_priv->mc_list, struct cma_multicast, + list); list_del(&mc->list); - if (rdma_cap_ib_mcast(id_priv->cma_dev->device, - id_priv->id.port_num)) { - ib_sa_free_multicast(mc->multicast.ib); - kfree(mc); - } else { - cma_leave_roce_mc_group(id_priv, mc); - } + destroy_mc(id_priv, mc); } } @@ -1821,7 +1840,6 @@ static void _destroy_id(struct rdma_id_private *id_priv, { cma_cancel_operation(id_priv, state); - rdma_restrack_del(&id_priv->res); if (id_priv->cma_dev) { if (rdma_cap_ib_cm(id_priv->id.device, 1)) { if (id_priv->cm_id.ib) @@ -1847,6 +1865,7 @@ static void _destroy_id(struct rdma_id_private *id_priv, rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr); put_net(id_priv->id.route.addr.dev_addr.net); + rdma_restrack_del(&id_priv->res); kfree(id_priv); } @@ -1949,13 +1968,15 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, { struct rdma_id_private *id_priv = cm_id->context; struct rdma_cm_event event = {}; + enum rdma_cm_state state; int ret; mutex_lock(&id_priv->handler_mutex); + state = READ_ONCE(id_priv->state); if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && - id_priv->state != RDMA_CM_CONNECT) || + state != RDMA_CM_CONNECT) || (ib_event->event == IB_CM_TIMEWAIT_EXIT && - id_priv->state != RDMA_CM_DISCONNECT)) + state != RDMA_CM_DISCONNECT)) goto out; switch (ib_event->event) { @@ -1965,7 +1986,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, event.status = -ETIMEDOUT; break; case IB_CM_REP_RECEIVED: - if (cma_comp(id_priv, RDMA_CM_CONNECT) && + if (state == RDMA_CM_CONNECT && (id_priv->id.qp_type != IB_QPT_UD)) { trace_cm_send_mra(id_priv); ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); @@ -2043,14 +2064,15 @@ cma_ib_new_conn_id(const struct rdma_cm_id *listen_id, int ret; listen_id_priv = container_of(listen_id, struct rdma_id_private, id); - id = __rdma_create_id(listen_id->route.addr.dev_addr.net, - listen_id->event_handler, listen_id->context, - listen_id->ps, ib_event->param.req_rcvd.qp_type, - listen_id_priv->res.kern_name); - if (IS_ERR(id)) + id_priv = __rdma_create_id(listen_id->route.addr.dev_addr.net, + listen_id->event_handler, listen_id->context, + listen_id->ps, + ib_event->param.req_rcvd.qp_type, + listen_id_priv); + if (IS_ERR(id_priv)) return NULL; - id_priv = container_of(id, struct rdma_id_private, id); + id = &id_priv->id; if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, (struct sockaddr *)&id->route.addr.dst_addr, listen_id, ib_event, ss_family, service_id)) @@ -2104,13 +2126,13 @@ cma_ib_new_udp_id(const struct rdma_cm_id *listen_id, int ret; listen_id_priv = container_of(listen_id, struct rdma_id_private, id); - id = __rdma_create_id(net, listen_id->event_handler, listen_id->context, - listen_id->ps, IB_QPT_UD, - listen_id_priv->res.kern_name); - if (IS_ERR(id)) + id_priv = __rdma_create_id(net, listen_id->event_handler, + listen_id->context, listen_id->ps, IB_QPT_UD, + listen_id_priv); + if (IS_ERR(id_priv)) return NULL; - id_priv = container_of(id, struct rdma_id_private, id); + id = &id_priv->id; if (cma_save_net_info((struct sockaddr *)&id->route.addr.src_addr, (struct sockaddr *)&id->route.addr.dst_addr, listen_id, ib_event, ss_family, @@ -2184,7 +2206,7 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id, } mutex_lock(&listen_id->handler_mutex); - if (listen_id->state != RDMA_CM_LISTEN) { + if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN) { ret = -ECONNABORTED; goto err_unlock; } @@ -2226,8 +2248,8 @@ static int cma_ib_req_handler(struct ib_cm_id *cm_id, goto net_dev_put; } - if (cma_comp(conn_id, RDMA_CM_CONNECT) && - (conn_id->id.qp_type != IB_QPT_UD)) { + if (READ_ONCE(conn_id->state) == RDMA_CM_CONNECT && + conn_id->id.qp_type != IB_QPT_UD) { trace_cm_send_mra(cm_id->context); ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); } @@ -2288,7 +2310,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; mutex_lock(&id_priv->handler_mutex); - if (id_priv->state != RDMA_CM_CONNECT) + if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT) goto out; switch (iw_event->event) { @@ -2346,7 +2368,6 @@ out: static int iw_conn_req_handler(struct iw_cm_id *cm_id, struct iw_cm_event *iw_event) { - struct rdma_cm_id *new_cm_id; struct rdma_id_private *listen_id, *conn_id; struct rdma_cm_event event = {}; int ret = -ECONNABORTED; @@ -2362,20 +2383,18 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, listen_id = cm_id->context; mutex_lock(&listen_id->handler_mutex); - if (listen_id->state != RDMA_CM_LISTEN) + if (READ_ONCE(listen_id->state) != RDMA_CM_LISTEN) goto out; /* Create a new RDMA id for the new IW CM ID */ - new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net, - listen_id->id.event_handler, - listen_id->id.context, - RDMA_PS_TCP, IB_QPT_RC, - listen_id->res.kern_name); - if (IS_ERR(new_cm_id)) { + conn_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net, + listen_id->id.event_handler, + listen_id->id.context, RDMA_PS_TCP, + IB_QPT_RC, listen_id); + if (IS_ERR(conn_id)) { ret = -ENOMEM; goto out; } - conn_id = container_of(new_cm_id, struct rdma_id_private, id); mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); conn_id->state = RDMA_CM_CONNECT; @@ -2480,7 +2499,6 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, struct cma_device *cma_dev) { struct rdma_id_private *dev_id_priv; - struct rdma_cm_id *id; struct net *net = id_priv->id.route.addr.dev_addr.net; int ret; @@ -2489,13 +2507,12 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1)) return; - id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps, - id_priv->id.qp_type, id_priv->res.kern_name); - if (IS_ERR(id)) + dev_id_priv = + __rdma_create_id(net, cma_listen_handler, id_priv, + id_priv->id.ps, id_priv->id.qp_type, id_priv); + if (IS_ERR(dev_id_priv)) return; - dev_id_priv = container_of(id, struct rdma_id_private, id); - dev_id_priv->state = RDMA_CM_ADDR_BOUND; memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv), rdma_addr_size(cma_src_addr(id_priv))); @@ -2508,7 +2525,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv, dev_id_priv->tos_set = id_priv->tos_set; dev_id_priv->tos = id_priv->tos; - ret = rdma_listen(id, id_priv->backlog); + ret = rdma_listen(&dev_id_priv->id, id_priv->backlog); if (ret) dev_warn(&cma_dev->device->dev, "RDMA CMA: cma_listen_on_dev, error %d\n", ret); @@ -2647,31 +2664,13 @@ static void cma_work_handler(struct work_struct *_work) struct rdma_id_private *id_priv = work->id; mutex_lock(&id_priv->handler_mutex); - if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) - goto out_unlock; - - if (cma_cm_event_handler(id_priv, &work->event)) { - cma_id_put(id_priv); - destroy_id_handler_unlock(id_priv); - goto out_free; - } - -out_unlock: - mutex_unlock(&id_priv->handler_mutex); - cma_id_put(id_priv); -out_free: - kfree(work); -} - -static void cma_ndev_work_handler(struct work_struct *_work) -{ - struct cma_ndev_work *work = container_of(_work, struct cma_ndev_work, work); - struct rdma_id_private *id_priv = work->id; - - mutex_lock(&id_priv->handler_mutex); - if (id_priv->state == RDMA_CM_DESTROYING || - id_priv->state == RDMA_CM_DEVICE_REMOVAL) + if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING || + READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL) goto out_unlock; + if (work->old_state != 0 || work->new_state != 0) { + if (!cma_comp_exch(id_priv, work->old_state, work->new_state)) + goto out_unlock; + } if (cma_cm_event_handler(id_priv, &work->event)) { cma_id_put(id_priv); @@ -2683,6 +2682,8 @@ out_unlock: mutex_unlock(&id_priv->handler_mutex); cma_id_put(id_priv); out_free: + if (work->event.event == RDMA_CM_EVENT_MULTICAST_JOIN) + rdma_destroy_ah_attr(&work->event.param.ud.ah_attr); kfree(work); } @@ -3240,32 +3241,54 @@ static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, return rdma_bind_addr(id, src_addr); } -int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, - const struct sockaddr *dst_addr, unsigned long timeout_ms) +/* + * If required, resolve the source address for bind and leave the id_priv in + * state RDMA_CM_ADDR_BOUND. This oddly uses the state to determine the prior + * calls made by ULP, a previously bound ID will not be re-bound and src_addr is + * ignored. + */ +static int resolve_prepare_src(struct rdma_id_private *id_priv, + struct sockaddr *src_addr, + const struct sockaddr *dst_addr) { - struct rdma_id_private *id_priv; int ret; - id_priv = container_of(id, struct rdma_id_private, id); memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr)); - if (id_priv->state == RDMA_CM_IDLE) { - ret = cma_bind_addr(id, src_addr, dst_addr); - if (ret) { - memset(cma_dst_addr(id_priv), 0, - rdma_addr_size(dst_addr)); - return ret; + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { + /* For a well behaved ULP state will be RDMA_CM_IDLE */ + ret = cma_bind_addr(&id_priv->id, src_addr, dst_addr); + if (ret) + goto err_dst; + if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, + RDMA_CM_ADDR_QUERY))) { + ret = -EINVAL; + goto err_dst; } } if (cma_family(id_priv) != dst_addr->sa_family) { - memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); - return -EINVAL; + ret = -EINVAL; + goto err_state; } + return 0; - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) { - memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); - return -EINVAL; - } +err_state: + cma_comp_exch(id_priv, RDMA_CM_ADDR_QUERY, RDMA_CM_ADDR_BOUND); +err_dst: + memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr)); + return ret; +} + +int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, + const struct sockaddr *dst_addr, unsigned long timeout_ms) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + int ret; + + ret = resolve_prepare_src(id_priv, src_addr, dst_addr); + if (ret) + return ret; if (cma_any_addr(dst_addr)) { ret = cma_resolve_loopback(id_priv); @@ -3297,7 +3320,8 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse) id_priv = container_of(id, struct rdma_id_private, id); spin_lock_irqsave(&id_priv->lock, flags); - if (reuse || id_priv->state == RDMA_CM_IDLE) { + if ((reuse && id_priv->state != RDMA_CM_LISTEN) || + id_priv->state == RDMA_CM_IDLE) { id_priv->reuseaddr = reuse; ret = 0; } else { @@ -3491,8 +3515,7 @@ static int cma_check_port(struct rdma_bind_list *bind_list, if (id_priv == cur_id) continue; - if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr && - cur_id->reuseaddr) + if (reuseaddr && cur_id->reuseaddr) continue; cur_addr = cma_src_addr(cur_id); @@ -3533,18 +3556,6 @@ static int cma_use_port(enum rdma_ucm_port_space ps, return ret; } -static int cma_bind_listen(struct rdma_id_private *id_priv) -{ - struct rdma_bind_list *bind_list = id_priv->bind_list; - int ret = 0; - - mutex_lock(&lock); - if (bind_list->owners.first->next) - ret = cma_check_port(bind_list, id_priv, 0); - mutex_unlock(&lock); - return ret; -} - static enum rdma_ucm_port_space cma_select_inet_ps(struct rdma_id_private *id_priv) { @@ -3638,22 +3649,31 @@ static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, int rdma_listen(struct rdma_cm_id *id, int backlog) { - struct rdma_id_private *id_priv; + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); int ret; - id_priv = container_of(id, struct rdma_id_private, id); - if (id_priv->state == RDMA_CM_IDLE) { + if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) { + /* For a well behaved ULP state will be RDMA_CM_IDLE */ id->route.addr.src_addr.ss_family = AF_INET; ret = rdma_bind_addr(id, cma_src_addr(id_priv)); if (ret) return ret; + if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, + RDMA_CM_LISTEN))) + return -EINVAL; } - if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) - return -EINVAL; - + /* + * Once the ID reaches RDMA_CM_LISTEN it is not allowed to be reusable + * any more, and has to be unique in the bind list. + */ if (id_priv->reuseaddr) { - ret = cma_bind_listen(id_priv); + mutex_lock(&lock); + ret = cma_check_port(id_priv->bind_list, id_priv, 0); + if (!ret) + id_priv->reuseaddr = 0; + mutex_unlock(&lock); if (ret) goto err; } @@ -3678,6 +3698,10 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) return 0; err: id_priv->backlog = 0; + /* + * All the failure paths that lead here will not allow the req_handler's + * to have run. + */ cma_comp_exch(id_priv, RDMA_CM_LISTEN, RDMA_CM_ADDR_BOUND); return ret; } @@ -3732,7 +3756,6 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr) return 0; err2: - rdma_restrack_del(&id_priv->res); if (id_priv->cma_dev) cma_release_dev(id_priv); err1: @@ -3781,7 +3804,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, int ret; mutex_lock(&id_priv->handler_mutex); - if (id_priv->state != RDMA_CM_CONNECT) + if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT) goto out; switch (ib_event->event) { @@ -4017,12 +4040,15 @@ out: int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) { - struct rdma_id_private *id_priv; + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); int ret; - id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) - return -EINVAL; + mutex_lock(&id_priv->handler_mutex); + if (!cma_comp_exch(id_priv, RDMA_CM_ROUTE_RESOLVED, RDMA_CM_CONNECT)) { + ret = -EINVAL; + goto err_unlock; + } if (!id->qp) { id_priv->qp_num = conn_param->qp_num; @@ -4039,11 +4065,13 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) else ret = -ENOSYS; if (ret) - goto err; - + goto err_state; + mutex_unlock(&id_priv->handler_mutex); return 0; -err: +err_state: cma_comp_exch(id_priv, RDMA_CM_CONNECT, RDMA_CM_ROUTE_RESOLVED); +err_unlock: + mutex_unlock(&id_priv->handler_mutex); return ret; } EXPORT_SYMBOL(rdma_connect); @@ -4155,17 +4183,33 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv, return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep); } -int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, - const char *caller) +/** + * rdma_accept - Called to accept a connection request or response. + * @id: Connection identifier associated with the request. + * @conn_param: Information needed to establish the connection. This must be + * provided if accepting a connection request. If accepting a connection + * response, this parameter must be NULL. + * + * Typically, this routine is only called by the listener to accept a connection + * request. It must also be called on the active side of a connection if the + * user is performing their own QP transitions. + * + * In the case of error, a reject message is sent to the remote side and the + * state of the qp associated with the id is modified to error, such that any + * previously posted receive buffers would be flushed. + * + * This function is for use by kernel ULPs and must be called from under the + * handler callback. + */ +int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param) { - struct rdma_id_private *id_priv; + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); int ret; - id_priv = container_of(id, struct rdma_id_private, id); + lockdep_assert_held(&id_priv->handler_mutex); - rdma_restrack_set_task(&id_priv->res, caller); - - if (!cma_comp(id_priv, RDMA_CM_CONNECT)) + if (READ_ONCE(id_priv->state) != RDMA_CM_CONNECT) return -EINVAL; if (!id->qp && conn_param) { @@ -4203,10 +4247,10 @@ reject: rdma_reject(id, NULL, 0, IB_CM_REJ_CONSUMER_DEFINED); return ret; } -EXPORT_SYMBOL(__rdma_accept); +EXPORT_SYMBOL(rdma_accept); -int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, - const char *caller, struct rdma_ucm_ece *ece) +int rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, + struct rdma_ucm_ece *ece) { struct rdma_id_private *id_priv = container_of(id, struct rdma_id_private, id); @@ -4214,9 +4258,27 @@ int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, id_priv->ece.vendor_id = ece->vendor_id; id_priv->ece.attr_mod = ece->attr_mod; - return __rdma_accept(id, conn_param, caller); + return rdma_accept(id, conn_param); } -EXPORT_SYMBOL(__rdma_accept_ece); +EXPORT_SYMBOL(rdma_accept_ece); + +void rdma_lock_handler(struct rdma_cm_id *id) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + + mutex_lock(&id_priv->handler_mutex); +} +EXPORT_SYMBOL(rdma_lock_handler); + +void rdma_unlock_handler(struct rdma_cm_id *id) +{ + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); + + mutex_unlock(&id_priv->handler_mutex); +} +EXPORT_SYMBOL(rdma_unlock_handler); int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event) { @@ -4299,63 +4361,66 @@ out: } EXPORT_SYMBOL(rdma_disconnect); -static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) +static void cma_make_mc_event(int status, struct rdma_id_private *id_priv, + struct ib_sa_multicast *multicast, + struct rdma_cm_event *event, + struct cma_multicast *mc) { - struct rdma_id_private *id_priv; - struct cma_multicast *mc = multicast->context; - struct rdma_cm_event event = {}; - int ret = 0; - - id_priv = mc->id_priv; - mutex_lock(&id_priv->handler_mutex); - if (id_priv->state != RDMA_CM_ADDR_BOUND && - id_priv->state != RDMA_CM_ADDR_RESOLVED) - goto out; + struct rdma_dev_addr *dev_addr; + enum ib_gid_type gid_type; + struct net_device *ndev; if (!status) status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey)); else pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n", status); - mutex_lock(&id_priv->qp_mutex); - if (!status && id_priv->id.qp) { - status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, - be16_to_cpu(multicast->rec.mlid)); - if (status) - pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to attach QP. status %d\n", - status); + + event->status = status; + event->param.ud.private_data = mc->context; + if (status) { + event->event = RDMA_CM_EVENT_MULTICAST_ERROR; + return; } - mutex_unlock(&id_priv->qp_mutex); - event.status = status; - event.param.ud.private_data = mc->context; - if (!status) { - struct rdma_dev_addr *dev_addr = - &id_priv->id.route.addr.dev_addr; - struct net_device *ndev = - dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); - enum ib_gid_type gid_type = - id_priv->cma_dev->default_gid_type[id_priv->id.port_num - - rdma_start_port(id_priv->cma_dev->device)]; + dev_addr = &id_priv->id.route.addr.dev_addr; + ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); + gid_type = + id_priv->cma_dev + ->default_gid_type[id_priv->id.port_num - + rdma_start_port( + id_priv->cma_dev->device)]; - event.event = RDMA_CM_EVENT_MULTICAST_JOIN; - ret = ib_init_ah_from_mcmember(id_priv->id.device, - id_priv->id.port_num, - &multicast->rec, - ndev, gid_type, - &event.param.ud.ah_attr); - if (ret) - event.event = RDMA_CM_EVENT_MULTICAST_ERROR; + event->event = RDMA_CM_EVENT_MULTICAST_JOIN; + if (ib_init_ah_from_mcmember(id_priv->id.device, id_priv->id.port_num, + &multicast->rec, ndev, gid_type, + &event->param.ud.ah_attr)) { + event->event = RDMA_CM_EVENT_MULTICAST_ERROR; + goto out; + } - event.param.ud.qp_num = 0xFFFFFF; - event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey); - if (ndev) - dev_put(ndev); - } else - event.event = RDMA_CM_EVENT_MULTICAST_ERROR; + event->param.ud.qp_num = 0xFFFFFF; + event->param.ud.qkey = be32_to_cpu(multicast->rec.qkey); +out: + if (ndev) + dev_put(ndev); +} + +static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) +{ + struct cma_multicast *mc = multicast->context; + struct rdma_id_private *id_priv = mc->id_priv; + struct rdma_cm_event event = {}; + int ret = 0; + + mutex_lock(&id_priv->handler_mutex); + if (READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL || + READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING) + goto out; + + cma_make_mc_event(status, id_priv, multicast, &event, mc); ret = cma_cm_event_handler(id_priv, &event); - rdma_destroy_ah_attr(&event.param.ud.ah_attr); if (ret) { destroy_id_handler_unlock(id_priv); @@ -4445,23 +4510,10 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv, IB_SA_MCMEMBER_REC_MTU | IB_SA_MCMEMBER_REC_HOP_LIMIT; - mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device, - id_priv->id.port_num, &rec, - comp_mask, GFP_KERNEL, - cma_ib_mc_handler, mc); - return PTR_ERR_OR_ZERO(mc->multicast.ib); -} - -static void iboe_mcast_work_handler(struct work_struct *work) -{ - struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work); - struct cma_multicast *mc = mw->mc; - struct ib_sa_multicast *m = mc->multicast.ib; - - mc->multicast.ib->context = mc; - cma_ib_mc_handler(0, m); - kref_put(&mc->mcref, release_mc); - kfree(mw); + mc->sa_mc = ib_sa_join_multicast(&sa_client, id_priv->id.device, + id_priv->id.port_num, &rec, comp_mask, + GFP_KERNEL, cma_ib_mc_handler, mc); + return PTR_ERR_OR_ZERO(mc->sa_mc); } static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid, @@ -4496,52 +4548,47 @@ static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid, static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, struct cma_multicast *mc) { - struct iboe_mcast_work *work; + struct cma_work *work; struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; int err = 0; struct sockaddr *addr = (struct sockaddr *)&mc->addr; struct net_device *ndev = NULL; + struct ib_sa_multicast ib; enum ib_gid_type gid_type; bool send_only; send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN); - if (cma_zero_addr((struct sockaddr *)&mc->addr)) + if (cma_zero_addr(addr)) return -EINVAL; work = kzalloc(sizeof *work, GFP_KERNEL); if (!work) return -ENOMEM; - mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL); - if (!mc->multicast.ib) { - err = -ENOMEM; - goto out1; - } - gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num - rdma_start_port(id_priv->cma_dev->device)]; - cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid, gid_type); + cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type); - mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff); + ib.rec.pkey = cpu_to_be16(0xffff); if (id_priv->id.ps == RDMA_PS_UDP) - mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); + ib.rec.qkey = cpu_to_be32(RDMA_UDP_QKEY); if (dev_addr->bound_dev_if) ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); if (!ndev) { err = -ENODEV; - goto out2; + goto err_free; } - mc->multicast.ib->rec.rate = iboe_get_rate(ndev); - mc->multicast.ib->rec.hop_limit = 1; - mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu); + ib.rec.rate = iboe_get_rate(ndev); + ib.rec.hop_limit = 1; + ib.rec.mtu = iboe_get_mtu(ndev->mtu); if (addr->sa_family == AF_INET) { if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { - mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT; + ib.rec.hop_limit = IPV6_DEFAULT_HOPLIMIT; if (!send_only) { - err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, + err = cma_igmp_send(ndev, &ib.rec.mgid, true); } } @@ -4550,24 +4597,22 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, err = -ENOTSUPP; } dev_put(ndev); - if (err || !mc->multicast.ib->rec.mtu) { + if (err || !ib.rec.mtu) { if (!err) err = -EINVAL; - goto out2; + goto err_free; } rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, - &mc->multicast.ib->rec.port_gid); + &ib.rec.port_gid); work->id = id_priv; - work->mc = mc; - INIT_WORK(&work->work, iboe_mcast_work_handler); - kref_get(&mc->mcref); + INIT_WORK(&work->work, cma_work_handler); + cma_make_mc_event(0, id_priv, &ib, &work->event, mc); + /* Balances with cma_id_put() in cma_work_handler */ + cma_id_get(id_priv); queue_work(cma_wq, &work->work); - return 0; -out2: - kfree(mc->multicast.ib); -out1: +err_free: kfree(work); return err; } @@ -4575,19 +4620,21 @@ out1: int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, u8 join_state, void *context) { - struct rdma_id_private *id_priv; + struct rdma_id_private *id_priv = + container_of(id, struct rdma_id_private, id); struct cma_multicast *mc; int ret; - if (!id->device) + /* Not supported for kernel QPs */ + if (WARN_ON(id->qp)) return -EINVAL; - id_priv = container_of(id, struct rdma_id_private, id); - if (!cma_comp(id_priv, RDMA_CM_ADDR_BOUND) && - !cma_comp(id_priv, RDMA_CM_ADDR_RESOLVED)) + /* ULP is calling this wrong. */ + if (!id->device || (READ_ONCE(id_priv->state) != RDMA_CM_ADDR_BOUND && + READ_ONCE(id_priv->state) != RDMA_CM_ADDR_RESOLVED)) return -EINVAL; - mc = kmalloc(sizeof *mc, GFP_KERNEL); + mc = kzalloc(sizeof(*mc), GFP_KERNEL); if (!mc) return -ENOMEM; @@ -4597,7 +4644,6 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, mc->join_state = join_state; if (rdma_protocol_roce(id->device, id->port_num)) { - kref_init(&mc->mcref); ret = cma_iboe_join_multicast(id_priv, mc); if (ret) goto out_err; @@ -4629,25 +4675,14 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) id_priv = container_of(id, struct rdma_id_private, id); spin_lock_irq(&id_priv->lock); list_for_each_entry(mc, &id_priv->mc_list, list) { - if (!memcmp(&mc->addr, addr, rdma_addr_size(addr))) { - list_del(&mc->list); - spin_unlock_irq(&id_priv->lock); + if (memcmp(&mc->addr, addr, rdma_addr_size(addr)) != 0) + continue; + list_del(&mc->list); + spin_unlock_irq(&id_priv->lock); - if (id->qp) - ib_detach_mcast(id->qp, - &mc->multicast.ib->rec.mgid, - be16_to_cpu(mc->multicast.ib->rec.mlid)); - - BUG_ON(id_priv->cma_dev->device != id->device); - - if (rdma_cap_ib_mcast(id->device, id->port_num)) { - ib_sa_free_multicast(mc->multicast.ib); - kfree(mc); - } else if (rdma_protocol_roce(id->device, id->port_num)) { - cma_leave_roce_mc_group(id_priv, mc); - } - return; - } + WARN_ON(id_priv->cma_dev->device != id->device); + destroy_mc(id_priv, mc); + return; } spin_unlock_irq(&id_priv->lock); } @@ -4656,7 +4691,7 @@ EXPORT_SYMBOL(rdma_leave_multicast); static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id_priv) { struct rdma_dev_addr *dev_addr; - struct cma_ndev_work *work; + struct cma_work *work; dev_addr = &id_priv->id.route.addr.dev_addr; @@ -4669,7 +4704,7 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id if (!work) return -ENOMEM; - INIT_WORK(&work->work, cma_ndev_work_handler); + INIT_WORK(&work->work, cma_work_handler); work->id = id_priv; work->event.event = RDMA_CM_EVENT_ADDR_CHANGE; cma_id_get(id_priv); diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index 3c1e2ca564fe..7ec4af2ed87a 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -123,16 +123,17 @@ static ssize_t default_roce_mode_store(struct config_item *item, { struct cma_device *cma_dev; struct cma_dev_port_group *group; - int gid_type = ib_cache_gid_parse_type_str(buf); + int gid_type; ssize_t ret; - if (gid_type < 0) - return -EINVAL; - ret = cma_configfs_params_get(item, &cma_dev, &group); if (ret) return ret; + gid_type = ib_cache_gid_parse_type_str(buf); + if (gid_type < 0) + return -EINVAL; + ret = cma_set_default_gid_type(cma_dev, group->port_num, gid_type); cma_configfs_params_put(cma_dev); diff --git a/drivers/infiniband/core/cma_trace.h b/drivers/infiniband/core/cma_trace.h index e6e20c36c538..e45264267bcc 100644 --- a/drivers/infiniband/core/cma_trace.h +++ b/drivers/infiniband/core/cma_trace.h @@ -17,46 +17,6 @@ #include #include -/* - * enum ib_cm_event_type, from include/rdma/ib_cm.h - */ -#define IB_CM_EVENT_LIST \ - ib_cm_event(REQ_ERROR) \ - ib_cm_event(REQ_RECEIVED) \ - ib_cm_event(REP_ERROR) \ - ib_cm_event(REP_RECEIVED) \ - ib_cm_event(RTU_RECEIVED) \ - ib_cm_event(USER_ESTABLISHED) \ - ib_cm_event(DREQ_ERROR) \ - ib_cm_event(DREQ_RECEIVED) \ - ib_cm_event(DREP_RECEIVED) \ - ib_cm_event(TIMEWAIT_EXIT) \ - ib_cm_event(MRA_RECEIVED) \ - ib_cm_event(REJ_RECEIVED) \ - ib_cm_event(LAP_ERROR) \ - ib_cm_event(LAP_RECEIVED) \ - ib_cm_event(APR_RECEIVED) \ - ib_cm_event(SIDR_REQ_ERROR) \ - ib_cm_event(SIDR_REQ_RECEIVED) \ - ib_cm_event_end(SIDR_REP_RECEIVED) - -#undef ib_cm_event -#undef ib_cm_event_end - -#define ib_cm_event(x) TRACE_DEFINE_ENUM(IB_CM_##x); -#define ib_cm_event_end(x) TRACE_DEFINE_ENUM(IB_CM_##x); - -IB_CM_EVENT_LIST - -#undef ib_cm_event -#undef ib_cm_event_end - -#define ib_cm_event(x) { IB_CM_##x, #x }, -#define ib_cm_event_end(x) { IB_CM_##x, #x } - -#define rdma_show_ib_cm_event(x) \ - __print_symbolic(x, IB_CM_EVENT_LIST) - DECLARE_EVENT_CLASS(cma_fsm_class, TP_PROTO( diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index a1e6a67b2c4a..e84b0fedaacb 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -44,6 +44,7 @@ #include #include #include "mad_priv.h" +#include "restrack.h" /* Total number of ports combined across all struct ib_devices's */ #define RDMA_MAX_PORTS 8192 @@ -352,6 +353,7 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, INIT_LIST_HEAD(&qp->rdma_mrs); INIT_LIST_HEAD(&qp->sig_mrs); + rdma_restrack_new(&qp->res, RDMA_RESTRACK_QP); /* * We don't track XRC QPs for now, because they don't have PD * and more importantly they are created internaly by driver, @@ -359,14 +361,9 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev, */ is_xrc = qp_type == IB_QPT_XRC_INI || qp_type == IB_QPT_XRC_TGT; if ((qp_type < IB_QPT_MAX && !is_xrc) || qp_type == IB_QPT_DRIVER) { - qp->res.type = RDMA_RESTRACK_QP; - if (uobj) - rdma_restrack_uadd(&qp->res); - else - rdma_restrack_kadd(&qp->res); - } else - qp->res.valid = false; - + rdma_restrack_parent_name(&qp->res, &pd->res); + rdma_restrack_add(&qp->res); + } return qp; } diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c index 636166880442..e4ff0d3328b6 100644 --- a/drivers/infiniband/core/counters.c +++ b/drivers/infiniband/core/counters.c @@ -80,8 +80,9 @@ static struct rdma_counter *rdma_counter_alloc(struct ib_device *dev, u8 port, counter->device = dev; counter->port = port; - counter->res.type = RDMA_RESTRACK_COUNTER; - counter->stats = dev->ops.counter_alloc_stats(counter); + + rdma_restrack_new(&counter->res, RDMA_RESTRACK_COUNTER); + counter->stats = dev->ops.counter_alloc_stats(counter); if (!counter->stats) goto err_stats; @@ -107,6 +108,7 @@ err_mode: mutex_unlock(&port_counter->lock); kfree(counter->stats); err_stats: + rdma_restrack_put(&counter->res); kfree(counter); return NULL; } @@ -248,13 +250,8 @@ next: static void rdma_counter_res_add(struct rdma_counter *counter, struct ib_qp *qp) { - if (rdma_is_kernel_res(&qp->res)) { - rdma_restrack_set_task(&counter->res, qp->res.kern_name); - rdma_restrack_kadd(&counter->res); - } else { - rdma_restrack_attach_task(&counter->res, qp->res.task); - rdma_restrack_uadd(&counter->res); - } + rdma_restrack_parent_name(&counter->res, &qp->res); + rdma_restrack_add(&counter->res); } static void counter_release(struct kref *kref) diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c index a92fc3f90bb5..12ebacf52958 100644 --- a/drivers/infiniband/core/cq.c +++ b/drivers/infiniband/core/cq.c @@ -197,24 +197,22 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private) } /** - * __ib_alloc_cq_user - allocate a completion queue + * __ib_alloc_cq allocate a completion queue * @dev: device to allocate the CQ for * @private: driver private data, accessible from cq->cq_context * @nr_cqe: number of CQEs to allocate * @comp_vector: HCA completion vectors for this CQ * @poll_ctx: context to poll the CQ from. * @caller: module owner name. - * @udata: Valid user data or NULL for kernel object * * This is the proper interface to allocate a CQ for in-kernel users. A * CQ allocated with this interface will automatically be polled from the * specified context. The ULP must use wr->wr_cqe instead of wr->wr_id * to use this CQ abstraction. */ -struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private, - int nr_cqe, int comp_vector, - enum ib_poll_context poll_ctx, - const char *caller, struct ib_udata *udata) +struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe, + int comp_vector, enum ib_poll_context poll_ctx, + const char *caller) { struct ib_cq_init_attr cq_attr = { .cqe = nr_cqe, @@ -237,15 +235,13 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private, if (!cq->wc) goto out_free_cq; - cq->res.type = RDMA_RESTRACK_CQ; - rdma_restrack_set_task(&cq->res, caller); + rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ); + rdma_restrack_set_name(&cq->res, caller); ret = dev->ops.create_cq(cq, &cq_attr, NULL); if (ret) goto out_free_wc; - rdma_restrack_kadd(&cq->res); - rdma_dim_init(cq); switch (cq->poll_ctx) { @@ -271,21 +267,22 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private, goto out_destroy_cq; } + rdma_restrack_add(&cq->res); trace_cq_alloc(cq, nr_cqe, comp_vector, poll_ctx); return cq; out_destroy_cq: rdma_dim_destroy(cq); - rdma_restrack_del(&cq->res); - cq->device->ops.destroy_cq(cq, udata); + cq->device->ops.destroy_cq(cq, NULL); out_free_wc: + rdma_restrack_put(&cq->res); kfree(cq->wc); out_free_cq: kfree(cq); trace_cq_alloc_error(nr_cqe, comp_vector, poll_ctx, ret); return ERR_PTR(ret); } -EXPORT_SYMBOL(__ib_alloc_cq_user); +EXPORT_SYMBOL(__ib_alloc_cq); /** * __ib_alloc_cq_any - allocate a completion queue @@ -310,18 +307,19 @@ struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private, atomic_inc_return(&counter) % min_t(int, dev->num_comp_vectors, num_online_cpus()); - return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx, - caller, NULL); + return __ib_alloc_cq(dev, private, nr_cqe, comp_vector, poll_ctx, + caller); } EXPORT_SYMBOL(__ib_alloc_cq_any); /** - * ib_free_cq_user - free a completion queue + * ib_free_cq - free a completion queue * @cq: completion queue to free. - * @udata: User data or NULL for kernel object */ -void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata) +void ib_free_cq(struct ib_cq *cq) { + int ret; + if (WARN_ON_ONCE(atomic_read(&cq->usecnt))) return; if (WARN_ON_ONCE(cq->cqe_used)) @@ -343,12 +341,13 @@ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata) rdma_dim_destroy(cq); trace_cq_free(cq); + ret = cq->device->ops.destroy_cq(cq, NULL); + WARN_ONCE(ret, "Destroy of kernel CQ shouldn't fail"); rdma_restrack_del(&cq->res); - cq->device->ops.destroy_cq(cq, udata); kfree(cq->wc); kfree(cq); } -EXPORT_SYMBOL(ib_free_cq_user); +EXPORT_SYMBOL(ib_free_cq); void ib_cq_pool_init(struct ib_device *dev) { diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 23ee65a9185f..a3b1fc84cdca 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1177,58 +1177,23 @@ out: return ret; } -static void setup_dma_device(struct ib_device *device) +static void setup_dma_device(struct ib_device *device, + struct device *dma_device) { - struct device *parent = device->dev.parent; - - WARN_ON_ONCE(device->dma_device); - -#ifdef CONFIG_DMA_OPS - if (device->dev.dma_ops) { - /* - * The caller provided custom DMA operations. Copy the - * DMA-related fields that are used by e.g. dma_alloc_coherent() - * into device->dev. - */ - device->dma_device = &device->dev; - if (!device->dev.dma_mask) { - if (parent) - device->dev.dma_mask = parent->dma_mask; - else - WARN_ON_ONCE(true); - } - if (!device->dev.coherent_dma_mask) { - if (parent) - device->dev.coherent_dma_mask = - parent->coherent_dma_mask; - else - WARN_ON_ONCE(true); - } - } else -#endif /* CONFIG_DMA_OPS */ - { - /* - * The caller did not provide custom DMA operations. Use the - * DMA mapping operations of the parent device. - */ - WARN_ON_ONCE(!parent); - device->dma_device = parent; - } - - if (!device->dev.dma_parms) { - if (parent) { - /* - * The caller did not provide DMA parameters, so - * 'parent' probably represents a PCI device. The PCI - * core sets the maximum segment size to 64 - * KB. Increase this parameter to 2 GB. - */ - device->dev.dma_parms = parent->dma_parms; - dma_set_max_seg_size(device->dma_device, SZ_2G); - } else { - WARN_ON_ONCE(true); - } + /* + * If the caller does not provide a DMA capable device then the IB + * device will be used. In this case the caller should fully setup the + * ibdev for DMA. This usually means using dma_virt_ops. + */ +#ifdef CONFIG_DMA_VIRT_OPS + if (!dma_device) { + device->dev.dma_ops = &dma_virt_ops; + dma_device = &device->dev; } +#endif + WARN_ON(!dma_device); + device->dma_device = dma_device; + WARN_ON(!device->dma_device->dma_parms); } /* @@ -1241,7 +1206,6 @@ static int setup_device(struct ib_device *device) struct ib_udata uhw = {.outlen = 0, .inlen = 0}; int ret; - setup_dma_device(device); ib_device_check_mandatory(device); ret = setup_port_data(device); @@ -1354,7 +1318,10 @@ static void prevent_dealloc_device(struct ib_device *ib_dev) * ib_register_device - Register an IB device with IB core * @device: Device to register * @name: unique string device name. This may include a '%' which will - * cause a unique index to be added to the passed device name. + * cause a unique index to be added to the passed device name. + * @dma_device: pointer to a DMA-capable device. If %NULL, then the IB + * device will be used. In this case the caller should fully + * setup the ibdev for DMA. This usually means using dma_virt_ops. * * Low-level drivers use ib_register_device() to register their * devices with the IB core. All registered clients will receive a @@ -1365,7 +1332,8 @@ static void prevent_dealloc_device(struct ib_device *ib_dev) * asynchronously then the device pointer may become freed as soon as this * function returns. */ -int ib_register_device(struct ib_device *device, const char *name) +int ib_register_device(struct ib_device *device, const char *name, + struct device *dma_device) { int ret; @@ -1373,6 +1341,7 @@ int ib_register_device(struct ib_device *device, const char *name) if (ret) return ret; + setup_dma_device(device, dma_device); ret = setup_device(device); if (ret) return ret; @@ -2697,7 +2666,9 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_OBJ_SIZE(dev_ops, ib_ah); SET_OBJ_SIZE(dev_ops, ib_counters); SET_OBJ_SIZE(dev_ops, ib_cq); + SET_OBJ_SIZE(dev_ops, ib_mw); SET_OBJ_SIZE(dev_ops, ib_pd); + SET_OBJ_SIZE(dev_ops, ib_rwq_ind_table); SET_OBJ_SIZE(dev_ops, ib_srq); SET_OBJ_SIZE(dev_ops, ib_ucontext); SET_OBJ_SIZE(dev_ops, ib_xrcd); diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c index 6d3ed7c6e19e..ffe11b03724c 100644 --- a/drivers/infiniband/core/rdma_core.c +++ b/drivers/infiniband/core/rdma_core.c @@ -130,17 +130,6 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, lockdep_assert_held(&ufile->hw_destroy_rwsem); assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE); - if (reason == RDMA_REMOVE_ABORT_HWOBJ) { - reason = RDMA_REMOVE_ABORT; - ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason, - attrs); - /* - * Drivers are not permitted to ignore RDMA_REMOVE_ABORT, see - * ib_is_destroy_retryable, cleanup_retryable == false here. - */ - WARN_ON(ret); - } - if (reason == RDMA_REMOVE_ABORT) { WARN_ON(!list_empty(&uobj->list)); WARN_ON(!uobj->context); @@ -674,11 +663,22 @@ void rdma_alloc_abort_uobject(struct ib_uobject *uobj, bool hw_obj_valid) { struct ib_uverbs_file *ufile = uobj->ufile; + int ret; - uverbs_destroy_uobject(uobj, - hw_obj_valid ? RDMA_REMOVE_ABORT_HWOBJ : - RDMA_REMOVE_ABORT, - attrs); + if (hw_obj_valid) { + ret = uobj->uapi_object->type_class->destroy_hw( + uobj, RDMA_REMOVE_ABORT, attrs); + /* + * If the driver couldn't destroy the object then go ahead and + * commit it. Leaking objects that can't be destroyed is only + * done during FD close after the driver has a few more tries to + * destroy it. + */ + if (WARN_ON(ret)) + return rdma_alloc_commit_uobject(uobj, attrs); + } + + uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs); /* Matches the down_read in rdma_alloc_begin_uobject */ up_read(&ufile->hw_destroy_rwsem); @@ -889,14 +889,14 @@ void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile, if (!ufile->ucontext) goto done; - ufile->ucontext->closing = true; ufile->ucontext->cleanup_retryable = true; while (!list_empty(&ufile->uobjects)) if (__uverbs_cleanup_ufile(ufile, reason)) { /* * No entry was cleaned-up successfully during this - * iteration + * iteration. It is a driver bug to fail destruction. */ + WARN_ON(!list_empty(&ufile->uobjects)); break; } diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 62fbb0ae9cb4..4aeeaaed0f17 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -123,32 +123,6 @@ int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type) } EXPORT_SYMBOL(rdma_restrack_count); -static void set_kern_name(struct rdma_restrack_entry *res) -{ - struct ib_pd *pd; - - switch (res->type) { - case RDMA_RESTRACK_QP: - pd = container_of(res, struct ib_qp, res)->pd; - if (!pd) { - WARN_ONCE(true, "XRC QPs are not supported\n"); - /* Survive, despite the programmer's error */ - res->kern_name = " "; - } - break; - case RDMA_RESTRACK_MR: - pd = container_of(res, struct ib_mr, res)->pd; - break; - default: - /* Other types set kern_name directly */ - pd = NULL; - break; - } - - if (pd) - res->kern_name = pd->res.kern_name; -} - static struct ib_device *res_to_dev(struct rdma_restrack_entry *res) { switch (res->type) { @@ -173,36 +147,77 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res) } } -void rdma_restrack_set_task(struct rdma_restrack_entry *res, - const char *caller) +/** + * rdma_restrack_attach_task() - attach the task onto this resource, + * valid for user space restrack entries. + * @res: resource entry + * @task: the task to attach + */ +static void rdma_restrack_attach_task(struct rdma_restrack_entry *res, + struct task_struct *task) +{ + if (WARN_ON_ONCE(!task)) + return; + + if (res->task) + put_task_struct(res->task); + get_task_struct(task); + res->task = task; + res->user = true; +} + +/** + * rdma_restrack_set_name() - set the task for this resource + * @res: resource entry + * @caller: kernel name, the current task will be used if the caller is NULL. + */ +void rdma_restrack_set_name(struct rdma_restrack_entry *res, const char *caller) { if (caller) { res->kern_name = caller; return; } - if (res->task) - put_task_struct(res->task); - get_task_struct(current); - res->task = current; + rdma_restrack_attach_task(res, current); } -EXPORT_SYMBOL(rdma_restrack_set_task); +EXPORT_SYMBOL(rdma_restrack_set_name); /** - * rdma_restrack_attach_task() - attach the task onto this resource - * @res: resource entry - * @task: the task to attach, the current task will be used if it is NULL. + * rdma_restrack_parent_name() - set the restrack name properties based + * on parent restrack + * @dst: destination resource entry + * @parent: parent resource entry */ -void rdma_restrack_attach_task(struct rdma_restrack_entry *res, - struct task_struct *task) +void rdma_restrack_parent_name(struct rdma_restrack_entry *dst, + const struct rdma_restrack_entry *parent) { - if (res->task) - put_task_struct(res->task); - get_task_struct(task); - res->task = task; + if (rdma_is_kernel_res(parent)) + dst->kern_name = parent->kern_name; + else + rdma_restrack_attach_task(dst, parent->task); } +EXPORT_SYMBOL(rdma_restrack_parent_name); -static void rdma_restrack_add(struct rdma_restrack_entry *res) +/** + * rdma_restrack_new() - Initializes new restrack entry to allow _put() interface + * to release memory in fully automatic way. + * @res - Entry to initialize + * @type - REstrack type + */ +void rdma_restrack_new(struct rdma_restrack_entry *res, + enum rdma_restrack_type type) +{ + kref_init(&res->kref); + init_completion(&res->comp); + res->type = type; +} +EXPORT_SYMBOL(rdma_restrack_new); + +/** + * rdma_restrack_add() - add object to the reource tracking database + * @res: resource entry + */ +void rdma_restrack_add(struct rdma_restrack_entry *res) { struct ib_device *dev = res_to_dev(res); struct rdma_restrack_root *rt; @@ -213,8 +228,6 @@ static void rdma_restrack_add(struct rdma_restrack_entry *res) rt = &dev->res[res->type]; - kref_init(&res->kref); - init_completion(&res->comp); if (res->type == RDMA_RESTRACK_QP) { /* Special case to ensure that LQPN points to right QP */ struct ib_qp *qp = container_of(res, struct ib_qp, res); @@ -236,38 +249,7 @@ static void rdma_restrack_add(struct rdma_restrack_entry *res) if (!ret) res->valid = true; } - -/** - * rdma_restrack_kadd() - add kernel object to the reource tracking database - * @res: resource entry - */ -void rdma_restrack_kadd(struct rdma_restrack_entry *res) -{ - res->task = NULL; - set_kern_name(res); - res->user = false; - rdma_restrack_add(res); -} -EXPORT_SYMBOL(rdma_restrack_kadd); - -/** - * rdma_restrack_uadd() - add user object to the reource tracking database - * @res: resource entry - */ -void rdma_restrack_uadd(struct rdma_restrack_entry *res) -{ - if ((res->type != RDMA_RESTRACK_CM_ID) && - (res->type != RDMA_RESTRACK_COUNTER)) - res->task = NULL; - - if (!res->task) - rdma_restrack_set_task(res, NULL); - res->kern_name = NULL; - - res->user = true; - rdma_restrack_add(res); -} -EXPORT_SYMBOL(rdma_restrack_uadd); +EXPORT_SYMBOL(rdma_restrack_add); int __must_check rdma_restrack_get(struct rdma_restrack_entry *res) { @@ -305,6 +287,10 @@ static void restrack_release(struct kref *kref) struct rdma_restrack_entry *res; res = container_of(kref, struct rdma_restrack_entry, kref); + if (res->task) { + put_task_struct(res->task); + res->task = NULL; + } complete(&res->comp); } @@ -314,14 +300,23 @@ int rdma_restrack_put(struct rdma_restrack_entry *res) } EXPORT_SYMBOL(rdma_restrack_put); +/** + * rdma_restrack_del() - delete object from the reource tracking database + * @res: resource entry + */ void rdma_restrack_del(struct rdma_restrack_entry *res) { struct rdma_restrack_entry *old; struct rdma_restrack_root *rt; struct ib_device *dev; - if (!res->valid) - goto out; + if (!res->valid) { + if (res->task) { + put_task_struct(res->task); + res->task = NULL; + } + return; + } dev = res_to_dev(res); if (WARN_ON(!dev)) @@ -330,16 +325,12 @@ void rdma_restrack_del(struct rdma_restrack_entry *res) rt = &dev->res[res->type]; old = xa_erase(&rt->xa, res->id); + if (res->type == RDMA_RESTRACK_MR || res->type == RDMA_RESTRACK_QP) + return; WARN_ON(old != res); res->valid = false; rdma_restrack_put(res); wait_for_completion(&res->comp); - -out: - if (res->task) { - put_task_struct(res->task); - res->task = NULL; - } } EXPORT_SYMBOL(rdma_restrack_del); diff --git a/drivers/infiniband/core/restrack.h b/drivers/infiniband/core/restrack.h index d084e5f89849..6a04fc41f738 100644 --- a/drivers/infiniband/core/restrack.h +++ b/drivers/infiniband/core/restrack.h @@ -25,6 +25,12 @@ struct rdma_restrack_root { int rdma_restrack_init(struct ib_device *dev); void rdma_restrack_clean(struct ib_device *dev); -void rdma_restrack_attach_task(struct rdma_restrack_entry *res, - struct task_struct *task); +void rdma_restrack_add(struct rdma_restrack_entry *res); +void rdma_restrack_del(struct rdma_restrack_entry *res); +void rdma_restrack_new(struct rdma_restrack_entry *res, + enum rdma_restrack_type type); +void rdma_restrack_set_name(struct rdma_restrack_entry *res, + const char *caller); +void rdma_restrack_parent_name(struct rdma_restrack_entry *dst, + const struct rdma_restrack_entry *parent); #endif /* _RDMA_CORE_RESTRACK_H_ */ diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index c11e50510e49..914cddea525d 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -59,7 +59,7 @@ struct ib_port { struct gid_attr_group *gid_attr_group; struct attribute_group gid_group; struct attribute_group *pkey_group; - struct attribute_group *pma_table; + const struct attribute_group *pma_table; struct attribute_group *hw_stats_ag; struct rdma_hw_stats *hw_stats; u8 port_num; @@ -387,7 +387,8 @@ static ssize_t _show_port_gid_attr( gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index); if (IS_ERR(gid_attr)) - return PTR_ERR(gid_attr); + /* -EINVAL is returned for user space compatibility reasons. */ + return -EINVAL; ret = print(gid_attr, buf); rdma_put_gid_attr(gid_attr); @@ -653,17 +654,17 @@ static struct attribute *pma_attrs_noietf[] = { NULL }; -static struct attribute_group pma_group = { +static const struct attribute_group pma_group = { .name = "counters", .attrs = pma_attrs }; -static struct attribute_group pma_group_ext = { +static const struct attribute_group pma_group_ext = { .name = "counters", .attrs = pma_attrs_ext }; -static struct attribute_group pma_group_noietf = { +static const struct attribute_group pma_group_noietf = { .name = "counters", .attrs = pma_attrs_noietf }; @@ -778,8 +779,8 @@ err: * Figure out which counter table to use depending on * the device capabilities. */ -static struct attribute_group *get_counter_table(struct ib_device *dev, - int port_num) +static const struct attribute_group *get_counter_table(struct ib_device *dev, + int port_num) { struct ib_class_port_info cpi; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 1d184ea05eba..ffe2563ad345 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -80,7 +80,6 @@ struct ucma_file { struct list_head ctx_list; struct list_head event_list; wait_queue_head_t poll_wait; - struct workqueue_struct *close_wq; }; struct ucma_context { @@ -88,7 +87,7 @@ struct ucma_context { struct completion comp; refcount_t ref; int events_reported; - int backlog; + atomic_t backlog; struct ucma_file *file; struct rdma_cm_id *cm_id; @@ -96,11 +95,6 @@ struct ucma_context { u64 uid; struct list_head list; - struct list_head mc_list; - /* mark that device is in process of destroying the internal HW - * resources, protected by the ctx_table lock - */ - int closing; /* sync between removal event and id destroy, protected by file mut */ int destroying; struct work_struct close_work; @@ -113,23 +107,22 @@ struct ucma_multicast { u64 uid; u8 join_state; - struct list_head list; struct sockaddr_storage addr; }; struct ucma_event { struct ucma_context *ctx; + struct ucma_context *conn_req_ctx; struct ucma_multicast *mc; struct list_head list; - struct rdma_cm_id *cm_id; struct rdma_ucm_event_resp resp; - struct work_struct close_work; }; static DEFINE_XARRAY_ALLOC(ctx_table); static DEFINE_XARRAY_ALLOC(multicast_table); static const struct file_operations ucma_fops; +static int __destroy_id(struct ucma_context *ctx); static inline struct ucma_context *_ucma_find_context(int id, struct ucma_file *file) @@ -139,7 +132,7 @@ static inline struct ucma_context *_ucma_find_context(int id, ctx = xa_load(&ctx_table, id); if (!ctx) ctx = ERR_PTR(-ENOENT); - else if (ctx->file != file || !ctx->cm_id) + else if (ctx->file != file) ctx = ERR_PTR(-EINVAL); return ctx; } @@ -150,12 +143,9 @@ static struct ucma_context *ucma_get_ctx(struct ucma_file *file, int id) xa_lock(&ctx_table); ctx = _ucma_find_context(id, file); - if (!IS_ERR(ctx)) { - if (ctx->closing) - ctx = ERR_PTR(-EIO); - else - refcount_inc(&ctx->ref); - } + if (!IS_ERR(ctx)) + if (!refcount_inc_not_zero(&ctx->ref)) + ctx = ERR_PTR(-ENXIO); xa_unlock(&ctx_table); return ctx; } @@ -183,14 +173,6 @@ static struct ucma_context *ucma_get_ctx_dev(struct ucma_file *file, int id) return ctx; } -static void ucma_close_event_id(struct work_struct *work) -{ - struct ucma_event *uevent_close = container_of(work, struct ucma_event, close_work); - - rdma_destroy_id(uevent_close->cm_id); - kfree(uevent_close); -} - static void ucma_close_id(struct work_struct *work) { struct ucma_context *ctx = container_of(work, struct ucma_context, close_work); @@ -203,6 +185,14 @@ static void ucma_close_id(struct work_struct *work) wait_for_completion(&ctx->comp); /* No new events will be generated after destroying the id. */ rdma_destroy_id(ctx->cm_id); + + /* + * At this point ctx->ref is zero so the only place the ctx can be is in + * a uevent or in __destroy_id(). Since the former doesn't touch + * ctx->cm_id and the latter sync cancels this, there is no races with + * this store. + */ + ctx->cm_id = NULL; } static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) @@ -216,39 +206,23 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) INIT_WORK(&ctx->close_work, ucma_close_id); refcount_set(&ctx->ref, 1); init_completion(&ctx->comp); - INIT_LIST_HEAD(&ctx->mc_list); + /* So list_del() will work if we don't do ucma_finish_ctx() */ + INIT_LIST_HEAD(&ctx->list); ctx->file = file; mutex_init(&ctx->mutex); - if (xa_alloc(&ctx_table, &ctx->id, ctx, xa_limit_32b, GFP_KERNEL)) - goto error; - - list_add_tail(&ctx->list, &file->ctx_list); + if (xa_alloc(&ctx_table, &ctx->id, NULL, xa_limit_32b, GFP_KERNEL)) { + kfree(ctx); + return NULL; + } return ctx; - -error: - kfree(ctx); - return NULL; } -static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx) +static void ucma_finish_ctx(struct ucma_context *ctx) { - struct ucma_multicast *mc; - - mc = kzalloc(sizeof(*mc), GFP_KERNEL); - if (!mc) - return NULL; - - mc->ctx = ctx; - if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, GFP_KERNEL)) - goto error; - - list_add_tail(&mc->list, &ctx->mc_list); - return mc; - -error: - kfree(mc); - return NULL; + lockdep_assert_held(&ctx->file->mut); + list_add_tail(&ctx->list, &ctx->file->ctx_list); + xa_store(&ctx_table, ctx->id, ctx, GFP_KERNEL); } static void ucma_copy_conn_event(struct rdma_ucm_conn_param *dst, @@ -280,10 +254,15 @@ static void ucma_copy_ud_event(struct ib_device *device, dst->qkey = src->qkey; } -static void ucma_set_event_context(struct ucma_context *ctx, - struct rdma_cm_event *event, - struct ucma_event *uevent) +static struct ucma_event *ucma_create_uevent(struct ucma_context *ctx, + struct rdma_cm_event *event) { + struct ucma_event *uevent; + + uevent = kzalloc(sizeof(*uevent), GFP_KERNEL); + if (!uevent) + return NULL; + uevent->ctx = ctx; switch (event->event) { case RDMA_CM_EVENT_MULTICAST_JOIN: @@ -298,64 +277,10 @@ static void ucma_set_event_context(struct ucma_context *ctx, uevent->resp.id = ctx->id; break; } -} - -/* Called with file->mut locked for the relevant context. */ -static void ucma_removal_event_handler(struct rdma_cm_id *cm_id) -{ - struct ucma_context *ctx = cm_id->context; - struct ucma_event *con_req_eve; - int event_found = 0; - - if (ctx->destroying) - return; - - /* only if context is pointing to cm_id that it owns it and can be - * queued to be closed, otherwise that cm_id is an inflight one that - * is part of that context event list pending to be detached and - * reattached to its new context as part of ucma_get_event, - * handled separately below. - */ - if (ctx->cm_id == cm_id) { - xa_lock(&ctx_table); - ctx->closing = 1; - xa_unlock(&ctx_table); - queue_work(ctx->file->close_wq, &ctx->close_work); - return; - } - - list_for_each_entry(con_req_eve, &ctx->file->event_list, list) { - if (con_req_eve->cm_id == cm_id && - con_req_eve->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) { - list_del(&con_req_eve->list); - INIT_WORK(&con_req_eve->close_work, ucma_close_event_id); - queue_work(ctx->file->close_wq, &con_req_eve->close_work); - event_found = 1; - break; - } - } - if (!event_found) - pr_err("ucma_removal_event_handler: warning: connect request event wasn't found\n"); -} - -static int ucma_event_handler(struct rdma_cm_id *cm_id, - struct rdma_cm_event *event) -{ - struct ucma_event *uevent; - struct ucma_context *ctx = cm_id->context; - int ret = 0; - - uevent = kzalloc(sizeof(*uevent), GFP_KERNEL); - if (!uevent) - return event->event == RDMA_CM_EVENT_CONNECT_REQUEST; - - mutex_lock(&ctx->file->mut); - uevent->cm_id = cm_id; - ucma_set_event_context(ctx, event, uevent); uevent->resp.event = event->event; uevent->resp.status = event->status; - if (cm_id->qp_type == IB_QPT_UD) - ucma_copy_ud_event(cm_id->device, &uevent->resp.param.ud, + if (ctx->cm_id->qp_type == IB_QPT_UD) + ucma_copy_ud_event(ctx->cm_id->device, &uevent->resp.param.ud, &event->param.ud); else ucma_copy_conn_event(&uevent->resp.param.conn, @@ -363,46 +288,84 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id, uevent->resp.ece.vendor_id = event->ece.vendor_id; uevent->resp.ece.attr_mod = event->ece.attr_mod; + return uevent; +} - if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) { - if (!ctx->backlog) { - ret = -ENOMEM; - kfree(uevent); - goto out; - } - ctx->backlog--; - } else if (!ctx->uid || ctx->cm_id != cm_id) { - /* - * We ignore events for new connections until userspace has set - * their context. This can only happen if an error occurs on a - * new connection before the user accepts it. This is okay, - * since the accept will just fail later. However, we do need - * to release the underlying HW resources in case of a device - * removal event. - */ - if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) - ucma_removal_event_handler(cm_id); +static int ucma_connect_event_handler(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event) +{ + struct ucma_context *listen_ctx = cm_id->context; + struct ucma_context *ctx; + struct ucma_event *uevent; - kfree(uevent); - goto out; + if (!atomic_add_unless(&listen_ctx->backlog, -1, 0)) + return -ENOMEM; + ctx = ucma_alloc_ctx(listen_ctx->file); + if (!ctx) + goto err_backlog; + ctx->cm_id = cm_id; + + uevent = ucma_create_uevent(listen_ctx, event); + if (!uevent) + goto err_alloc; + uevent->conn_req_ctx = ctx; + uevent->resp.id = ctx->id; + + ctx->cm_id->context = ctx; + + mutex_lock(&ctx->file->mut); + ucma_finish_ctx(ctx); + list_add_tail(&uevent->list, &ctx->file->event_list); + mutex_unlock(&ctx->file->mut); + wake_up_interruptible(&ctx->file->poll_wait); + return 0; + +err_alloc: + xa_erase(&ctx_table, ctx->id); + kfree(ctx); +err_backlog: + atomic_inc(&listen_ctx->backlog); + /* Returning error causes the new ID to be destroyed */ + return -ENOMEM; +} + +static int ucma_event_handler(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event) +{ + struct ucma_event *uevent; + struct ucma_context *ctx = cm_id->context; + + if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) + return ucma_connect_event_handler(cm_id, event); + + /* + * We ignore events for new connections until userspace has set their + * context. This can only happen if an error occurs on a new connection + * before the user accepts it. This is okay, since the accept will just + * fail later. However, we do need to release the underlying HW + * resources in case of a device removal event. + */ + if (ctx->uid) { + uevent = ucma_create_uevent(ctx, event); + if (!uevent) + return 0; + + mutex_lock(&ctx->file->mut); + list_add_tail(&uevent->list, &ctx->file->event_list); + mutex_unlock(&ctx->file->mut); + wake_up_interruptible(&ctx->file->poll_wait); } - list_add_tail(&uevent->list, &ctx->file->event_list); - wake_up_interruptible(&ctx->file->poll_wait); - if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL) - ucma_removal_event_handler(cm_id); -out: - mutex_unlock(&ctx->file->mut); - return ret; + if (event->event == RDMA_CM_EVENT_DEVICE_REMOVAL && !ctx->destroying) + queue_work(system_unbound_wq, &ctx->close_work); + return 0; } static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { - struct ucma_context *ctx; struct rdma_ucm_get_event cmd; struct ucma_event *uevent; - int ret = 0; /* * Old 32 bit user space does not send the 4 byte padding in the @@ -429,35 +392,25 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf, mutex_lock(&file->mut); } - uevent = list_entry(file->event_list.next, struct ucma_event, list); - - if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) { - ctx = ucma_alloc_ctx(file); - if (!ctx) { - ret = -ENOMEM; - goto done; - } - uevent->ctx->backlog++; - ctx->cm_id = uevent->cm_id; - ctx->cm_id->context = ctx; - uevent->resp.id = ctx->id; - } + uevent = list_first_entry(&file->event_list, struct ucma_event, list); if (copy_to_user(u64_to_user_ptr(cmd.response), &uevent->resp, min_t(size_t, out_len, sizeof(uevent->resp)))) { - ret = -EFAULT; - goto done; + mutex_unlock(&file->mut); + return -EFAULT; } list_del(&uevent->list); uevent->ctx->events_reported++; if (uevent->mc) uevent->mc->events_reported++; - kfree(uevent); -done: + if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) + atomic_inc(&uevent->ctx->backlog); mutex_unlock(&file->mut); - return ret; + + kfree(uevent); + return 0; } static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_type) @@ -498,58 +451,60 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf, if (ret) return ret; - mutex_lock(&file->mut); ctx = ucma_alloc_ctx(file); - mutex_unlock(&file->mut); if (!ctx) return -ENOMEM; ctx->uid = cmd.uid; - cm_id = __rdma_create_id(current->nsproxy->net_ns, - ucma_event_handler, ctx, cmd.ps, qp_type, NULL); + cm_id = rdma_create_user_id(ucma_event_handler, ctx, cmd.ps, qp_type); if (IS_ERR(cm_id)) { ret = PTR_ERR(cm_id); goto err1; } + ctx->cm_id = cm_id; resp.id = ctx->id; if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) { - ret = -EFAULT; - goto err2; + xa_erase(&ctx_table, ctx->id); + __destroy_id(ctx); + return -EFAULT; } - ctx->cm_id = cm_id; + mutex_lock(&file->mut); + ucma_finish_ctx(ctx); + mutex_unlock(&file->mut); return 0; -err2: - rdma_destroy_id(cm_id); err1: xa_erase(&ctx_table, ctx->id); - mutex_lock(&file->mut); - list_del(&ctx->list); - mutex_unlock(&file->mut); kfree(ctx); return ret; } static void ucma_cleanup_multicast(struct ucma_context *ctx) { - struct ucma_multicast *mc, *tmp; + struct ucma_multicast *mc; + unsigned long index; - mutex_lock(&ctx->file->mut); - list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) { - list_del(&mc->list); - xa_erase(&multicast_table, mc->id); + xa_for_each(&multicast_table, index, mc) { + if (mc->ctx != ctx) + continue; + /* + * At this point mc->ctx->ref is 0 so the mc cannot leave the + * lock on the reader and this is enough serialization + */ + xa_erase(&multicast_table, index); kfree(mc); } - mutex_unlock(&ctx->file->mut); } static void ucma_cleanup_mc_events(struct ucma_multicast *mc) { struct ucma_event *uevent, *tmp; + rdma_lock_handler(mc->ctx->cm_id); + mutex_lock(&mc->ctx->file->mut); list_for_each_entry_safe(uevent, tmp, &mc->ctx->file->event_list, list) { if (uevent->mc != mc) continue; @@ -557,6 +512,8 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc) list_del(&uevent->list); kfree(uevent); } + mutex_unlock(&mc->ctx->file->mut); + rdma_unlock_handler(mc->ctx->cm_id); } /* @@ -564,10 +521,6 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc) * this point, no new events will be reported from the hardware. However, we * still need to cleanup the UCMA context for this ID. Specifically, there * might be events that have not yet been consumed by the user space software. - * These might include pending connect requests which we have not completed - * processing. We cannot call rdma_destroy_id while holding the lock of the - * context (file->mut), as it might cause a deadlock. We therefore extract all - * relevant events from the context pending events list while holding the * mutex. After that we release them as needed. */ static int ucma_free_ctx(struct ucma_context *ctx) @@ -576,31 +529,57 @@ static int ucma_free_ctx(struct ucma_context *ctx) struct ucma_event *uevent, *tmp; LIST_HEAD(list); - ucma_cleanup_multicast(ctx); /* Cleanup events not yet reported to the user. */ mutex_lock(&ctx->file->mut); list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) { - if (uevent->ctx == ctx) + if (uevent->ctx == ctx || uevent->conn_req_ctx == ctx) list_move_tail(&uevent->list, &list); } list_del(&ctx->list); + events_reported = ctx->events_reported; mutex_unlock(&ctx->file->mut); + /* + * If this was a listening ID then any connections spawned from it + * that have not been delivered to userspace are cleaned up too. + * Must be done outside any locks. + */ list_for_each_entry_safe(uevent, tmp, &list, list) { list_del(&uevent->list); - if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) - rdma_destroy_id(uevent->cm_id); + if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST && + uevent->conn_req_ctx != ctx) + __destroy_id(uevent->conn_req_ctx); kfree(uevent); } - events_reported = ctx->events_reported; mutex_destroy(&ctx->mutex); kfree(ctx); return events_reported; } +static int __destroy_id(struct ucma_context *ctx) +{ + /* + * If the refcount is already 0 then ucma_close_id() has already + * destroyed the cm_id, otherwise holding the refcount keeps cm_id + * valid. Prevent queue_work() from being called. + */ + if (refcount_inc_not_zero(&ctx->ref)) { + rdma_lock_handler(ctx->cm_id); + ctx->destroying = 1; + rdma_unlock_handler(ctx->cm_id); + ucma_put_ctx(ctx); + } + + cancel_work_sync(&ctx->close_work); + /* At this point it's guaranteed that there is no inflight closing task */ + if (ctx->cm_id) + ucma_close_id(&ctx->close_work); + return ucma_free_ctx(ctx); +} + static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, int in_len, int out_len) { @@ -624,24 +603,7 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); - mutex_lock(&ctx->file->mut); - ctx->destroying = 1; - mutex_unlock(&ctx->file->mut); - - flush_workqueue(ctx->file->close_wq); - /* At this point it's guaranteed that there is no inflight - * closing task */ - xa_lock(&ctx_table); - if (!ctx->closing) { - xa_unlock(&ctx_table); - ucma_put_ctx(ctx); - wait_for_completion(&ctx->comp); - rdma_destroy_id(ctx->cm_id); - } else { - xa_unlock(&ctx_table); - } - - resp.events_reported = ucma_free_ctx(ctx); + resp.events_reported = __destroy_id(ctx); if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) ret = -EFAULT; @@ -1124,10 +1086,12 @@ static ssize_t ucma_listen(struct ucma_file *file, const char __user *inbuf, if (IS_ERR(ctx)) return PTR_ERR(ctx); - ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ? - cmd.backlog : max_backlog; + if (cmd.backlog <= 0 || cmd.backlog > max_backlog) + cmd.backlog = max_backlog; + atomic_set(&ctx->backlog, cmd.backlog); + mutex_lock(&ctx->mutex); - ret = rdma_listen(ctx->cm_id, ctx->backlog); + ret = rdma_listen(ctx->cm_id, cmd.backlog); mutex_unlock(&ctx->mutex); ucma_put_ctx(ctx); return ret; @@ -1160,16 +1124,20 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, if (cmd.conn_param.valid) { ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); - mutex_lock(&file->mut); mutex_lock(&ctx->mutex); - ret = __rdma_accept_ece(ctx->cm_id, &conn_param, NULL, &ece); - mutex_unlock(&ctx->mutex); - if (!ret) + rdma_lock_handler(ctx->cm_id); + ret = rdma_accept_ece(ctx->cm_id, &conn_param, &ece); + if (!ret) { + /* The uid must be set atomically with the handler */ ctx->uid = cmd.uid; - mutex_unlock(&file->mut); + } + rdma_unlock_handler(ctx->cm_id); + mutex_unlock(&ctx->mutex); } else { mutex_lock(&ctx->mutex); - ret = __rdma_accept_ece(ctx->cm_id, NULL, NULL, &ece); + rdma_lock_handler(ctx->cm_id); + ret = rdma_accept_ece(ctx->cm_id, NULL, &ece); + rdma_unlock_handler(ctx->cm_id); mutex_unlock(&ctx->mutex); } ucma_put_ctx(ctx); @@ -1482,44 +1450,52 @@ static ssize_t ucma_process_join(struct ucma_file *file, if (IS_ERR(ctx)) return PTR_ERR(ctx); - mutex_lock(&file->mut); - mc = ucma_alloc_multicast(ctx); + mc = kzalloc(sizeof(*mc), GFP_KERNEL); if (!mc) { ret = -ENOMEM; - goto err1; + goto err_put_ctx; } + + mc->ctx = ctx; mc->join_state = join_state; mc->uid = cmd->uid; memcpy(&mc->addr, addr, cmd->addr_size); + + if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, + GFP_KERNEL)) { + ret = -ENOMEM; + goto err_free_mc; + } + mutex_lock(&ctx->mutex); ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr, join_state, mc); mutex_unlock(&ctx->mutex); if (ret) - goto err2; + goto err_xa_erase; resp.id = mc->id; if (copy_to_user(u64_to_user_ptr(cmd->response), &resp, sizeof(resp))) { ret = -EFAULT; - goto err3; + goto err_leave_multicast; } xa_store(&multicast_table, mc->id, mc, 0); - mutex_unlock(&file->mut); ucma_put_ctx(ctx); return 0; -err3: +err_leave_multicast: + mutex_lock(&ctx->mutex); rdma_leave_multicast(ctx->cm_id, (struct sockaddr *) &mc->addr); + mutex_unlock(&ctx->mutex); ucma_cleanup_mc_events(mc); -err2: +err_xa_erase: xa_erase(&multicast_table, mc->id); - list_del(&mc->list); +err_free_mc: kfree(mc); -err1: - mutex_unlock(&file->mut); +err_put_ctx: ucma_put_ctx(ctx); return ret; } @@ -1581,7 +1557,7 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file, mc = xa_load(&multicast_table, cmd.id); if (!mc) mc = ERR_PTR(-ENOENT); - else if (mc->ctx->file != file) + else if (READ_ONCE(mc->ctx->file) != file) mc = ERR_PTR(-EINVAL); else if (!refcount_inc_not_zero(&mc->ctx->ref)) mc = ERR_PTR(-ENXIO); @@ -1598,10 +1574,7 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file, rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr); mutex_unlock(&mc->ctx->mutex); - mutex_lock(&mc->ctx->file->mut); ucma_cleanup_mc_events(mc); - list_del(&mc->list); - mutex_unlock(&mc->ctx->file->mut); ucma_put_ctx(mc->ctx); resp.events_reported = mc->events_reported; @@ -1614,45 +1587,15 @@ out: return ret; } -static void ucma_lock_files(struct ucma_file *file1, struct ucma_file *file2) -{ - /* Acquire mutex's based on pointer comparison to prevent deadlock. */ - if (file1 < file2) { - mutex_lock(&file1->mut); - mutex_lock_nested(&file2->mut, SINGLE_DEPTH_NESTING); - } else { - mutex_lock(&file2->mut); - mutex_lock_nested(&file1->mut, SINGLE_DEPTH_NESTING); - } -} - -static void ucma_unlock_files(struct ucma_file *file1, struct ucma_file *file2) -{ - if (file1 < file2) { - mutex_unlock(&file2->mut); - mutex_unlock(&file1->mut); - } else { - mutex_unlock(&file1->mut); - mutex_unlock(&file2->mut); - } -} - -static void ucma_move_events(struct ucma_context *ctx, struct ucma_file *file) -{ - struct ucma_event *uevent, *tmp; - - list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) - if (uevent->ctx == ctx) - list_move_tail(&uevent->list, &file->event_list); -} - static ssize_t ucma_migrate_id(struct ucma_file *new_file, const char __user *inbuf, int in_len, int out_len) { struct rdma_ucm_migrate_id cmd; struct rdma_ucm_migrate_resp resp; + struct ucma_event *uevent, *tmp; struct ucma_context *ctx; + LIST_HEAD(event_list); struct fd f; struct ucma_file *cur_file; int ret = 0; @@ -1668,40 +1611,53 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file, ret = -EINVAL; goto file_put; } + cur_file = f.file->private_data; /* Validate current fd and prevent destruction of id. */ - ctx = ucma_get_ctx(f.file->private_data, cmd.id); + ctx = ucma_get_ctx(cur_file, cmd.id); if (IS_ERR(ctx)) { ret = PTR_ERR(ctx); goto file_put; } - cur_file = ctx->file; - if (cur_file == new_file) { - resp.events_reported = ctx->events_reported; - goto response; - } - + rdma_lock_handler(ctx->cm_id); /* - * Migrate events between fd's, maintaining order, and avoiding new - * events being added before existing events. + * ctx->file can only be changed under the handler & xa_lock. xa_load() + * must be checked again to ensure the ctx hasn't begun destruction + * since the ucma_get_ctx(). */ - ucma_lock_files(cur_file, new_file); xa_lock(&ctx_table); - - list_move_tail(&ctx->list, &new_file->ctx_list); - ucma_move_events(ctx, new_file); + if (_ucma_find_context(cmd.id, cur_file) != ctx) { + xa_unlock(&ctx_table); + ret = -ENOENT; + goto err_unlock; + } ctx->file = new_file; - resp.events_reported = ctx->events_reported; - xa_unlock(&ctx_table); - ucma_unlock_files(cur_file, new_file); -response: + mutex_lock(&cur_file->mut); + list_del(&ctx->list); + /* + * At this point lock_handler() prevents addition of new uevents for + * this ctx. + */ + list_for_each_entry_safe(uevent, tmp, &cur_file->event_list, list) + if (uevent->ctx == ctx) + list_move_tail(&uevent->list, &event_list); + resp.events_reported = ctx->events_reported; + mutex_unlock(&cur_file->mut); + + mutex_lock(&new_file->mut); + list_add_tail(&ctx->list, &new_file->ctx_list); + list_splice_tail(&event_list, &new_file->event_list); + mutex_unlock(&new_file->mut); + if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) ret = -EFAULT; +err_unlock: + rdma_unlock_handler(ctx->cm_id); ucma_put_ctx(ctx); file_put: fdput(f); @@ -1801,13 +1757,6 @@ static int ucma_open(struct inode *inode, struct file *filp) if (!file) return -ENOMEM; - file->close_wq = alloc_ordered_workqueue("ucma_close_id", - WQ_MEM_RECLAIM); - if (!file->close_wq) { - kfree(file); - return -ENOMEM; - } - INIT_LIST_HEAD(&file->event_list); INIT_LIST_HEAD(&file->ctx_list); init_waitqueue_head(&file->poll_wait); @@ -1822,37 +1771,22 @@ static int ucma_open(struct inode *inode, struct file *filp) static int ucma_close(struct inode *inode, struct file *filp) { struct ucma_file *file = filp->private_data; - struct ucma_context *ctx, *tmp; - mutex_lock(&file->mut); - list_for_each_entry_safe(ctx, tmp, &file->ctx_list, list) { - ctx->destroying = 1; - mutex_unlock(&file->mut); + /* + * All paths that touch ctx_list or ctx_list starting from write() are + * prevented by this being a FD release function. The list_add_tail() in + * ucma_connect_event_handler() can run concurrently, however it only + * adds to the list *after* a listening ID. By only reading the first of + * the list, and relying on __destroy_id() to block + * ucma_connect_event_handler(), no additional locking is needed. + */ + while (!list_empty(&file->ctx_list)) { + struct ucma_context *ctx = list_first_entry( + &file->ctx_list, struct ucma_context, list); xa_erase(&ctx_table, ctx->id); - flush_workqueue(file->close_wq); - /* At that step once ctx was marked as destroying and workqueue - * was flushed we are safe from any inflights handlers that - * might put other closing task. - */ - xa_lock(&ctx_table); - if (!ctx->closing) { - xa_unlock(&ctx_table); - ucma_put_ctx(ctx); - wait_for_completion(&ctx->comp); - /* rdma_destroy_id ensures that no event handlers are - * inflight for that id before releasing it. - */ - rdma_destroy_id(ctx->cm_id); - } else { - xa_unlock(&ctx_table); - } - - ucma_free_ctx(ctx); - mutex_lock(&file->mut); + __destroy_id(ctx); } - mutex_unlock(&file->mut); - destroy_workqueue(file->close_wq); kfree(file); return 0; } diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 831bff8d52e5..e9fecbdf391b 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include "uverbs.h" @@ -60,73 +61,6 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d sg_free_table(&umem->sg_head); } -/* ib_umem_add_sg_table - Add N contiguous pages to scatter table - * - * sg: current scatterlist entry - * page_list: array of npage struct page pointers - * npages: number of pages in page_list - * max_seg_sz: maximum segment size in bytes - * nents: [out] number of entries in the scatterlist - * - * Return new end of scatterlist - */ -static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg, - struct page **page_list, - unsigned long npages, - unsigned int max_seg_sz, - int *nents) -{ - unsigned long first_pfn; - unsigned long i = 0; - bool update_cur_sg = false; - bool first = !sg_page(sg); - - /* Check if new page_list is contiguous with end of previous page_list. - * sg->length here is a multiple of PAGE_SIZE and sg->offset is 0. - */ - if (!first && (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) == - page_to_pfn(page_list[0]))) - update_cur_sg = true; - - while (i != npages) { - unsigned long len; - struct page *first_page = page_list[i]; - - first_pfn = page_to_pfn(first_page); - - /* Compute the number of contiguous pages we have starting - * at i - */ - for (len = 0; i != npages && - first_pfn + len == page_to_pfn(page_list[i]) && - len < (max_seg_sz >> PAGE_SHIFT); - len++) - i++; - - /* Squash N contiguous pages from page_list into current sge */ - if (update_cur_sg) { - if ((max_seg_sz - sg->length) >= (len << PAGE_SHIFT)) { - sg_set_page(sg, sg_page(sg), - sg->length + (len << PAGE_SHIFT), - 0); - update_cur_sg = false; - continue; - } - update_cur_sg = false; - } - - /* Squash N contiguous pages into next sge or first sge */ - if (!first) - sg = sg_next(sg); - - (*nents)++; - sg_set_page(sg, first_page, len << PAGE_SHIFT, 0); - first = false; - } - - return sg; -} - /** * ib_umem_find_best_pgsz - Find best HW page size to use for this MR * @@ -146,18 +80,28 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, unsigned long virt) { struct scatterlist *sg; - unsigned int best_pg_bit; unsigned long va, pgoff; dma_addr_t mask; int i; + /* rdma_for_each_block() has a bug if the page size is smaller than the + * page size used to build the umem. For now prevent smaller page sizes + * from being returned. + */ + pgsz_bitmap &= GENMASK(BITS_PER_LONG - 1, PAGE_SHIFT); + /* At minimum, drivers must support PAGE_SIZE or smaller */ if (WARN_ON(!(pgsz_bitmap & GENMASK(PAGE_SHIFT, 0)))) return 0; - va = virt; - /* max page size not to exceed MR length */ - mask = roundup_pow_of_two(umem->length); + umem->iova = va = virt; + /* The best result is the smallest page size that results in the minimum + * number of required pages. Compute the largest page size that could + * work based on VA address bits that don't change. + */ + mask = pgsz_bitmap & + GENMASK(BITS_PER_LONG - 1, + bits_per((umem->length - 1 + virt) ^ virt)); /* offset into first SGL */ pgoff = umem->address & ~PAGE_MASK; @@ -175,9 +119,14 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, mask |= va; pgoff = 0; } - best_pg_bit = rdma_find_pg_bit(mask, pgsz_bitmap); - return BIT_ULL(best_pg_bit); + /* The mask accumulates 1's in each position where the VA and physical + * address differ, thus the length of trailing 0 is the largest page + * size that can pass the VA through to the physical. + */ + if (mask) + pgsz_bitmap &= GENMASK(count_trailing_zeros(mask), 0); + return rounddown_pow_of_two(pgsz_bitmap); } EXPORT_SYMBOL(ib_umem_find_best_pgsz); @@ -201,7 +150,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, struct mm_struct *mm; unsigned long npages; int ret; - struct scatterlist *sg; + struct scatterlist *sg = NULL; unsigned int gup_flags = FOLL_WRITE; /* @@ -224,6 +173,11 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, umem->ibdev = device; umem->length = size; umem->address = addr; + /* + * Drivers should call ib_umem_find_best_pgsz() to set the iova + * correctly. + */ + umem->iova = addr; umem->writable = ib_access_writable(access); umem->owning_mm = mm = current->mm; mmgrab(mm); @@ -251,15 +205,9 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, cur_base = addr & PAGE_MASK; - ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL); - if (ret) - goto vma; - if (!umem->writable) gup_flags |= FOLL_FORCE; - sg = umem->sg_head.sgl; - while (npages) { cond_resched(); ret = pin_user_pages_fast(cur_base, @@ -271,15 +219,19 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, goto umem_release; cur_base += ret * PAGE_SIZE; - npages -= ret; - - sg = ib_umem_add_sg_table(sg, page_list, ret, - dma_get_max_seg_size(device->dma_device), - &umem->sg_nents); + npages -= ret; + sg = __sg_alloc_table_from_pages( + &umem->sg_head, page_list, ret, 0, ret << PAGE_SHIFT, + dma_get_max_seg_size(device->dma_device), sg, npages, + GFP_KERNEL); + umem->sg_nents = umem->sg_head.nents; + if (IS_ERR(sg)) { + unpin_user_pages_dirty_lock(page_list, ret, 0); + ret = PTR_ERR(sg); + goto umem_release; + } } - sg_mark_end(sg); - if (access & IB_ACCESS_RELAXED_ORDERING) dma_attr |= DMA_ATTR_WEAK_ORDERING; @@ -297,7 +249,6 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, umem_release: __ib_umem_release(device, umem, 0); -vma: atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm); out: free_page((unsigned long) page_list); @@ -329,18 +280,6 @@ void ib_umem_release(struct ib_umem *umem) } EXPORT_SYMBOL(ib_umem_release); -int ib_umem_page_count(struct ib_umem *umem) -{ - int i, n = 0; - struct scatterlist *sg; - - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) - n += sg_dma_len(sg) >> PAGE_SHIFT; - - return n; -} -EXPORT_SYMBOL(ib_umem_page_count); - /* * Copy from the given ib_umem's pages to the given buffer. * diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index cc6b4befde7c..323f6cf00682 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -60,7 +61,7 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp, size_t page_size = 1UL << umem_odp->page_shift; unsigned long start; unsigned long end; - size_t pages; + size_t ndmas, npfns; start = ALIGN_DOWN(umem_odp->umem.address, page_size); if (check_add_overflow(umem_odp->umem.address, @@ -71,20 +72,21 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp, if (unlikely(end < page_size)) return -EOVERFLOW; - pages = (end - start) >> umem_odp->page_shift; - if (!pages) + ndmas = (end - start) >> umem_odp->page_shift; + if (!ndmas) return -EINVAL; - umem_odp->page_list = kvcalloc( - pages, sizeof(*umem_odp->page_list), GFP_KERNEL); - if (!umem_odp->page_list) + npfns = (end - start) >> PAGE_SHIFT; + umem_odp->pfn_list = kvcalloc( + npfns, sizeof(*umem_odp->pfn_list), GFP_KERNEL); + if (!umem_odp->pfn_list) return -ENOMEM; umem_odp->dma_list = kvcalloc( - pages, sizeof(*umem_odp->dma_list), GFP_KERNEL); + ndmas, sizeof(*umem_odp->dma_list), GFP_KERNEL); if (!umem_odp->dma_list) { ret = -ENOMEM; - goto out_page_list; + goto out_pfn_list; } ret = mmu_interval_notifier_insert(&umem_odp->notifier, @@ -98,8 +100,8 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp, out_dma_list: kvfree(umem_odp->dma_list); -out_page_list: - kvfree(umem_odp->page_list); +out_pfn_list: + kvfree(umem_odp->pfn_list); return ret; } @@ -276,7 +278,7 @@ void ib_umem_odp_release(struct ib_umem_odp *umem_odp) mutex_unlock(&umem_odp->umem_mutex); mmu_interval_notifier_remove(&umem_odp->notifier); kvfree(umem_odp->dma_list); - kvfree(umem_odp->page_list); + kvfree(umem_odp->pfn_list); } put_pid(umem_odp->tgid); kfree(umem_odp); @@ -287,87 +289,56 @@ EXPORT_SYMBOL(ib_umem_odp_release); * Map for DMA and insert a single page into the on-demand paging page tables. * * @umem: the umem to insert the page to. - * @page_index: index in the umem to add the page to. + * @dma_index: index in the umem to add the dma to. * @page: the page struct to map and add. * @access_mask: access permissions needed for this page. * @current_seq: sequence number for synchronization with invalidations. * the sequence number is taken from * umem_odp->notifiers_seq. * - * The function returns -EFAULT if the DMA mapping operation fails. It returns - * -EAGAIN if a concurrent invalidation prevents us from updating the page. + * The function returns -EFAULT if the DMA mapping operation fails. * - * The page is released via put_page even if the operation failed. For on-demand - * pinning, the page is released whenever it isn't stored in the umem. */ static int ib_umem_odp_map_dma_single_page( struct ib_umem_odp *umem_odp, - unsigned int page_index, + unsigned int dma_index, struct page *page, - u64 access_mask, - unsigned long current_seq) + u64 access_mask) { struct ib_device *dev = umem_odp->umem.ibdev; - dma_addr_t dma_addr; - int ret = 0; + dma_addr_t *dma_addr = &umem_odp->dma_list[dma_index]; - if (mmu_interval_check_retry(&umem_odp->notifier, current_seq)) { - ret = -EAGAIN; - goto out; - } - if (!(umem_odp->dma_list[page_index])) { - dma_addr = - ib_dma_map_page(dev, page, 0, BIT(umem_odp->page_shift), - DMA_BIDIRECTIONAL); - if (ib_dma_mapping_error(dev, dma_addr)) { - ret = -EFAULT; - goto out; - } - umem_odp->dma_list[page_index] = dma_addr | access_mask; - umem_odp->page_list[page_index] = page; - umem_odp->npages++; - } else if (umem_odp->page_list[page_index] == page) { - umem_odp->dma_list[page_index] |= access_mask; - } else { + if (*dma_addr) { /* - * This is a race here where we could have done: - * - * CPU0 CPU1 - * get_user_pages() - * invalidate() - * page_fault() - * mutex_lock(umem_mutex) - * page from GUP != page in ODP - * - * It should be prevented by the retry test above as reading - * the seq number should be reliable under the - * umem_mutex. Thus something is really not working right if - * things get here. + * If the page is already dma mapped it means it went through + * a non-invalidating trasition, like read-only to writable. + * Resync the flags. */ - WARN(true, - "Got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n", - umem_odp->page_list[page_index], page); - ret = -EAGAIN; + *dma_addr = (*dma_addr & ODP_DMA_ADDR_MASK) | access_mask; + return 0; } -out: - put_page(page); - return ret; + *dma_addr = ib_dma_map_page(dev, page, 0, 1 << umem_odp->page_shift, + DMA_BIDIRECTIONAL); + if (ib_dma_mapping_error(dev, *dma_addr)) { + *dma_addr = 0; + return -EFAULT; + } + umem_odp->npages++; + *dma_addr |= access_mask; + return 0; } /** - * ib_umem_odp_map_dma_pages - Pin and DMA map userspace memory in an ODP MR. + * ib_umem_odp_map_dma_and_lock - DMA map userspace memory in an ODP MR and lock it. * - * Pins the range of pages passed in the argument, and maps them to - * DMA addresses. The DMA addresses of the mapped pages is updated in - * umem_odp->dma_list. + * Maps the range passed in the argument to DMA addresses. + * The DMA addresses of the mapped pages is updated in umem_odp->dma_list. + * Upon success the ODP MR will be locked to let caller complete its device + * page table update. * * Returns the number of pages mapped in success, negative error code * for failure. - * An -EAGAIN error code is returned when a concurrent mmu notifier prevents - * the function from completing its task. - * An -ENOENT error code indicates that userspace process is being terminated - * and mm was already destroyed. * @umem_odp: the umem to map and pin * @user_virt: the address from which we need to map. * @bcnt: the minimal number of bytes to pin and map. The mapping might be @@ -376,21 +347,19 @@ out: * the return value. * @access_mask: bit mask of the requested access permissions for the given * range. - * @current_seq: the MMU notifiers sequance value for synchronization with - * invalidations. the sequance number is read from - * umem_odp->notifiers_seq before calling this function + * @fault: is faulting required for the given range */ -int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, - u64 bcnt, u64 access_mask, - unsigned long current_seq) +int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 user_virt, + u64 bcnt, u64 access_mask, bool fault) + __acquires(&umem_odp->umem_mutex) { struct task_struct *owning_process = NULL; struct mm_struct *owning_mm = umem_odp->umem.owning_mm; - struct page **local_page_list = NULL; - u64 page_mask, off; - int j, k, ret = 0, start_idx, npages = 0; - unsigned int flags = 0, page_shift; - phys_addr_t p = 0; + int pfn_index, dma_index, ret = 0, start_idx; + unsigned int page_shift, hmm_order, pfn_start_idx; + unsigned long num_pfns, current_seq; + struct hmm_range range = {}; + unsigned long timeout; if (access_mask == 0) return -EINVAL; @@ -399,15 +368,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, user_virt + bcnt > ib_umem_end(umem_odp)) return -EFAULT; - local_page_list = (struct page **)__get_free_page(GFP_KERNEL); - if (!local_page_list) - return -ENOMEM; - page_shift = umem_odp->page_shift; - page_mask = ~(BIT(page_shift) - 1); - off = user_virt & (~page_mask); - user_virt = user_virt & page_mask; - bcnt += off; /* Charge for the first page offset as well. */ /* * owning_process is allowed to be NULL, this means somehow the mm is @@ -420,99 +381,104 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt, goto out_put_task; } - if (access_mask & ODP_WRITE_ALLOWED_BIT) - flags |= FOLL_WRITE; + range.notifier = &umem_odp->notifier; + range.start = ALIGN_DOWN(user_virt, 1UL << page_shift); + range.end = ALIGN(user_virt + bcnt, 1UL << page_shift); + pfn_start_idx = (range.start - ib_umem_start(umem_odp)) >> PAGE_SHIFT; + num_pfns = (range.end - range.start) >> PAGE_SHIFT; + if (fault) { + range.default_flags = HMM_PFN_REQ_FAULT; - start_idx = (user_virt - ib_umem_start(umem_odp)) >> page_shift; - k = start_idx; + if (access_mask & ODP_WRITE_ALLOWED_BIT) + range.default_flags |= HMM_PFN_REQ_WRITE; + } - while (bcnt > 0) { - const size_t gup_num_pages = min_t(size_t, - ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE, - PAGE_SIZE / sizeof(struct page *)); + range.hmm_pfns = &(umem_odp->pfn_list[pfn_start_idx]); + timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); - mmap_read_lock(owning_mm); - /* - * Note: this might result in redundent page getting. We can - * avoid this by checking dma_list to be 0 before calling - * get_user_pages. However, this make the code much more - * complex (and doesn't gain us much performance in most use - * cases). - */ - npages = get_user_pages_remote(owning_mm, - user_virt, gup_num_pages, - flags, local_page_list, NULL, NULL); - mmap_read_unlock(owning_mm); +retry: + current_seq = range.notifier_seq = + mmu_interval_read_begin(&umem_odp->notifier); - if (npages < 0) { - if (npages != -EAGAIN) - pr_warn("fail to get %zu user pages with error %d\n", gup_num_pages, npages); - else - pr_debug("fail to get %zu user pages with error %d\n", gup_num_pages, npages); - break; - } + mmap_read_lock(owning_mm); + ret = hmm_range_fault(&range); + mmap_read_unlock(owning_mm); + if (unlikely(ret)) { + if (ret == -EBUSY && !time_after(jiffies, timeout)) + goto retry; + goto out_put_mm; + } - bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt); - mutex_lock(&umem_odp->umem_mutex); - for (j = 0; j < npages; j++, user_virt += PAGE_SIZE) { - if (user_virt & ~page_mask) { - p += PAGE_SIZE; - if (page_to_phys(local_page_list[j]) != p) { - ret = -EFAULT; - break; - } - put_page(local_page_list[j]); + start_idx = (range.start - ib_umem_start(umem_odp)) >> page_shift; + dma_index = start_idx; + + mutex_lock(&umem_odp->umem_mutex); + if (mmu_interval_read_retry(&umem_odp->notifier, current_seq)) { + mutex_unlock(&umem_odp->umem_mutex); + goto retry; + } + + for (pfn_index = 0; pfn_index < num_pfns; + pfn_index += 1 << (page_shift - PAGE_SHIFT), dma_index++) { + + if (fault) { + /* + * Since we asked for hmm_range_fault() to populate + * pages it shouldn't return an error entry on success. + */ + WARN_ON(range.hmm_pfns[pfn_index] & HMM_PFN_ERROR); + WARN_ON(!(range.hmm_pfns[pfn_index] & HMM_PFN_VALID)); + } else { + if (!(range.hmm_pfns[pfn_index] & HMM_PFN_VALID)) { + WARN_ON(umem_odp->dma_list[dma_index]); continue; } - - ret = ib_umem_odp_map_dma_single_page( - umem_odp, k, local_page_list[j], - access_mask, current_seq); - if (ret < 0) { - if (ret != -EAGAIN) - pr_warn("ib_umem_odp_map_dma_single_page failed with error %d\n", ret); - else - pr_debug("ib_umem_odp_map_dma_single_page failed with error %d\n", ret); - break; - } - - p = page_to_phys(local_page_list[j]); - k++; + access_mask = ODP_READ_ALLOWED_BIT; + if (range.hmm_pfns[pfn_index] & HMM_PFN_WRITE) + access_mask |= ODP_WRITE_ALLOWED_BIT; } - mutex_unlock(&umem_odp->umem_mutex); + hmm_order = hmm_pfn_to_map_order(range.hmm_pfns[pfn_index]); + /* If a hugepage was detected and ODP wasn't set for, the umem + * page_shift will be used, the opposite case is an error. + */ + if (hmm_order + PAGE_SHIFT < page_shift) { + ret = -EINVAL; + ibdev_dbg(umem_odp->umem.ibdev, + "%s: un-expected hmm_order %d, page_shift %d\n", + __func__, hmm_order, page_shift); + break; + } + + ret = ib_umem_odp_map_dma_single_page( + umem_odp, dma_index, hmm_pfn_to_page(range.hmm_pfns[pfn_index]), + access_mask); if (ret < 0) { - /* - * Release pages, remembering that the first page - * to hit an error was already released by - * ib_umem_odp_map_dma_single_page(). - */ - if (npages - (j + 1) > 0) - release_pages(&local_page_list[j+1], - npages - (j + 1)); + ibdev_dbg(umem_odp->umem.ibdev, + "ib_umem_odp_map_dma_single_page failed with error %d\n", ret); break; } } + /* upon sucesss lock should stay on hold for the callee */ + if (!ret) + ret = dma_index - start_idx; + else + mutex_unlock(&umem_odp->umem_mutex); - if (ret >= 0) { - if (npages < 0 && k == start_idx) - ret = npages; - else - ret = k - start_idx; - } - +out_put_mm: mmput(owning_mm); out_put_task: if (owning_process) put_task_struct(owning_process); - free_page((unsigned long)local_page_list); return ret; } -EXPORT_SYMBOL(ib_umem_odp_map_dma_pages); +EXPORT_SYMBOL(ib_umem_odp_map_dma_and_lock); void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, u64 bound) { + dma_addr_t dma_addr; + dma_addr_t dma; int idx; u64 addr; struct ib_device *dev = umem_odp->umem.ibdev; @@ -521,20 +487,16 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, virt = max_t(u64, virt, ib_umem_start(umem_odp)); bound = min_t(u64, bound, ib_umem_end(umem_odp)); - /* Note that during the run of this function, the - * notifiers_count of the MR is > 0, preventing any racing - * faults from completion. We might be racing with other - * invalidations, so we must make sure we free each page only - * once. */ for (addr = virt; addr < bound; addr += BIT(umem_odp->page_shift)) { idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift; - if (umem_odp->page_list[idx]) { - struct page *page = umem_odp->page_list[idx]; - dma_addr_t dma = umem_odp->dma_list[idx]; - dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK; + dma = umem_odp->dma_list[idx]; - WARN_ON(!dma_addr); + /* The access flags guaranteed a valid DMA address in case was NULL */ + if (dma) { + unsigned long pfn_idx = (addr - ib_umem_start(umem_odp)) >> PAGE_SHIFT; + struct page *page = hmm_pfn_to_page(umem_odp->pfn_list[pfn_idx]); + dma_addr = dma & ODP_DMA_ADDR_MASK; ib_dma_unmap_page(dev, dma_addr, BIT(umem_odp->page_shift), DMA_BIDIRECTIONAL); @@ -551,7 +513,6 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 virt, */ set_page_dirty(head_page); } - umem_odp->page_list[idx] = NULL; umem_odp->dma_list[idx] = 0; umem_odp->npages--; } diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 2fbc583d5bdd..418d133a8fb0 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -218,10 +218,12 @@ int ib_alloc_ucontext(struct uverbs_attr_bundle *attrs) if (!ucontext) return -ENOMEM; - ucontext->res.type = RDMA_RESTRACK_CTX; ucontext->device = ib_dev; ucontext->ufile = ufile; xa_init_flags(&ucontext->mmap_xa, XA_FLAGS_ALLOC); + + rdma_restrack_new(&ucontext->res, RDMA_RESTRACK_CTX); + rdma_restrack_set_name(&ucontext->res, NULL); attrs->context = ucontext; return 0; } @@ -250,7 +252,7 @@ int ib_init_ucontext(struct uverbs_attr_bundle *attrs) if (ret) goto err_uncharge; - rdma_restrack_uadd(&ucontext->res); + rdma_restrack_add(&ucontext->res); /* * Make sure that ib_uverbs_get_ucontext() sees the pointer update @@ -313,6 +315,7 @@ static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs) err_uobj: rdma_alloc_abort_uobject(uobj, attrs, false); err_ucontext: + rdma_restrack_put(&attrs->context->res); kfree(attrs->context); attrs->context = NULL; return ret; @@ -439,12 +442,14 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) pd->device = ib_dev; pd->uobject = uobj; atomic_set(&pd->usecnt, 0); - pd->res.type = RDMA_RESTRACK_PD; + + rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD); + rdma_restrack_set_name(&pd->res, NULL); ret = ib_dev->ops.alloc_pd(pd, &attrs->driver_udata); if (ret) goto err_alloc; - rdma_restrack_uadd(&pd->res); + rdma_restrack_add(&pd->res); uobj->object = pd; uobj_finalize_uobj_create(uobj, attrs); @@ -453,6 +458,7 @@ static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs) return uverbs_response(attrs, &resp, sizeof(resp)); err_alloc: + rdma_restrack_put(&pd->res); kfree(pd); err: uobj_alloc_abort(uobj, attrs); @@ -742,9 +748,11 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs) mr->sig_attrs = NULL; mr->uobject = uobj; atomic_inc(&pd->usecnt); - mr->res.type = RDMA_RESTRACK_MR; mr->iova = cmd.hca_va; - rdma_restrack_uadd(&mr->res); + + rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); + rdma_restrack_set_name(&mr->res, NULL); + rdma_restrack_add(&mr->res); uobj->object = mr; uobj_put_obj_read(pd); @@ -858,7 +866,7 @@ static int ib_uverbs_dereg_mr(struct uverbs_attr_bundle *attrs) static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_alloc_mw cmd; - struct ib_uverbs_alloc_mw_resp resp; + struct ib_uverbs_alloc_mw_resp resp = {}; struct ib_uobject *uobj; struct ib_pd *pd; struct ib_mw *mw; @@ -884,15 +892,21 @@ static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs) goto err_put; } - mw = pd->device->ops.alloc_mw(pd, cmd.mw_type, &attrs->driver_udata); - if (IS_ERR(mw)) { - ret = PTR_ERR(mw); + mw = rdma_zalloc_drv_obj(ib_dev, ib_mw); + if (!mw) { + ret = -ENOMEM; goto err_put; } - mw->device = pd->device; - mw->pd = pd; + mw->device = ib_dev; + mw->pd = pd; mw->uobject = uobj; + mw->type = cmd.mw_type; + + ret = pd->device->ops.alloc_mw(mw, &attrs->driver_udata); + if (ret) + goto err_alloc; + atomic_inc(&pd->usecnt); uobj->object = mw; @@ -903,6 +917,8 @@ static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs) resp.mw_handle = uobj->id; return uverbs_response(attrs, &resp, sizeof(resp)); +err_alloc: + kfree(mw); err_put: uobj_put_obj_read(pd); err_free: @@ -994,12 +1010,14 @@ static int create_cq(struct uverbs_attr_bundle *attrs, cq->event_handler = ib_uverbs_cq_event_handler; cq->cq_context = ev_file ? &ev_file->ev_queue : NULL; atomic_set(&cq->usecnt, 0); - cq->res.type = RDMA_RESTRACK_CQ; + + rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ); + rdma_restrack_set_name(&cq->res, NULL); ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata); if (ret) goto err_free; - rdma_restrack_uadd(&cq->res); + rdma_restrack_add(&cq->res); obj->uevent.uobject.object = cq; obj->uevent.event_file = READ_ONCE(attrs->ufile->default_async_file); @@ -1013,6 +1031,7 @@ static int create_cq(struct uverbs_attr_bundle *attrs, return uverbs_response(attrs, &resp, sizeof(resp)); err_free: + rdma_restrack_put(&cq->res); kfree(cq); err_file: if (ev_file) @@ -1237,8 +1256,21 @@ static int create_qp(struct uverbs_attr_bundle *attrs, bool has_sq = true; struct ib_device *ib_dev; - if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW)) - return -EPERM; + switch (cmd->qp_type) { + case IB_QPT_RAW_PACKET: + if (!capable(CAP_NET_RAW)) + return -EPERM; + break; + case IB_QPT_RC: + case IB_QPT_UC: + case IB_QPT_UD: + case IB_QPT_XRC_INI: + case IB_QPT_XRC_TGT: + case IB_QPT_DRIVER: + break; + default: + return -EINVAL; + } obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, attrs, &ib_dev); @@ -2985,11 +3017,11 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs) { struct ib_uverbs_ex_create_rwq_ind_table cmd; struct ib_uverbs_ex_create_rwq_ind_table_resp resp = {}; - struct ib_uobject *uobj; + struct ib_uobject *uobj; int err; struct ib_rwq_ind_table_init_attr init_attr = {}; struct ib_rwq_ind_table *rwq_ind_tbl; - struct ib_wq **wqs = NULL; + struct ib_wq **wqs = NULL; u32 *wqs_handles = NULL; struct ib_wq *wq = NULL; int i, num_read_wqs; @@ -3047,17 +3079,15 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs) goto put_wqs; } - init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size; - init_attr.ind_tbl = wqs; - - rwq_ind_tbl = ib_dev->ops.create_rwq_ind_table(ib_dev, &init_attr, - &attrs->driver_udata); - - if (IS_ERR(rwq_ind_tbl)) { - err = PTR_ERR(rwq_ind_tbl); + rwq_ind_tbl = rdma_zalloc_drv_obj(ib_dev, ib_rwq_ind_table); + if (!rwq_ind_tbl) { + err = -ENOMEM; goto err_uobj; } + init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size; + init_attr.ind_tbl = wqs; + rwq_ind_tbl->ind_tbl = wqs; rwq_ind_tbl->log_ind_tbl_size = init_attr.log_ind_tbl_size; rwq_ind_tbl->uobject = uobj; @@ -3065,6 +3095,11 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs) rwq_ind_tbl->device = ib_dev; atomic_set(&rwq_ind_tbl->usecnt, 0); + err = ib_dev->ops.create_rwq_ind_table(rwq_ind_tbl, &init_attr, + &attrs->driver_udata); + if (err) + goto err_create; + for (i = 0; i < num_wq_handles; i++) rdma_lookup_put_uobject(&wqs[i]->uobject->uevent.uobject, UVERBS_LOOKUP_READ); @@ -3076,6 +3111,8 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs) resp.response_length = uverbs_response_length(attrs, sizeof(resp)); return uverbs_response(attrs, &resp, sizeof(resp)); +err_create: + kfree(rwq_ind_tbl); err_uobj: uobj_alloc_abort(uobj, attrs); put_wqs: @@ -3232,8 +3269,8 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs) goto err_free; } - flow_id = qp->device->ops.create_flow( - qp, flow_attr, IB_FLOW_DOMAIN_USER, &attrs->driver_udata); + flow_id = qp->device->ops.create_flow(qp, flow_attr, + &attrs->driver_udata); if (IS_ERR(flow_id)) { err = PTR_ERR(flow_id); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index a4ba0b87d6de..4bb7c642f80c 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -108,8 +108,11 @@ int uverbs_dealloc_mw(struct ib_mw *mw) int ret; ret = mw->device->ops.dealloc_mw(mw); - if (!ret) - atomic_dec(&pd->usecnt); + if (ret) + return ret; + + atomic_dec(&pd->usecnt); + kfree(mw); return ret; } diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c index 08c39cfb1bd9..0658101fca00 100644 --- a/drivers/infiniband/core/uverbs_std_types.c +++ b/drivers/infiniband/core/uverbs_std_types.c @@ -81,12 +81,20 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject, { struct ib_rwq_ind_table *rwq_ind_tbl = uobject->object; struct ib_wq **ind_tbl = rwq_ind_tbl->ind_tbl; - int ret; + u32 table_size = (1 << rwq_ind_tbl->log_ind_tbl_size); + int ret, i; - ret = ib_destroy_rwq_ind_table(rwq_ind_tbl); + if (atomic_read(&rwq_ind_tbl->usecnt)) + return -EBUSY; + + ret = rwq_ind_tbl->device->ops.destroy_rwq_ind_table(rwq_ind_tbl); if (ib_is_destroy_retryable(ret, why, uobject)) return ret; + for (i = 0; i < table_size; i++) + atomic_dec(&ind_tbl[i]->usecnt); + + kfree(rwq_ind_tbl); kfree(ind_tbl); return ret; } @@ -122,8 +130,7 @@ static int uverbs_free_pd(struct ib_uobject *uobject, if (ret) return ret; - ib_dealloc_pd_user(pd, &attrs->driver_udata); - return 0; + return ib_dealloc_pd_user(pd, &attrs->driver_udata); } void ib_uverbs_free_event_queue(struct ib_uverbs_event_queue *event_queue) diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c index c7e7438752bc..b3c6c066b601 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -46,7 +46,9 @@ static int uverbs_free_counters(struct ib_uobject *uobject, if (ret) return ret; - counters->device->ops.destroy_counters(counters); + ret = counters->device->ops.destroy_counters(counters); + if (ret) + return ret; kfree(counters); return 0; } diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c index b1c7dacc02de..8dabd05988b2 100644 --- a/drivers/infiniband/core/uverbs_std_types_cq.c +++ b/drivers/infiniband/core/uverbs_std_types_cq.c @@ -33,6 +33,7 @@ #include #include "rdma_core.h" #include "uverbs.h" +#include "restrack.h" static int uverbs_free_cq(struct ib_uobject *uobject, enum rdma_remove_reason why, @@ -123,7 +124,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( cq->event_handler = ib_uverbs_cq_event_handler; cq->cq_context = ev_file ? &ev_file->ev_queue : NULL; atomic_set(&cq->usecnt, 0); - cq->res.type = RDMA_RESTRACK_CQ; + + rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ); + rdma_restrack_set_name(&cq->res, NULL); ret = ib_dev->ops.create_cq(cq, &attr, &attrs->driver_udata); if (ret) @@ -131,7 +134,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( obj->uevent.uobject.object = cq; obj->uevent.uobject.user_handle = user_handle; - rdma_restrack_uadd(&cq->res); + rdma_restrack_add(&cq->res); uverbs_finalize_uobj_create(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE); ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe, @@ -139,6 +142,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)( return ret; err_free: + rdma_restrack_put(&cq->res); kfree(cq); err_event_file: if (obj->uevent.event_file) diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c index 75df2094a010..f367d523a46b 100644 --- a/drivers/infiniband/core/uverbs_std_types_device.c +++ b/drivers/infiniband/core/uverbs_std_types_device.c @@ -3,11 +3,13 @@ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. */ +#include #include #include "rdma_core.h" #include "uverbs.h" #include #include +#include /* * This ioctl method allows calling any defined write or write_ex @@ -165,7 +167,8 @@ void copy_port_attr_to_resp(struct ib_port_attr *attr, resp->subnet_timeout = attr->subnet_timeout; resp->init_type_reply = attr->init_type_reply; resp->active_width = attr->active_width; - resp->active_speed = attr->active_speed; + /* This ABI needs to be extended to provide any speed more than IB_SPEED_NDR */ + resp->active_speed = min_t(u16, attr->active_speed, IB_SPEED_NDR); resp->phys_state = attr->phys_state; resp->link_layer = rdma_port_get_link_layer(ib_dev, port_num); } @@ -265,6 +268,172 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_CONTEXT)( return ucontext->device->ops.query_ucontext(ucontext, attrs); } +static int copy_gid_entries_to_user(struct uverbs_attr_bundle *attrs, + struct ib_uverbs_gid_entry *entries, + size_t num_entries, size_t user_entry_size) +{ + const struct uverbs_attr *attr; + void __user *user_entries; + size_t copy_len; + int ret; + int i; + + if (user_entry_size == sizeof(*entries)) { + ret = uverbs_copy_to(attrs, + UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES, + entries, sizeof(*entries) * num_entries); + return ret; + } + + copy_len = min_t(size_t, user_entry_size, sizeof(*entries)); + attr = uverbs_attr_get(attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES); + if (IS_ERR(attr)) + return PTR_ERR(attr); + + user_entries = u64_to_user_ptr(attr->ptr_attr.data); + for (i = 0; i < num_entries; i++) { + if (copy_to_user(user_entries, entries, copy_len)) + return -EFAULT; + + if (user_entry_size > sizeof(*entries)) { + if (clear_user(user_entries + sizeof(*entries), + user_entry_size - sizeof(*entries))) + return -EFAULT; + } + + entries++; + user_entries += user_entry_size; + } + + return uverbs_output_written(attrs, + UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES); +} + +static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_TABLE)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uverbs_gid_entry *entries; + struct ib_ucontext *ucontext; + struct ib_device *ib_dev; + size_t user_entry_size; + ssize_t num_entries; + size_t max_entries; + size_t num_bytes; + u32 flags; + int ret; + + ret = uverbs_get_flags32(&flags, attrs, + UVERBS_ATTR_QUERY_GID_TABLE_FLAGS, 0); + if (ret) + return ret; + + ret = uverbs_get_const(&user_entry_size, attrs, + UVERBS_ATTR_QUERY_GID_TABLE_ENTRY_SIZE); + if (ret) + return ret; + + max_entries = uverbs_attr_ptr_get_array_size( + attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES, + user_entry_size); + if (max_entries <= 0) + return -EINVAL; + + ucontext = ib_uverbs_get_ucontext(attrs); + if (IS_ERR(ucontext)) + return PTR_ERR(ucontext); + ib_dev = ucontext->device; + + if (check_mul_overflow(max_entries, sizeof(*entries), &num_bytes)) + return -EINVAL; + + entries = uverbs_zalloc(attrs, num_bytes); + if (!entries) + return -ENOMEM; + + num_entries = rdma_query_gid_table(ib_dev, entries, max_entries); + if (num_entries < 0) + return -EINVAL; + + ret = copy_gid_entries_to_user(attrs, entries, num_entries, + user_entry_size); + if (ret) + return ret; + + ret = uverbs_copy_to(attrs, + UVERBS_ATTR_QUERY_GID_TABLE_RESP_NUM_ENTRIES, + &num_entries, sizeof(num_entries)); + return ret; +} + +static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_ENTRY)( + struct uverbs_attr_bundle *attrs) +{ + struct ib_uverbs_gid_entry entry = {}; + const struct ib_gid_attr *gid_attr; + struct ib_ucontext *ucontext; + struct ib_device *ib_dev; + struct net_device *ndev; + u32 gid_index; + u32 port_num; + u32 flags; + int ret; + + ret = uverbs_get_flags32(&flags, attrs, + UVERBS_ATTR_QUERY_GID_ENTRY_FLAGS, 0); + if (ret) + return ret; + + ret = uverbs_get_const(&port_num, attrs, + UVERBS_ATTR_QUERY_GID_ENTRY_PORT); + if (ret) + return ret; + + ret = uverbs_get_const(&gid_index, attrs, + UVERBS_ATTR_QUERY_GID_ENTRY_GID_INDEX); + if (ret) + return ret; + + ucontext = ib_uverbs_get_ucontext(attrs); + if (IS_ERR(ucontext)) + return PTR_ERR(ucontext); + ib_dev = ucontext->device; + + if (!rdma_is_port_valid(ib_dev, port_num)) + return -EINVAL; + + if (!rdma_ib_or_roce(ib_dev, port_num)) + return -EOPNOTSUPP; + + gid_attr = rdma_get_gid_attr(ib_dev, port_num, gid_index); + if (IS_ERR(gid_attr)) + return PTR_ERR(gid_attr); + + memcpy(&entry.gid, &gid_attr->gid, sizeof(gid_attr->gid)); + entry.gid_index = gid_attr->index; + entry.port_num = gid_attr->port_num; + entry.gid_type = gid_attr->gid_type; + + rcu_read_lock(); + ndev = rdma_read_gid_attr_ndev_rcu(gid_attr); + if (IS_ERR(ndev)) { + if (PTR_ERR(ndev) != -ENODEV) { + ret = PTR_ERR(ndev); + rcu_read_unlock(); + goto out; + } + } else { + entry.netdev_ifindex = ndev->ifindex; + } + rcu_read_unlock(); + + ret = uverbs_copy_to_struct_or_zero( + attrs, UVERBS_ATTR_QUERY_GID_ENTRY_RESP_ENTRY, &entry, + sizeof(entry)); +out: + rdma_put_gid_attr(gid_attr); + return ret; +} + DECLARE_UVERBS_NAMED_METHOD( UVERBS_METHOD_GET_CONTEXT, UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_NUM_COMP_VECTORS, @@ -299,12 +468,38 @@ DECLARE_UVERBS_NAMED_METHOD( reserved), UA_MANDATORY)); +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_QUERY_GID_TABLE, + UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_GID_TABLE_ENTRY_SIZE, u64, + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_QUERY_GID_TABLE_FLAGS, u32, + UA_OPTIONAL), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES, + UVERBS_ATTR_MIN_SIZE(0), UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_GID_TABLE_RESP_NUM_ENTRIES, + UVERBS_ATTR_TYPE(u64), UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD( + UVERBS_METHOD_QUERY_GID_ENTRY, + UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_GID_ENTRY_PORT, u32, + UA_MANDATORY), + UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_GID_ENTRY_GID_INDEX, u32, + UA_MANDATORY), + UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_QUERY_GID_ENTRY_FLAGS, u32, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_QUERY_GID_ENTRY_RESP_ENTRY, + UVERBS_ATTR_STRUCT(struct ib_uverbs_gid_entry, + netdev_ifindex), + UA_MANDATORY)); + DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE, &UVERBS_METHOD(UVERBS_METHOD_GET_CONTEXT), &UVERBS_METHOD(UVERBS_METHOD_INVOKE_WRITE), &UVERBS_METHOD(UVERBS_METHOD_INFO_HANDLES), &UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT), - &UVERBS_METHOD(UVERBS_METHOD_QUERY_CONTEXT)); + &UVERBS_METHOD(UVERBS_METHOD_QUERY_CONTEXT), + &UVERBS_METHOD(UVERBS_METHOD_QUERY_GID_TABLE), + &UVERBS_METHOD(UVERBS_METHOD_QUERY_GID_ENTRY)); const struct uapi_definition uverbs_def_obj_device[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DEVICE), diff --git a/drivers/infiniband/core/uverbs_std_types_wq.c b/drivers/infiniband/core/uverbs_std_types_wq.c index cad842ede077..f2e6a625724a 100644 --- a/drivers/infiniband/core/uverbs_std_types_wq.c +++ b/drivers/infiniband/core/uverbs_std_types_wq.c @@ -16,7 +16,7 @@ static int uverbs_free_wq(struct ib_uobject *uobject, container_of(uobject, struct ib_uwq_object, uevent.uobject); int ret; - ret = ib_destroy_wq(wq, &attrs->driver_udata); + ret = ib_destroy_wq_user(wq, &attrs->driver_udata); if (ib_is_destroy_retryable(ret, why, uobject)) return ret; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 307886737646..740f8454b6b4 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -272,15 +272,16 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, atomic_set(&pd->usecnt, 0); pd->flags = flags; - pd->res.type = RDMA_RESTRACK_PD; - rdma_restrack_set_task(&pd->res, caller); + rdma_restrack_new(&pd->res, RDMA_RESTRACK_PD); + rdma_restrack_set_name(&pd->res, caller); ret = device->ops.alloc_pd(pd, NULL); if (ret) { + rdma_restrack_put(&pd->res); kfree(pd); return ERR_PTR(ret); } - rdma_restrack_kadd(&pd->res); + rdma_restrack_add(&pd->res); if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) pd->local_dma_lkey = device->local_dma_lkey; @@ -329,7 +330,7 @@ EXPORT_SYMBOL(__ib_alloc_pd); * exist. The caller is responsible to synchronously destroy them and * guarantee no new allocations will happen. */ -void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata) +int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata) { int ret; @@ -343,9 +344,13 @@ void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata) requires the caller to guarantee we can't race here. */ WARN_ON(atomic_read(&pd->usecnt)); + ret = pd->device->ops.dealloc_pd(pd, udata); + if (ret) + return ret; + rdma_restrack_del(&pd->res); - pd->device->ops.dealloc_pd(pd, udata); kfree(pd); + return ret; } EXPORT_SYMBOL(ib_dealloc_pd_user); @@ -728,7 +733,7 @@ int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr, (struct in6_addr *)dgid); return 0; } else if (net_type == RDMA_NETWORK_IPV6 || - net_type == RDMA_NETWORK_IB) { + net_type == RDMA_NETWORK_IB || RDMA_NETWORK_ROCE_V1) { *dgid = hdr->ibgrh.dgid; *sgid = hdr->ibgrh.sgid; return 0; @@ -964,18 +969,22 @@ int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata) { const struct ib_gid_attr *sgid_attr = ah->sgid_attr; struct ib_pd *pd; + int ret; might_sleep_if(flags & RDMA_DESTROY_AH_SLEEPABLE); pd = ah->pd; - ah->device->ops.destroy_ah(ah, flags); + ret = ah->device->ops.destroy_ah(ah, flags); + if (ret) + return ret; + atomic_dec(&pd->usecnt); if (sgid_attr) rdma_put_gid_attr(sgid_attr); kfree(ah); - return 0; + return ret; } EXPORT_SYMBOL(rdma_destroy_ah_user); @@ -1060,10 +1069,14 @@ EXPORT_SYMBOL(ib_query_srq); int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata) { + int ret; + if (atomic_read(&srq->usecnt)) return -EBUSY; - srq->device->ops.destroy_srq(srq, udata); + ret = srq->device->ops.destroy_srq(srq, udata); + if (ret) + return ret; atomic_dec(&srq->pd->usecnt); if (srq->srq_type == IB_SRQT_XRC) @@ -1072,7 +1085,7 @@ int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata) atomic_dec(&srq->ext.cq->usecnt); kfree(srq); - return 0; + return ret; } EXPORT_SYMBOL(ib_destroy_srq_user); @@ -1781,7 +1794,7 @@ int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr, } EXPORT_SYMBOL(ib_modify_qp_with_udata); -int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width) +int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u16 *speed, u8 *width) { int rc; u32 netdev_speed; @@ -1984,16 +1997,18 @@ struct ib_cq *__ib_create_cq(struct ib_device *device, cq->event_handler = event_handler; cq->cq_context = cq_context; atomic_set(&cq->usecnt, 0); - cq->res.type = RDMA_RESTRACK_CQ; - rdma_restrack_set_task(&cq->res, caller); + + rdma_restrack_new(&cq->res, RDMA_RESTRACK_CQ); + rdma_restrack_set_name(&cq->res, caller); ret = device->ops.create_cq(cq, cq_attr, NULL); if (ret) { + rdma_restrack_put(&cq->res); kfree(cq); return ERR_PTR(ret); } - rdma_restrack_kadd(&cq->res); + rdma_restrack_add(&cq->res); return cq; } EXPORT_SYMBOL(__ib_create_cq); @@ -2011,16 +2026,21 @@ EXPORT_SYMBOL(rdma_set_cq_moderation); int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata) { + int ret; + if (WARN_ON_ONCE(cq->shared)) return -EOPNOTSUPP; if (atomic_read(&cq->usecnt)) return -EBUSY; + ret = cq->device->ops.destroy_cq(cq, udata); + if (ret) + return ret; + rdma_restrack_del(&cq->res); - cq->device->ops.destroy_cq(cq, udata); kfree(cq); - return 0; + return ret; } EXPORT_SYMBOL(ib_destroy_cq_user); @@ -2059,8 +2079,10 @@ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->pd = pd; mr->dm = NULL; atomic_inc(&pd->usecnt); - mr->res.type = RDMA_RESTRACK_MR; - rdma_restrack_kadd(&mr->res); + + rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); + rdma_restrack_parent_name(&mr->res, &pd->res); + rdma_restrack_add(&mr->res); return mr; } @@ -2139,11 +2161,12 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, mr->uobject = NULL; atomic_inc(&pd->usecnt); mr->need_inval = false; - mr->res.type = RDMA_RESTRACK_MR; - rdma_restrack_kadd(&mr->res); mr->type = mr_type; mr->sig_attrs = NULL; + rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); + rdma_restrack_parent_name(&mr->res, &pd->res); + rdma_restrack_add(&mr->res); out: trace_mr_alloc(pd, mr_type, max_num_sg, mr); return mr; @@ -2199,11 +2222,12 @@ struct ib_mr *ib_alloc_mr_integrity(struct ib_pd *pd, mr->uobject = NULL; atomic_inc(&pd->usecnt); mr->need_inval = false; - mr->res.type = RDMA_RESTRACK_MR; - rdma_restrack_kadd(&mr->res); mr->type = IB_MR_TYPE_INTEGRITY; mr->sig_attrs = sig_attrs; + rdma_restrack_new(&mr->res, RDMA_RESTRACK_MR); + rdma_restrack_parent_name(&mr->res, &pd->res); + rdma_restrack_add(&mr->res); out: trace_mr_integ_alloc(pd, max_num_data_sg, max_num_meta_sg, mr); return mr; @@ -2328,13 +2352,17 @@ EXPORT_SYMBOL(ib_alloc_xrcd_user); */ int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata) { + int ret; + if (atomic_read(&xrcd->usecnt)) return -EBUSY; WARN_ON(!xa_empty(&xrcd->tgt_qps)); - xrcd->device->ops.dealloc_xrcd(xrcd, udata); + ret = xrcd->device->ops.dealloc_xrcd(xrcd, udata); + if (ret) + return ret; kfree(xrcd); - return 0; + return ret; } EXPORT_SYMBOL(ib_dealloc_xrcd_user); @@ -2378,25 +2406,28 @@ struct ib_wq *ib_create_wq(struct ib_pd *pd, EXPORT_SYMBOL(ib_create_wq); /** - * ib_destroy_wq - Destroys the specified user WQ. + * ib_destroy_wq_user - Destroys the specified user WQ. * @wq: The WQ to destroy. * @udata: Valid user data */ -int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata) +int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata) { struct ib_cq *cq = wq->cq; struct ib_pd *pd = wq->pd; + int ret; if (atomic_read(&wq->usecnt)) return -EBUSY; - wq->device->ops.destroy_wq(wq, udata); + ret = wq->device->ops.destroy_wq(wq, udata); + if (ret) + return ret; + atomic_dec(&pd->usecnt); atomic_dec(&cq->usecnt); - - return 0; + return ret; } -EXPORT_SYMBOL(ib_destroy_wq); +EXPORT_SYMBOL(ib_destroy_wq_user); /** * ib_modify_wq - Modifies the specified WQ. @@ -2419,29 +2450,6 @@ int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, } EXPORT_SYMBOL(ib_modify_wq); -/* - * ib_destroy_rwq_ind_table - Destroys the specified Indirection Table. - * @wq_ind_table: The Indirection Table to destroy. -*/ -int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table) -{ - int err, i; - u32 table_size = (1 << rwq_ind_table->log_ind_tbl_size); - struct ib_wq **ind_tbl = rwq_ind_table->ind_tbl; - - if (atomic_read(&rwq_ind_table->usecnt)) - return -EBUSY; - - err = rwq_ind_table->device->ops.destroy_rwq_ind_table(rwq_ind_table); - if (!err) { - for (i = 0; i < table_size; i++) - atomic_dec(&ind_tbl[i]->usecnt); - } - - return err; -} -EXPORT_SYMBOL(ib_destroy_rwq_ind_table); - int ib_check_mr_status(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status) { diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index a300588634c5..b930ea3dab7a 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -150,7 +150,7 @@ struct bnxt_re_dev { struct delayed_work worker; u8 cur_prio_map; - u8 active_speed; + u16 active_speed; u8 active_width; /* FP Notification Queue (CQ & SRQ) */ diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 1d7a9ca5240c..cf3db9628397 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -532,7 +532,7 @@ fail: } /* Protection Domains */ -void bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata) +int bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata) { struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); struct bnxt_re_dev *rdev = pd->rdev; @@ -542,6 +542,7 @@ void bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata) if (pd->qplib_pd.id) bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl, &pd->qplib_pd); + return 0; } int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) @@ -601,13 +602,14 @@ fail: } /* Address Handles */ -void bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags) +int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags) { struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah); struct bnxt_re_dev *rdev = ah->rdev; bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah, !(flags & RDMA_DESTROY_AH_SLEEPABLE)); + return 0; } static u8 bnxt_re_stack_to_dev_nw_type(enum rdma_network_type ntype) @@ -938,9 +940,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, return PTR_ERR(umem); qp->sumem = umem; - qplib_qp->sq.sg_info.sghead = umem->sg_head.sgl; - qplib_qp->sq.sg_info.npages = ib_umem_num_pages(umem); - qplib_qp->sq.sg_info.nmap = umem->nmap; + qplib_qp->sq.sg_info.umem = umem; qplib_qp->sq.sg_info.pgsize = PAGE_SIZE; qplib_qp->sq.sg_info.pgshft = PAGE_SHIFT; qplib_qp->qp_handle = ureq.qp_handle; @@ -953,9 +953,7 @@ static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd, if (IS_ERR(umem)) goto rqfail; qp->rumem = umem; - qplib_qp->rq.sg_info.sghead = umem->sg_head.sgl; - qplib_qp->rq.sg_info.npages = ib_umem_num_pages(umem); - qplib_qp->rq.sg_info.nmap = umem->nmap; + qplib_qp->rq.sg_info.umem = umem; qplib_qp->rq.sg_info.pgsize = PAGE_SIZE; qplib_qp->rq.sg_info.pgshft = PAGE_SHIFT; } @@ -1568,7 +1566,7 @@ static enum ib_mtu __to_ib_mtu(u32 mtu) } /* Shared Receive Queues */ -void bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) +int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) { struct bnxt_re_srq *srq = container_of(ib_srq, struct bnxt_re_srq, ib_srq); @@ -1583,6 +1581,7 @@ void bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) atomic_dec(&rdev->srq_count); if (nq) nq->budget--; + return 0; } static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev, @@ -1608,9 +1607,7 @@ static int bnxt_re_init_user_srq(struct bnxt_re_dev *rdev, return PTR_ERR(umem); srq->umem = umem; - qplib_srq->sg_info.sghead = umem->sg_head.sgl; - qplib_srq->sg_info.npages = ib_umem_num_pages(umem); - qplib_srq->sg_info.nmap = umem->nmap; + qplib_srq->sg_info.umem = umem; qplib_srq->sg_info.pgsize = PAGE_SIZE; qplib_srq->sg_info.pgshft = PAGE_SHIFT; qplib_srq->srq_handle = ureq.srq_handle; @@ -2800,7 +2797,7 @@ int bnxt_re_post_recv(struct ib_qp *ib_qp, const struct ib_recv_wr *wr, } /* Completion Queues */ -void bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) +int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct bnxt_re_cq *cq; struct bnxt_qplib_nq *nq; @@ -2816,6 +2813,7 @@ void bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) atomic_dec(&rdev->cq_count); nq->budget--; kfree(cq->cql); + return 0; } int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, @@ -2860,9 +2858,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, rc = PTR_ERR(cq->umem); goto fail; } - cq->qplib_cq.sg_info.sghead = cq->umem->sg_head.sgl; - cq->qplib_cq.sg_info.npages = ib_umem_num_pages(cq->umem); - cq->qplib_cq.sg_info.nmap = cq->umem->nmap; + cq->qplib_cq.sg_info.umem = cq->umem; cq->qplib_cq.dpi = &uctx->dpi; } else { cq->max_cql = min_t(u32, entries, MAX_CQL_PER_POLL); @@ -3774,23 +3770,6 @@ int bnxt_re_dealloc_mw(struct ib_mw *ib_mw) return rc; } -static int bnxt_re_page_size_ok(int page_shift) -{ - switch (page_shift) { - case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4K: - case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_8K: - case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_64K: - case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_2M: - case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_256K: - case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1M: - case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_4M: - case CMDQ_REGISTER_MR_LOG2_PBL_PG_SIZE_PG_1G: - return 1; - default: - return 0; - } -} - static int fill_umem_pbl_tbl(struct ib_umem *umem, u64 *pbl_tbl_orig, int page_shift) { @@ -3798,7 +3777,7 @@ static int fill_umem_pbl_tbl(struct ib_umem *umem, u64 *pbl_tbl_orig, u64 page_size = BIT_ULL(page_shift); struct ib_block_iter biter; - rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, page_size) + rdma_umem_for_each_dma_block(umem, &biter, page_size) *pbl_tbl++ = rdma_block_iter_dma_address(&biter); return pbl_tbl - pbl_tbl_orig; @@ -3814,7 +3793,8 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, struct bnxt_re_mr *mr; struct ib_umem *umem; u64 *pbl_tbl = NULL; - int umem_pgs, page_shift, rc; + unsigned long page_size; + int umem_pgs, rc; if (length > BNXT_RE_MAX_MR_SIZE) { ibdev_err(&rdev->ibdev, "MR Size: %lld > Max supported:%lld\n", @@ -3848,42 +3828,34 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, mr->ib_umem = umem; mr->qplib_mr.va = virt_addr; - umem_pgs = ib_umem_page_count(umem); - if (!umem_pgs) { - ibdev_err(&rdev->ibdev, "umem is invalid!"); - rc = -EINVAL; + page_size = ib_umem_find_best_pgsz( + umem, BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_2M, virt_addr); + if (!page_size) { + ibdev_err(&rdev->ibdev, "umem page size unsupported!"); + rc = -EFAULT; goto free_umem; } mr->qplib_mr.total_size = length; - pbl_tbl = kcalloc(umem_pgs, sizeof(u64 *), GFP_KERNEL); + if (page_size == BNXT_RE_PAGE_SIZE_4K && + length > BNXT_RE_MAX_MR_SIZE_LOW) { + ibdev_err(&rdev->ibdev, "Requested MR Sz:%llu Max sup:%llu", + length, (u64)BNXT_RE_MAX_MR_SIZE_LOW); + rc = -EINVAL; + goto free_umem; + } + + umem_pgs = ib_umem_num_dma_blocks(umem, page_size); + pbl_tbl = kcalloc(umem_pgs, sizeof(*pbl_tbl), GFP_KERNEL); if (!pbl_tbl) { rc = -ENOMEM; goto free_umem; } - page_shift = __ffs(ib_umem_find_best_pgsz(umem, - BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_2M, - virt_addr)); - - if (!bnxt_re_page_size_ok(page_shift)) { - ibdev_err(&rdev->ibdev, "umem page size unsupported!"); - rc = -EFAULT; - goto fail; - } - - if (page_shift == BNXT_RE_PAGE_SHIFT_4K && - length > BNXT_RE_MAX_MR_SIZE_LOW) { - ibdev_err(&rdev->ibdev, "Requested MR Sz:%llu Max sup:%llu", - length, (u64)BNXT_RE_MAX_MR_SIZE_LOW); - rc = -EINVAL; - goto fail; - } - /* Map umem buf ptrs to the PBL */ - umem_pgs = fill_umem_pbl_tbl(umem, pbl_tbl, page_shift); + umem_pgs = fill_umem_pbl_tbl(umem, pbl_tbl, order_base_2(page_size)); rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl, - umem_pgs, false, 1 << page_shift); + umem_pgs, false, page_size); if (rc) { ibdev_err(&rdev->ibdev, "Failed to register user MR"); goto fail; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 1daeb30e06fd..9a8130b79256 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -163,12 +163,12 @@ int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num, enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev, u8 port_num); int bnxt_re_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); -void bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); +int bnxt_re_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); int bnxt_re_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); -void bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags); +int bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags); int bnxt_re_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata); @@ -176,7 +176,7 @@ int bnxt_re_modify_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); int bnxt_re_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); -void bnxt_re_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); +int bnxt_re_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); int bnxt_re_post_srq_recv(struct ib_srq *srq, const struct ib_recv_wr *recv_wr, const struct ib_recv_wr **bad_recv_wr); struct ib_qp *bnxt_re_create_qp(struct ib_pd *pd, @@ -193,7 +193,7 @@ int bnxt_re_post_recv(struct ib_qp *qp, const struct ib_recv_wr *recv_wr, const struct ib_recv_wr **bad_recv_wr); int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); -void bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); +int bnxt_re_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int bnxt_re_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc); int bnxt_re_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *pd, int mr_access_flags); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 53aee5a42ab8..04621ba8fa76 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -736,7 +736,8 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) if (ret) return ret; - return ib_register_device(ibdev, "bnxt_re%d"); + dma_set_max_seg_size(&rdev->en_dev->pdev->dev, UINT_MAX); + return ib_register_device(ibdev, "bnxt_re%d", &rdev->en_dev->pdev->dev); } static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index f78da54a0bc5..995d4633b0a1 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -295,9 +295,9 @@ static void __wait_for_all_nqes(struct bnxt_qplib_cq *cq, u16 cnq_events) } } -static void bnxt_qplib_service_nq(unsigned long data) +static void bnxt_qplib_service_nq(struct tasklet_struct *t) { - struct bnxt_qplib_nq *nq = (struct bnxt_qplib_nq *)data; + struct bnxt_qplib_nq *nq = from_tasklet(nq, t, nq_tasklet); struct bnxt_qplib_hwq *hwq = &nq->hwq; int num_srqne_processed = 0; int num_cqne_processed = 0; @@ -448,8 +448,7 @@ int bnxt_qplib_nq_start_irq(struct bnxt_qplib_nq *nq, int nq_indx, nq->msix_vec = msix_vector; if (need_init) - tasklet_init(&nq->nq_tasklet, bnxt_qplib_service_nq, - (unsigned long)nq); + tasklet_setup(&nq->nq_tasklet, bnxt_qplib_service_nq); else tasklet_enable(&nq->nq_tasklet); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index f7736e34ac64..441eb421e5e5 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -50,7 +50,7 @@ #include "qplib_sp.h" #include "qplib_fp.h" -static void bnxt_qplib_service_creq(unsigned long data); +static void bnxt_qplib_service_creq(struct tasklet_struct *t); /* Hardware communication channel */ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) @@ -79,7 +79,7 @@ static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) goto done; do { mdelay(1); /* 1m sec */ - bnxt_qplib_service_creq((unsigned long)rcfw); + bnxt_qplib_service_creq(&rcfw->creq.creq_tasklet); } while (test_bit(cbit, cmdq->cmdq_bitmap) && --count); done: return count ? 0 : -ETIMEDOUT; @@ -370,9 +370,9 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, } /* SP - CREQ Completion handlers */ -static void bnxt_qplib_service_creq(unsigned long data) +static void bnxt_qplib_service_creq(struct tasklet_struct *t) { - struct bnxt_qplib_rcfw *rcfw = (struct bnxt_qplib_rcfw *)data; + struct bnxt_qplib_rcfw *rcfw = from_tasklet(rcfw, t, creq.creq_tasklet); struct bnxt_qplib_creq_ctx *creq = &rcfw->creq; u32 type, budget = CREQ_ENTRY_POLL_BUDGET; struct bnxt_qplib_hwq *hwq = &creq->hwq; @@ -687,8 +687,7 @@ int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector, creq->msix_vec = msix_vector; if (need_init) - tasklet_init(&creq->creq_tasklet, - bnxt_qplib_service_creq, (unsigned long)rcfw); + tasklet_setup(&creq->creq_tasklet, bnxt_qplib_service_creq); else tasklet_enable(&creq->creq_tasklet); rc = request_irq(creq->msix_vec, bnxt_qplib_creq_irq, 0, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 7efa6e5dce62..fa7878336100 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -45,6 +45,9 @@ #include #include #include +#include +#include + #include "roce_hsi.h" #include "qplib_res.h" #include "qplib_sp.h" @@ -87,12 +90,11 @@ static void __free_pbl(struct bnxt_qplib_res *res, struct bnxt_qplib_pbl *pbl, static void bnxt_qplib_fill_user_dma_pages(struct bnxt_qplib_pbl *pbl, struct bnxt_qplib_sg_info *sginfo) { - struct scatterlist *sghead = sginfo->sghead; - struct sg_dma_page_iter sg_iter; + struct ib_block_iter biter; int i = 0; - for_each_sg_dma_page(sghead, &sg_iter, sginfo->nmap, 0) { - pbl->pg_map_arr[i] = sg_page_iter_dma_address(&sg_iter); + rdma_umem_for_each_dma_block(sginfo->umem, &biter, sginfo->pgsize) { + pbl->pg_map_arr[i] = rdma_block_iter_dma_address(&biter); pbl->pg_arr[i] = NULL; pbl->pg_count++; i++; @@ -104,15 +106,16 @@ static int __alloc_pbl(struct bnxt_qplib_res *res, struct bnxt_qplib_sg_info *sginfo) { struct pci_dev *pdev = res->pdev; - struct scatterlist *sghead; bool is_umem = false; u32 pages; int i; if (sginfo->nopte) return 0; - pages = sginfo->npages; - sghead = sginfo->sghead; + if (sginfo->umem) + pages = ib_umem_num_dma_blocks(sginfo->umem, sginfo->pgsize); + else + pages = sginfo->npages; /* page ptr arrays */ pbl->pg_arr = vmalloc(pages * sizeof(void *)); if (!pbl->pg_arr) @@ -127,7 +130,7 @@ static int __alloc_pbl(struct bnxt_qplib_res *res, pbl->pg_count = 0; pbl->pg_size = sginfo->pgsize; - if (!sghead) { + if (!sginfo->umem) { for (i = 0; i < pages; i++) { pbl->pg_arr[i] = dma_alloc_coherent(&pdev->dev, pbl->pg_size, @@ -183,14 +186,12 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, struct bnxt_qplib_sg_info sginfo = {}; u32 depth, stride, npbl, npde; dma_addr_t *src_phys_ptr, **dst_virt_ptr; - struct scatterlist *sghead = NULL; struct bnxt_qplib_res *res; struct pci_dev *pdev; int i, rc, lvl; res = hwq_attr->res; pdev = res->pdev; - sghead = hwq_attr->sginfo->sghead; pg_size = hwq_attr->sginfo->pgsize; hwq->level = PBL_LVL_MAX; @@ -204,7 +205,7 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, aux_pages++; } - if (!sghead) { + if (!hwq_attr->sginfo->umem) { hwq->is_user = false; npages = (depth * stride) / pg_size + aux_pages; if ((depth * stride) % pg_size) @@ -213,11 +214,14 @@ int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, return -EINVAL; hwq_attr->sginfo->npages = npages; } else { + unsigned long sginfo_num_pages = ib_umem_num_dma_blocks( + hwq_attr->sginfo->umem, hwq_attr->sginfo->pgsize); + hwq->is_user = true; - npages = hwq_attr->sginfo->npages; + npages = sginfo_num_pages; npages = (npages * PAGE_SIZE) / BIT_ULL(hwq_attr->sginfo->pgshft); - if ((hwq_attr->sginfo->npages * PAGE_SIZE) % + if ((sginfo_num_pages * PAGE_SIZE) % BIT_ULL(hwq_attr->sginfo->pgshft)) if (!npages) npages++; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 9da470d1e4a3..7a1ab38b95da 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -126,8 +126,7 @@ struct bnxt_qplib_pbl { }; struct bnxt_qplib_sg_info { - struct scatterlist *sghead; - u32 nmap; + struct ib_umem *umem; u32 npages; u32 pgshft; u32 pgsize; diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 1f288c73ccfc..8769e7aa097f 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -77,9 +77,9 @@ static int enable_ecn; module_param(enable_ecn, int, 0644); MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)"); -static int dack_mode = 1; +static int dack_mode; module_param(dack_mode, int, 0644); -MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)"); +MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=0)"); uint c4iw_max_read_depth = 32; module_param(c4iw_max_read_depth, int, 0644); diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 352b8af1998a..28349ed50885 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -967,7 +967,7 @@ int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) return !err || err == -ENODATA ? npolled : err; } -void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) +int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct c4iw_cq *chp; struct c4iw_ucontext *ucontext; @@ -985,6 +985,7 @@ void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) ucontext ? &ucontext->uctx : &chp->cq.rdev->uctx, chp->destroy_skb, chp->wr_waitp); c4iw_put_wr_wait(chp->wr_waitp); + return 0; } int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 2b2b009b371a..a27899402f59 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -985,21 +985,20 @@ int c4iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int c4iw_dealloc_mw(struct ib_mw *mw); void c4iw_dealloc(struct uld_ctx *ctx); -struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, - struct ib_udata *udata); +int c4iw_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata); struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc); int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata); -void c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); +int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int c4iw_modify_srq(struct ib_srq *ib_srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); -void c4iw_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata); +int c4iw_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata); int c4iw_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *attrs, struct ib_udata *udata); int c4iw_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 73936c3341b7..42234df896fb 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -510,7 +510,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, __be64 *pages; int shift, n, i; int err = -ENOMEM; - struct sg_dma_page_iter sg_iter; + struct ib_block_iter biter; struct c4iw_dev *rhp; struct c4iw_pd *php; struct c4iw_mr *mhp; @@ -548,7 +548,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, shift = PAGE_SHIFT; - n = ib_umem_num_pages(mhp->umem); + n = ib_umem_num_dma_blocks(mhp->umem, 1 << shift); err = alloc_pbl(mhp, n); if (err) goto err_umem_release; @@ -561,8 +561,8 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, i = n = 0; - for_each_sg_dma_page(mhp->umem->sg_head.sgl, &sg_iter, mhp->umem->nmap, 0) { - pages[i++] = cpu_to_be64(sg_page_iter_dma_address(&sg_iter)); + rdma_umem_for_each_dma_block(mhp->umem, &biter, 1 << shift) { + pages[i++] = cpu_to_be64(rdma_block_iter_dma_address(&biter)); if (i == PAGE_SIZE / sizeof(*pages)) { err = write_pbl(&mhp->rhp->rdev, pages, mhp->attr.pbl_addr + (n << 3), i, @@ -611,30 +611,23 @@ err_free_mhp: return ERR_PTR(err); } -struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, - struct ib_udata *udata) +int c4iw_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) { + struct c4iw_mw *mhp = to_c4iw_mw(ibmw); struct c4iw_dev *rhp; struct c4iw_pd *php; - struct c4iw_mw *mhp; u32 mmid; u32 stag = 0; int ret; - if (type != IB_MW_TYPE_1) - return ERR_PTR(-EINVAL); + if (ibmw->type != IB_MW_TYPE_1) + return -EINVAL; - php = to_c4iw_pd(pd); + php = to_c4iw_pd(ibmw->pd); rhp = php->rhp; - mhp = kzalloc(sizeof(*mhp), GFP_KERNEL); - if (!mhp) - return ERR_PTR(-ENOMEM); - mhp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL); - if (!mhp->wr_waitp) { - ret = -ENOMEM; - goto free_mhp; - } + if (!mhp->wr_waitp) + return -ENOMEM; mhp->dereg_skb = alloc_skb(SGE_MAX_WR_LEN, GFP_KERNEL); if (!mhp->dereg_skb) { @@ -645,18 +638,19 @@ struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, ret = allocate_window(&rhp->rdev, &stag, php->pdid, mhp->wr_waitp); if (ret) goto free_skb; + mhp->rhp = rhp; mhp->attr.pdid = php->pdid; mhp->attr.type = FW_RI_STAG_MW; mhp->attr.stag = stag; mmid = (stag) >> 8; - mhp->ibmw.rkey = stag; + ibmw->rkey = stag; if (xa_insert_irq(&rhp->mrs, mmid, mhp, GFP_KERNEL)) { ret = -ENOMEM; goto dealloc_win; } pr_debug("mmid 0x%x mhp %p stag 0x%x\n", mmid, mhp, stag); - return &(mhp->ibmw); + return 0; dealloc_win: deallocate_window(&rhp->rdev, mhp->attr.stag, mhp->dereg_skb, @@ -665,9 +659,7 @@ free_skb: kfree_skb(mhp->dereg_skb); free_wr_wait: c4iw_put_wr_wait(mhp->wr_waitp); -free_mhp: - kfree(mhp); - return ERR_PTR(ret); + return ret; } int c4iw_dealloc_mw(struct ib_mw *mw) @@ -684,8 +676,6 @@ int c4iw_dealloc_mw(struct ib_mw *mw) mhp->wr_waitp); kfree_skb(mhp->dereg_skb); c4iw_put_wr_wait(mhp->wr_waitp); - pr_debug("ib_mw %p mmid 0x%x ptr %p\n", mw, mmid, mhp); - kfree(mhp); return 0; } diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 6c579d2d3997..8138c57a1e43 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -190,7 +190,7 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) return ret; } -static void c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata) +static int c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_pd *php; @@ -202,6 +202,7 @@ static void c4iw_deallocate_pd(struct ib_pd *pd, struct ib_udata *udata) mutex_lock(&rhp->rdev.stats.lock); rhp->rdev.stats.pd.cur--; mutex_unlock(&rhp->rdev.stats.lock); + return 0; } static int c4iw_allocate_pd(struct ib_pd *pd, struct ib_udata *udata) @@ -497,8 +498,10 @@ static const struct ib_device_ops c4iw_dev_ops = { .query_qp = c4iw_ib_query_qp, .reg_user_mr = c4iw_reg_user_mr, .req_notify_cq = c4iw_arm_cq, - INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_cq, c4iw_cq, ibcq), + INIT_RDMA_OBJ_SIZE(ib_mw, c4iw_mw, ibmw), + INIT_RDMA_OBJ_SIZE(ib_pd, c4iw_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_srq, c4iw_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, c4iw_ucontext, ibucontext), }; @@ -567,7 +570,9 @@ void c4iw_register_device(struct work_struct *work) ret = set_netdevs(&dev->ibdev, &dev->rdev); if (ret) goto err_dealloc_ctx; - ret = ib_register_device(&dev->ibdev, "cxgb4_%d"); + dma_set_max_seg_size(&dev->rdev.lldi.pdev->dev, UINT_MAX); + ret = ib_register_device(&dev->ibdev, "cxgb4_%d", + &dev->rdev.lldi.pdev->dev); if (ret) goto err_dealloc_ctx; return; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index cbddb20c6121..f20379e4e2ec 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2797,7 +2797,7 @@ err_free_wr_wait: return ret; } -void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) +int c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct c4iw_dev *rhp; struct c4iw_srq *srq; @@ -2813,4 +2813,5 @@ void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) srq->wr_waitp); c4iw_free_srq_idx(&rhp->rdev, srq->idx); c4iw_put_wr_wait(srq->wr_waitp); + return 0; } diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h index 1889dd172a25..e5d9712e98c4 100644 --- a/drivers/infiniband/hw/efa/efa.h +++ b/drivers/infiniband/hw/efa/efa.h @@ -33,7 +33,8 @@ struct efa_irq { char name[EFA_IRQNAME_SIZE]; }; -struct efa_sw_stats { +/* Don't use anything other than atomic64 */ +struct efa_stats { atomic64_t alloc_pd_err; atomic64_t create_qp_err; atomic64_t create_cq_err; @@ -41,11 +42,6 @@ struct efa_sw_stats { atomic64_t alloc_ucontext_err; atomic64_t create_ah_err; atomic64_t mmap_err; -}; - -/* Don't use anything other than atomic64 */ -struct efa_stats { - struct efa_sw_stats sw_stats; atomic64_t keep_alive_rcvd; }; @@ -134,12 +130,12 @@ int efa_query_gid(struct ib_device *ibdev, u8 port, int index, int efa_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey); int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); -void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); +int efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); struct ib_qp *efa_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); -void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); +int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, @@ -156,7 +152,7 @@ void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry); int efa_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); -void efa_destroy_ah(struct ib_ah *ibah, u32 flags); +int efa_destroy_ah(struct ib_ah *ibah, u32 flags); int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_udata *udata); enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev, diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index 5484b08bbc5d..b199e4ac6cf9 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -61,6 +61,8 @@ enum efa_admin_qp_state { enum efa_admin_get_stats_type { EFA_ADMIN_GET_STATS_TYPE_BASIC = 0, + EFA_ADMIN_GET_STATS_TYPE_MESSAGES = 1, + EFA_ADMIN_GET_STATS_TYPE_RDMA_READ = 2, }; enum efa_admin_get_stats_scope { @@ -68,14 +70,6 @@ enum efa_admin_get_stats_scope { EFA_ADMIN_GET_STATS_SCOPE_QUEUE = 1, }; -enum efa_admin_modify_qp_mask_bits { - EFA_ADMIN_QP_STATE_BIT = 0, - EFA_ADMIN_CUR_QP_STATE_BIT = 1, - EFA_ADMIN_QKEY_BIT = 2, - EFA_ADMIN_SQ_PSN_BIT = 3, - EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT = 4, -}; - /* * QP allocation sizes, converted by fabric QueuePair (QP) create command * from QP capabilities. @@ -199,8 +193,14 @@ struct efa_admin_modify_qp_cmd { struct efa_admin_aq_common_desc aq_common_desc; /* - * Mask indicating which fields should be updated see enum - * efa_admin_modify_qp_mask_bits + * Mask indicating which fields should be updated + * 0 : qp_state + * 1 : cur_qp_state + * 2 : qkey + * 3 : sq_psn + * 4 : sq_drained_async_notify + * 5 : rnr_retry + * 31:6 : reserved */ u32 modify_mask; @@ -222,8 +222,8 @@ struct efa_admin_modify_qp_cmd { /* Enable async notification when SQ is drained */ u8 sq_drained_async_notify; - /* MBZ */ - u8 reserved1; + /* Number of RNR retries (valid only for SRD QPs) */ + u8 rnr_retry; /* MBZ */ u16 reserved2; @@ -258,8 +258,8 @@ struct efa_admin_query_qp_resp { /* Indicates that draining is in progress */ u8 sq_draining; - /* MBZ */ - u8 reserved1; + /* Number of RNR retries (valid only for SRD QPs) */ + u8 rnr_retry; /* MBZ */ u16 reserved2; @@ -530,10 +530,36 @@ struct efa_admin_basic_stats { u64 rx_drops; }; +struct efa_admin_messages_stats { + u64 send_bytes; + + u64 send_wrs; + + u64 recv_bytes; + + u64 recv_wrs; +}; + +struct efa_admin_rdma_read_stats { + u64 read_wrs; + + u64 read_bytes; + + u64 read_wr_err; + + u64 read_resp_bytes; +}; + struct efa_admin_acq_get_stats_resp { struct efa_admin_acq_common_desc acq_common_desc; - struct efa_admin_basic_stats basic_stats; + union { + struct efa_admin_basic_stats basic_stats; + + struct efa_admin_messages_stats messages_stats; + + struct efa_admin_rdma_read_stats rdma_read_stats; + } u; }; struct efa_admin_get_set_feature_common_desc { @@ -576,7 +602,9 @@ struct efa_admin_feature_device_attr_desc { /* * 0 : rdma_read - If set, RDMA Read is supported on * TX queues - * 31:1 : reserved - MBZ + * 1 : rnr_retry - If set, RNR retry is supported on + * modify QP command + * 31:2 : reserved - MBZ */ u32 device_caps; @@ -862,6 +890,14 @@ struct efa_admin_host_info { #define EFA_ADMIN_CREATE_QP_CMD_SQ_VIRT_MASK BIT(0) #define EFA_ADMIN_CREATE_QP_CMD_RQ_VIRT_MASK BIT(1) +/* modify_qp_cmd */ +#define EFA_ADMIN_MODIFY_QP_CMD_QP_STATE_MASK BIT(0) +#define EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE_MASK BIT(1) +#define EFA_ADMIN_MODIFY_QP_CMD_QKEY_MASK BIT(2) +#define EFA_ADMIN_MODIFY_QP_CMD_SQ_PSN_MASK BIT(3) +#define EFA_ADMIN_MODIFY_QP_CMD_SQ_DRAINED_ASYNC_NOTIFY_MASK BIT(4) +#define EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY_MASK BIT(5) + /* reg_mr_cmd */ #define EFA_ADMIN_REG_MR_CMD_PHYS_PAGE_SIZE_SHIFT_MASK GENMASK(4, 0) #define EFA_ADMIN_REG_MR_CMD_MEM_ADDR_PHY_MODE_EN_MASK BIT(7) @@ -878,6 +914,7 @@ struct efa_admin_host_info { /* feature_device_attr_desc */ #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK BIT(0) +#define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RNR_RETRY_MASK BIT(1) /* host_info */ #define EFA_ADMIN_HOST_INFO_DRIVER_MODULE_TYPE_MASK GENMASK(7, 0) diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index 6ac23627f65a..f752ef64159c 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -76,6 +76,7 @@ int efa_com_modify_qp(struct efa_com_dev *edev, cmd.qkey = params->qkey; cmd.sq_psn = params->sq_psn; cmd.sq_drained_async_notify = params->sq_drained_async_notify; + cmd.rnr_retry = params->rnr_retry; err = efa_com_cmd_exec(aq, (struct efa_admin_aq_entry *)&cmd, @@ -121,6 +122,7 @@ int efa_com_query_qp(struct efa_com_dev *edev, result->qkey = resp.qkey; result->sq_draining = resp.sq_draining; result->sq_psn = resp.sq_psn; + result->rnr_retry = resp.rnr_retry; return 0; } @@ -750,11 +752,27 @@ int efa_com_get_stats(struct efa_com_dev *edev, return err; } - result->basic_stats.tx_bytes = resp.basic_stats.tx_bytes; - result->basic_stats.tx_pkts = resp.basic_stats.tx_pkts; - result->basic_stats.rx_bytes = resp.basic_stats.rx_bytes; - result->basic_stats.rx_pkts = resp.basic_stats.rx_pkts; - result->basic_stats.rx_drops = resp.basic_stats.rx_drops; + switch (cmd.type) { + case EFA_ADMIN_GET_STATS_TYPE_BASIC: + result->basic_stats.tx_bytes = resp.u.basic_stats.tx_bytes; + result->basic_stats.tx_pkts = resp.u.basic_stats.tx_pkts; + result->basic_stats.rx_bytes = resp.u.basic_stats.rx_bytes; + result->basic_stats.rx_pkts = resp.u.basic_stats.rx_pkts; + result->basic_stats.rx_drops = resp.u.basic_stats.rx_drops; + break; + case EFA_ADMIN_GET_STATS_TYPE_MESSAGES: + result->messages_stats.send_bytes = resp.u.messages_stats.send_bytes; + result->messages_stats.send_wrs = resp.u.messages_stats.send_wrs; + result->messages_stats.recv_bytes = resp.u.messages_stats.recv_bytes; + result->messages_stats.recv_wrs = resp.u.messages_stats.recv_wrs; + break; + case EFA_ADMIN_GET_STATS_TYPE_RDMA_READ: + result->rdma_read_stats.read_wrs = resp.u.rdma_read_stats.read_wrs; + result->rdma_read_stats.read_bytes = resp.u.rdma_read_stats.read_bytes; + result->rdma_read_stats.read_wr_err = resp.u.rdma_read_stats.read_wr_err; + result->rdma_read_stats.read_resp_bytes = resp.u.rdma_read_stats.read_resp_bytes; + break; + } return 0; } diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index 190bac23f585..eea4ebfbe6ec 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -47,6 +47,7 @@ struct efa_com_modify_qp_params { u32 qkey; u32 sq_psn; u8 sq_drained_async_notify; + u8 rnr_retry; }; struct efa_com_query_qp_params { @@ -58,6 +59,7 @@ struct efa_com_query_qp_result { u32 qkey; u32 sq_draining; u32 sq_psn; + u8 rnr_retry; }; struct efa_com_destroy_qp_params { @@ -238,8 +240,24 @@ struct efa_com_basic_stats { u64 rx_drops; }; +struct efa_com_messages_stats { + u64 send_bytes; + u64 send_wrs; + u64 recv_bytes; + u64 recv_wrs; +}; + +struct efa_com_rdma_read_stats { + u64 read_wrs; + u64 read_bytes; + u64 read_wr_err; + u64 read_resp_bytes; +}; + union efa_com_get_stats_result { struct efa_com_basic_stats basic_stats; + struct efa_com_messages_stats messages_stats; + struct efa_com_rdma_read_stats rdma_read_stats; }; void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low); diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 92d701146320..6faed3a81e08 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -331,7 +331,7 @@ static int efa_ib_device_add(struct efa_dev *dev) ib_set_device_ops(&dev->ibdev, &efa_dev_ops); - err = ib_register_device(&dev->ibdev, "efa_%d"); + err = ib_register_device(&dev->ibdev, "efa_%d", &pdev->dev); if (err) goto err_release_doorbell_bar; @@ -418,7 +418,7 @@ static int efa_device_init(struct efa_com_dev *edev, struct pci_dev *pdev) err); return err; } - + dma_set_max_seg_size(&pdev->dev, UINT_MAX); return 0; } diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 9e201f169289..191e0843f090 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -4,6 +4,7 @@ */ #include +#include #include #include @@ -35,6 +36,14 @@ struct efa_user_mmap_entry { op(EFA_RX_BYTES, "rx_bytes") \ op(EFA_RX_PKTS, "rx_pkts") \ op(EFA_RX_DROPS, "rx_drops") \ + op(EFA_SEND_BYTES, "send_bytes") \ + op(EFA_SEND_WRS, "send_wrs") \ + op(EFA_RECV_BYTES, "recv_bytes") \ + op(EFA_RECV_WRS, "recv_wrs") \ + op(EFA_RDMA_READ_WRS, "rdma_read_wrs") \ + op(EFA_RDMA_READ_BYTES, "rdma_read_bytes") \ + op(EFA_RDMA_READ_WR_ERR, "rdma_read_wr_err") \ + op(EFA_RDMA_READ_RESP_BYTES, "rdma_read_resp_bytes") \ op(EFA_SUBMITTED_CMDS, "submitted_cmds") \ op(EFA_COMPLETED_CMDS, "completed_cmds") \ op(EFA_CMDS_ERR, "cmds_err") \ @@ -142,10 +151,9 @@ to_emmap(struct rdma_user_mmap_entry *rdma_entry) return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry); } -static inline bool is_rdma_read_cap(struct efa_dev *dev) -{ - return dev->dev_attr.device_caps & EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK; -} +#define EFA_DEV_CAP(dev, cap) \ + ((dev)->dev_attr.device_caps & \ + EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_##cap##_MASK) #define is_reserved_cleared(reserved) \ !memchr_inv(reserved, 0, sizeof(reserved)) @@ -221,9 +229,12 @@ int efa_query_device(struct ib_device *ibdev, resp.max_rq_wr = dev_attr->max_rq_depth; resp.max_rdma_size = dev_attr->max_rdma_size; - if (is_rdma_read_cap(dev)) + if (EFA_DEV_CAP(dev, RDMA_READ)) resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ; + if (EFA_DEV_CAP(dev, RNR_RETRY)) + resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RNR_RETRY; + err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); if (err) { @@ -269,7 +280,7 @@ int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, #define EFA_QUERY_QP_SUPP_MASK \ (IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \ - IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP) + IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP | IB_QP_RNR_RETRY) if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) { ibdev_dbg(&dev->ibdev, @@ -291,6 +302,7 @@ int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_attr->sq_psn = result.sq_psn; qp_attr->sq_draining = result.sq_draining; qp_attr->port_num = 1; + qp_attr->rnr_retry = result.rnr_retry; qp_attr->cap.max_send_wr = qp->max_send_wr; qp_attr->cap.max_recv_wr = qp->max_recv_wr; @@ -376,17 +388,18 @@ int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) err_dealloc_pd: efa_pd_dealloc(dev, result.pdn); err_out: - atomic64_inc(&dev->stats.sw_stats.alloc_pd_err); + atomic64_inc(&dev->stats.alloc_pd_err); return err; } -void efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +int efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct efa_dev *dev = to_edev(ibpd->device); struct efa_pd *pd = to_epd(ibpd); ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn); efa_pd_dealloc(dev, pd->pdn); + return 0; } static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle) @@ -737,18 +750,130 @@ err_free_mapped: err_free_qp: kfree(qp); err_out: - atomic64_inc(&dev->stats.sw_stats.create_qp_err); + atomic64_inc(&dev->stats.create_qp_err); return ERR_PTR(err); } +static const struct { + int valid; + enum ib_qp_attr_mask req_param; + enum ib_qp_attr_mask opt_param; +} srd_qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { + [IB_QPS_RESET] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_INIT] = { + .valid = 1, + .req_param = IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_QKEY, + }, + }, + [IB_QPS_INIT] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_INIT] = { + .valid = 1, + .opt_param = IB_QP_PKEY_INDEX | + IB_QP_PORT | + IB_QP_QKEY, + }, + [IB_QPS_RTR] = { + .valid = 1, + .opt_param = IB_QP_PKEY_INDEX | + IB_QP_QKEY, + }, + }, + [IB_QPS_RTR] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .req_param = IB_QP_SQ_PSN, + .opt_param = IB_QP_CUR_STATE | + IB_QP_QKEY | + IB_QP_RNR_RETRY, + + } + }, + [IB_QPS_RTS] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .opt_param = IB_QP_CUR_STATE | + IB_QP_QKEY, + }, + [IB_QPS_SQD] = { + .valid = 1, + .opt_param = IB_QP_EN_SQD_ASYNC_NOTIFY, + }, + }, + [IB_QPS_SQD] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .opt_param = IB_QP_CUR_STATE | + IB_QP_QKEY, + }, + [IB_QPS_SQD] = { + .valid = 1, + .opt_param = IB_QP_PKEY_INDEX | + IB_QP_QKEY, + } + }, + [IB_QPS_SQE] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + [IB_QPS_RTS] = { + .valid = 1, + .opt_param = IB_QP_CUR_STATE | + IB_QP_QKEY, + } + }, + [IB_QPS_ERR] = { + [IB_QPS_RESET] = { .valid = 1 }, + [IB_QPS_ERR] = { .valid = 1 }, + } +}; + +static bool efa_modify_srd_qp_is_ok(enum ib_qp_state cur_state, + enum ib_qp_state next_state, + enum ib_qp_attr_mask mask) +{ + enum ib_qp_attr_mask req_param, opt_param; + + if (mask & IB_QP_CUR_STATE && + cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS && + cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE) + return false; + + if (!srd_qp_state_table[cur_state][next_state].valid) + return false; + + req_param = srd_qp_state_table[cur_state][next_state].req_param; + opt_param = srd_qp_state_table[cur_state][next_state].opt_param; + + if ((mask & req_param) != req_param) + return false; + + if (mask & ~(req_param | opt_param | IB_QP_STATE)) + return false; + + return true; +} + static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) { + int err; + #define EFA_MODIFY_QP_SUPP_MASK \ (IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \ - IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN) + IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN | \ + IB_QP_RNR_RETRY) if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) { ibdev_dbg(&dev->ibdev, @@ -757,8 +882,14 @@ static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp, return -EOPNOTSUPP; } - if (!ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD, - qp_attr_mask)) { + if (qp->ibqp.qp_type == IB_QPT_DRIVER) + err = !efa_modify_srd_qp_is_ok(cur_state, new_state, + qp_attr_mask); + else + err = !ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD, + qp_attr_mask); + + if (err) { ibdev_dbg(&dev->ibdev, "Invalid modify QP parameters\n"); return -EINVAL; } @@ -805,28 +936,36 @@ int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, params.qp_handle = qp->qp_handle; if (qp_attr_mask & IB_QP_STATE) { - params.modify_mask |= BIT(EFA_ADMIN_QP_STATE_BIT) | - BIT(EFA_ADMIN_CUR_QP_STATE_BIT); + EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QP_STATE, + 1); + EFA_SET(¶ms.modify_mask, + EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE, 1); params.cur_qp_state = qp_attr->cur_qp_state; params.qp_state = qp_attr->qp_state; } if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) { - params.modify_mask |= - BIT(EFA_ADMIN_SQ_DRAINED_ASYNC_NOTIFY_BIT); + EFA_SET(¶ms.modify_mask, + EFA_ADMIN_MODIFY_QP_CMD_SQ_DRAINED_ASYNC_NOTIFY, 1); params.sq_drained_async_notify = qp_attr->en_sqd_async_notify; } if (qp_attr_mask & IB_QP_QKEY) { - params.modify_mask |= BIT(EFA_ADMIN_QKEY_BIT); + EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QKEY, 1); params.qkey = qp_attr->qkey; } if (qp_attr_mask & IB_QP_SQ_PSN) { - params.modify_mask |= BIT(EFA_ADMIN_SQ_PSN_BIT); + EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_SQ_PSN, 1); params.sq_psn = qp_attr->sq_psn; } + if (qp_attr_mask & IB_QP_RNR_RETRY) { + EFA_SET(¶ms.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY, + 1); + params.rnr_retry = qp_attr->rnr_retry; + } + err = efa_com_modify_qp(&dev->edev, ¶ms); if (err) return err; @@ -843,7 +982,7 @@ static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx) return efa_com_destroy_cq(&dev->edev, ¶ms); } -void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct efa_dev *dev = to_edev(ibcq->device); struct efa_cq *cq = to_ecq(ibcq); @@ -856,6 +995,7 @@ void efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) efa_destroy_cq_idx(dev, cq->cq_idx); efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, DMA_FROM_DEVICE); + return 0; } static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq, @@ -996,7 +1136,7 @@ err_free_mapped: DMA_FROM_DEVICE); err_out: - atomic64_inc(&dev->stats.sw_stats.create_cq_err); + atomic64_inc(&dev->stats.create_cq_err); return err; } @@ -1013,8 +1153,7 @@ static int umem_to_page_list(struct efa_dev *dev, ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n", hp_cnt, pages_in_hp); - rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, - BIT(hp_shift)) + rdma_umem_for_each_dma_block(umem, &biter, BIT(hp_shift)) page_list[hp_idx++] = rdma_block_iter_dma_address(&biter); return 0; @@ -1026,7 +1165,7 @@ static struct scatterlist *efa_vmalloc_buf_to_sg(u64 *buf, int page_cnt) struct page *pg; int i; - sglist = kcalloc(page_cnt, sizeof(*sglist), GFP_KERNEL); + sglist = kmalloc_array(page_cnt, sizeof(*sglist), GFP_KERNEL); if (!sglist) return NULL; sg_init_table(sglist, page_cnt); @@ -1370,7 +1509,7 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, supp_access_flags = IB_ACCESS_LOCAL_WRITE | - (is_rdma_read_cap(dev) ? IB_ACCESS_REMOTE_READ : 0); + (EFA_DEV_CAP(dev, RDMA_READ) ? IB_ACCESS_REMOTE_READ : 0); access_flags &= ~IB_ACCESS_OPTIONAL; if (access_flags & ~supp_access_flags) { @@ -1410,9 +1549,8 @@ struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length, goto err_unmap; } - params.page_shift = __ffs(pg_sz); - params.page_num = DIV_ROUND_UP(length + (start & (pg_sz - 1)), - pg_sz); + params.page_shift = order_base_2(pg_sz); + params.page_num = ib_umem_num_dma_blocks(mr->umem, pg_sz); ibdev_dbg(&dev->ibdev, "start %#llx length %#llx params.page_shift %u params.page_num %u\n", @@ -1451,7 +1589,7 @@ err_unmap: err_free: kfree(mr); err_out: - atomic64_inc(&dev->stats.sw_stats.reg_mr_err); + atomic64_inc(&dev->stats.reg_mr_err); return ERR_PTR(err); } @@ -1569,19 +1707,17 @@ int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata) resp.max_tx_batch = dev->dev_attr.max_tx_batch; resp.min_sq_wr = dev->dev_attr.min_sq_depth; - if (udata && udata->outlen) { - err = ib_copy_to_udata(udata, &resp, - min(sizeof(resp), udata->outlen)); - if (err) - goto err_dealloc_uar; - } + err = ib_copy_to_udata(udata, &resp, + min(sizeof(resp), udata->outlen)); + if (err) + goto err_dealloc_uar; return 0; err_dealloc_uar: efa_dealloc_uar(dev, result.uarn); err_out: - atomic64_inc(&dev->stats.sw_stats.alloc_ucontext_err); + atomic64_inc(&dev->stats.alloc_ucontext_err); return err; } @@ -1614,7 +1750,7 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, ibdev_dbg(&dev->ibdev, "pgoff[%#lx] does not have valid entry\n", vma->vm_pgoff); - atomic64_inc(&dev->stats.sw_stats.mmap_err); + atomic64_inc(&dev->stats.mmap_err); return -EINVAL; } entry = to_emmap(rdma_entry); @@ -1656,7 +1792,7 @@ static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext, "Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n", entry->address, rdma_entry->npages * PAGE_SIZE, entry->mmap_flag, err); - atomic64_inc(&dev->stats.sw_stats.mmap_err); + atomic64_inc(&dev->stats.mmap_err); } rdma_user_mmap_entry_put(rdma_entry); @@ -1741,11 +1877,11 @@ int efa_create_ah(struct ib_ah *ibah, err_destroy_ah: efa_ah_destroy(dev, ah); err_out: - atomic64_inc(&dev->stats.sw_stats.create_ah_err); + atomic64_inc(&dev->stats.create_ah_err); return err; } -void efa_destroy_ah(struct ib_ah *ibah, u32 flags) +int efa_destroy_ah(struct ib_ah *ibah, u32 flags) { struct efa_dev *dev = to_edev(ibah->pd->device); struct efa_ah *ah = to_eah(ibah); @@ -1755,10 +1891,11 @@ void efa_destroy_ah(struct ib_ah *ibah, u32 flags) if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) { ibdev_dbg(&dev->ibdev, "Destroy address handle is not supported in atomic context\n"); - return; + return -EOPNOTSUPP; } efa_ah_destroy(dev, ah); + return 0; } struct rdma_hw_stats *efa_alloc_hw_stats(struct ib_device *ibdev, u8 port_num) @@ -1774,13 +1911,15 @@ int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, struct efa_com_get_stats_params params = {}; union efa_com_get_stats_result result; struct efa_dev *dev = to_edev(ibdev); + struct efa_com_rdma_read_stats *rrs; + struct efa_com_messages_stats *ms; struct efa_com_basic_stats *bs; struct efa_com_stats_admin *as; struct efa_stats *s; int err; - params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC; params.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL; + params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC; err = efa_com_get_stats(&dev->edev, ¶ms, &result); if (err) @@ -1793,6 +1932,28 @@ int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, stats->value[EFA_RX_PKTS] = bs->rx_pkts; stats->value[EFA_RX_DROPS] = bs->rx_drops; + params.type = EFA_ADMIN_GET_STATS_TYPE_MESSAGES; + err = efa_com_get_stats(&dev->edev, ¶ms, &result); + if (err) + return err; + + ms = &result.messages_stats; + stats->value[EFA_SEND_BYTES] = ms->send_bytes; + stats->value[EFA_SEND_WRS] = ms->send_wrs; + stats->value[EFA_RECV_BYTES] = ms->recv_bytes; + stats->value[EFA_RECV_WRS] = ms->recv_wrs; + + params.type = EFA_ADMIN_GET_STATS_TYPE_RDMA_READ; + err = efa_com_get_stats(&dev->edev, ¶ms, &result); + if (err) + return err; + + rrs = &result.rdma_read_stats; + stats->value[EFA_RDMA_READ_WRS] = rrs->read_wrs; + stats->value[EFA_RDMA_READ_BYTES] = rrs->read_bytes; + stats->value[EFA_RDMA_READ_WR_ERR] = rrs->read_wr_err; + stats->value[EFA_RDMA_READ_RESP_BYTES] = rrs->read_resp_bytes; + as = &dev->edev.aq.stats; stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd); stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd); @@ -1801,13 +1962,14 @@ int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, s = &dev->stats; stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd); - stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->sw_stats.alloc_pd_err); - stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->sw_stats.create_qp_err); - stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->sw_stats.create_cq_err); - stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->sw_stats.reg_mr_err); - stats->value[EFA_ALLOC_UCONTEXT_ERR] = atomic64_read(&s->sw_stats.alloc_ucontext_err); - stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->sw_stats.create_ah_err); - stats->value[EFA_MMAP_ERR] = atomic64_read(&s->sw_stats.mmap_err); + stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->alloc_pd_err); + stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->create_qp_err); + stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->create_cq_err); + stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->reg_mr_err); + stats->value[EFA_ALLOC_UCONTEXT_ERR] = + atomic64_read(&s->alloc_ucontext_err); + stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->create_ah_err); + stats->value[EFA_MMAP_ERR] = atomic64_read(&s->mmap_err); return ARRAY_SIZE(efa_stats_names); } diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 04575c9afd61..a307d4c8b15a 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -232,11 +232,11 @@ static const struct sdma_set_state_action sdma_action_table[] = { static void sdma_complete(struct kref *); static void sdma_finalput(struct sdma_state *); static void sdma_get(struct sdma_state *); -static void sdma_hw_clean_up_task(unsigned long); +static void sdma_hw_clean_up_task(struct tasklet_struct *); static void sdma_put(struct sdma_state *); static void sdma_set_state(struct sdma_engine *, enum sdma_states); static void sdma_start_hw_clean_up(struct sdma_engine *); -static void sdma_sw_clean_up_task(unsigned long); +static void sdma_sw_clean_up_task(struct tasklet_struct *); static void sdma_sendctrl(struct sdma_engine *, unsigned); static void init_sdma_regs(struct sdma_engine *, u32, uint); static void sdma_process_event( @@ -545,9 +545,10 @@ static void sdma_err_progress_check(struct timer_list *t) schedule_work(&sde->err_halt_worker); } -static void sdma_hw_clean_up_task(unsigned long opaque) +static void sdma_hw_clean_up_task(struct tasklet_struct *t) { - struct sdma_engine *sde = (struct sdma_engine *)opaque; + struct sdma_engine *sde = from_tasklet(sde, t, + sdma_hw_clean_up_task); u64 statuscsr; while (1) { @@ -604,9 +605,9 @@ static void sdma_flush_descq(struct sdma_engine *sde) sdma_desc_avail(sde, sdma_descq_freecnt(sde)); } -static void sdma_sw_clean_up_task(unsigned long opaque) +static void sdma_sw_clean_up_task(struct tasklet_struct *t) { - struct sdma_engine *sde = (struct sdma_engine *)opaque; + struct sdma_engine *sde = from_tasklet(sde, t, sdma_sw_clean_up_task); unsigned long flags; spin_lock_irqsave(&sde->tail_lock, flags); @@ -1454,11 +1455,10 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) sde->tail_csr = get_kctxt_csr_addr(dd, this_idx, SD(TAIL)); - tasklet_init(&sde->sdma_hw_clean_up_task, sdma_hw_clean_up_task, - (unsigned long)sde); - - tasklet_init(&sde->sdma_sw_clean_up_task, sdma_sw_clean_up_task, - (unsigned long)sde); + tasklet_setup(&sde->sdma_hw_clean_up_task, + sdma_hw_clean_up_task); + tasklet_setup(&sde->sdma_sw_clean_up_task, + sdma_sw_clean_up_task); INIT_WORK(&sde->err_halt_worker, sdma_err_halt_wait); INIT_WORK(&sde->flush_worker, sdma_field_flush); diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 30865635b449..3591923abebb 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1424,7 +1424,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num, props->gid_tbl_len = HFI1_GUIDS_PER_PORT; props->active_width = (u8)opa_width_to_ib(ppd->link_width_active); /* see rate_show() in ib core/sysfs.c */ - props->active_speed = (u8)opa_speed_to_ib(ppd->link_speed_active); + props->active_speed = opa_speed_to_ib(ppd->link_speed_active); props->max_vl_num = ppd->vls_supported; /* Once we are a "first class" citizen and have added the OPA MTUs to diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 5b2f9314edd3..75b06db60f7c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -39,6 +39,22 @@ #define HNS_ROCE_VLAN_SL_BIT_MASK 7 #define HNS_ROCE_VLAN_SL_SHIFT 13 +static inline u16 get_ah_udp_sport(const struct rdma_ah_attr *ah_attr) +{ + u32 fl = ah_attr->grh.flow_label; + u16 sport; + + if (!fl) + sport = get_random_u32() % + (IB_ROCE_UDP_ENCAP_VALID_PORT_MAX + 1 - + IB_ROCE_UDP_ENCAP_VALID_PORT_MIN) + + IB_ROCE_UDP_ENCAP_VALID_PORT_MIN; + else + sport = rdma_flow_label_to_udp_sport(fl); + + return sport; +} + int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata) { @@ -79,6 +95,8 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE); ah->av.sl = rdma_ah_get_sl(ah_attr); + ah->av.flowlabel = grh->flow_label; + ah->av.udp_sport = get_ah_udp_sport(ah_attr); return 0; } @@ -98,8 +116,3 @@ int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } - -void hns_roce_destroy_ah(struct ib_ah *ah, u32 flags) -{ - return; -} diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index a522cb2d29ea..a6b23dec1adc 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -268,8 +268,7 @@ int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, } /* convert system page cnt to hw page cnt */ - rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, - 1 << page_shift) { + rdma_umem_for_each_dma_block(umem, &biter, 1 << page_shift) { addr = rdma_block_iter_dma_address(&biter); if (idx >= start) { bufs[total++] = addr; diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index e87d616f7988..809b22aa5056 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -150,7 +150,7 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, int err; buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + HNS_HW_PAGE_SHIFT; - buf_attr.region[0].size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz; + buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size; buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num; buf_attr.region_count = 1; buf_attr.fixed_page = true; @@ -224,6 +224,21 @@ static void free_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, } } +static void set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata, + struct hns_roce_ib_create_cq *ucmd) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device); + + if (udata) { + if (udata->inlen >= offsetofend(typeof(*ucmd), cqe_size)) + hr_cq->cqe_size = ucmd->cqe_size; + else + hr_cq->cqe_size = HNS_ROCE_V2_CQE_SIZE; + } else { + hr_cq->cqe_size = hr_dev->caps.cqe_sz; + } +} + int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, struct ib_udata *udata) { @@ -258,7 +273,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, INIT_LIST_HEAD(&hr_cq->rq_list); if (udata) { - ret = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); + ret = ib_copy_from_udata(&ucmd, udata, + min(sizeof(ucmd), udata->inlen)); if (ret) { ibdev_err(ibdev, "Failed to copy CQ udata, err %d\n", ret); @@ -266,6 +282,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, } } + set_cqe_size(hr_cq, udata, &ucmd); + ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr); if (ret) { ibdev_err(ibdev, "Failed to alloc CQ buf, err %d\n", ret); @@ -287,7 +305,7 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, /* * For the QP created by kernel space, tptr value should be initialized * to zero; For the QP created by user space, it will cause synchronous - * problems if tptr is set to zero here, so we initialze it in user + * problems if tptr is set to zero here, so we initialize it in user * space. */ if (!udata && hr_cq->tptr_addr) @@ -311,7 +329,7 @@ err_cq_buf: return ret; } -void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) +int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); @@ -322,6 +340,7 @@ void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) free_cq_buf(hr_dev, hr_cq); free_cq_db(hr_dev, hr_cq, udata); free_cqc(hr_dev, hr_cq); + return 0; } void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 6edcbdcd8f43..6d2acff69f98 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -37,8 +37,8 @@ #define DRV_NAME "hns_roce" -/* hip08 is a pci device */ #define PCI_REVISION_ID_HIP08 0x21 +#define PCI_REVISION_ID_HIP09 0x30 #define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6') @@ -57,7 +57,6 @@ /* Hardware specification only for v1 engine */ #define HNS_ROCE_MAX_INNER_MTPT_NUM 0x7 #define HNS_ROCE_MAX_MTPT_PBL_NUM 0x100000 -#define HNS_ROCE_MAX_SGE_NUM 2 #define HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS 20 #define HNS_ROCE_MAX_FREE_CQ_WAIT_CNT \ @@ -76,15 +75,18 @@ #define HNS_ROCE_CEQ 0 #define HNS_ROCE_AEQ 1 -#define HNS_ROCE_CEQ_ENTRY_SIZE 0x4 -#define HNS_ROCE_AEQ_ENTRY_SIZE 0x10 +#define HNS_ROCE_CEQE_SIZE 0x4 +#define HNS_ROCE_AEQE_SIZE 0x10 -#define HNS_ROCE_SL_SHIFT 28 -#define HNS_ROCE_TCLASS_SHIFT 20 -#define HNS_ROCE_FLOW_LABEL_MASK 0xfffff +#define HNS_ROCE_V3_EQE_SIZE 0x40 + +#define HNS_ROCE_V2_CQE_SIZE 32 +#define HNS_ROCE_V3_CQE_SIZE 64 + +#define HNS_ROCE_V2_QPC_SZ 256 +#define HNS_ROCE_V3_QPC_SZ 512 #define HNS_ROCE_MAX_PORTS 6 -#define HNS_ROCE_MAX_GID_NUM 16 #define HNS_ROCE_GID_SIZE 16 #define HNS_ROCE_SGE_SIZE 16 @@ -112,8 +114,6 @@ #define PAGES_SHIFT_24 24 #define PAGES_SHIFT_32 32 -#define HNS_ROCE_PCI_BAR_NUM 2 - #define HNS_ROCE_IDX_QUE_ENTRY_SZ 4 #define SRQ_DB_REG 0x230 @@ -467,6 +467,7 @@ struct hns_roce_cq { void __iomem *cq_db_l; u16 *tptr_addr; int arm_sn; + int cqe_size; unsigned long cqn; u32 vector; atomic_t refcount; @@ -535,17 +536,18 @@ struct hns_roce_raq_table { }; struct hns_roce_av { - u8 port; - u8 gid_index; - u8 stat_rate; - u8 hop_limit; - u32 flowlabel; - u8 sl; - u8 tclass; - u8 dgid[HNS_ROCE_GID_SIZE]; - u8 mac[ETH_ALEN]; - u16 vlan_id; - bool vlan_en; + u8 port; + u8 gid_index; + u8 stat_rate; + u8 hop_limit; + u32 flowlabel; + u16 udp_sport; + u8 sl; + u8 tclass; + u8 dgid[HNS_ROCE_GID_SIZE]; + u8 mac[ETH_ALEN]; + u16 vlan_id; + bool vlan_en; }; struct hns_roce_ah { @@ -655,6 +657,8 @@ struct hns_roce_qp { struct hns_roce_sge sge; u32 next_sge; + enum ib_mtu path_mtu; + u32 max_inline_data; /* 0: flush needed, 1: unneeded */ unsigned long flush_flag; @@ -678,7 +682,8 @@ enum { }; struct hns_roce_ceqe { - __le32 comp; + __le32 comp; + __le32 rsv[15]; }; struct hns_roce_aeqe { @@ -715,6 +720,7 @@ struct hns_roce_aeqe { u8 rsv0; } __packed cmd; } event; + __le32 rsv[12]; }; struct hns_roce_eq { @@ -791,15 +797,15 @@ struct hns_roce_caps { int num_pds; int reserved_pds; u32 mtt_entry_sz; - u32 cq_entry_sz; + u32 cqe_sz; u32 page_size_cap; u32 reserved_lkey; int mtpt_entry_sz; - int qpc_entry_sz; + int qpc_sz; int irrl_entry_sz; int trrl_entry_sz; int cqc_entry_sz; - int sccc_entry_sz; + int sccc_sz; int qpc_timer_entry_sz; int cqc_timer_entry_sz; int srqc_entry_sz; @@ -809,6 +815,8 @@ struct hns_roce_caps { u32 pbl_hop_num; int aeqe_depth; int ceqe_depth; + u32 aeqe_size; + u32 ceqe_size; enum ib_mtu max_mtu; u32 qpc_bt_num; u32 qpc_timer_bt_num; @@ -930,7 +938,7 @@ struct hns_roce_hw { int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, struct ib_udata *udata); - void (*destroy_cq)(struct ib_cq *ibcq, struct ib_udata *udata); + int (*destroy_cq)(struct ib_cq *ibcq, struct ib_udata *udata); int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); int (*init_eq)(struct hns_roce_dev *hr_dev); void (*cleanup_eq)(struct hns_roce_dev *hr_dev); @@ -1178,10 +1186,13 @@ void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, int hns_roce_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -void hns_roce_destroy_ah(struct ib_ah *ah, u32 flags); +static inline int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags) +{ + return 0; +} int hns_roce_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); -void hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); +int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, @@ -1200,8 +1211,7 @@ int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev, unsigned long mpt_index); unsigned long key_to_hw_index(u32 key); -struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type, - struct ib_udata *udata); +int hns_roce_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); int hns_roce_dealloc_mw(struct ib_mw *ibmw); void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf); @@ -1220,7 +1230,7 @@ int hns_roce_create_srq(struct ib_srq *srq, int hns_roce_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); -void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); +int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd, struct ib_qp_init_attr *init_attr, @@ -1247,7 +1257,7 @@ int to_hr_qp_type(int qp_type); int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); -void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); +int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); int hns_roce_db_map_user(struct hns_roce_ucontext *context, struct ib_udata *udata, unsigned long virt, struct hns_roce_db *db); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index c8db6f8ae018..7487cf3d2c37 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -338,8 +338,8 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev, void __iomem *bt_cmd; __le32 bt_cmd_val[2]; __le32 bt_cmd_h = 0; - __le32 bt_cmd_l = 0; - u64 bt_ba = 0; + __le32 bt_cmd_l; + u64 bt_ba; int ret = 0; /* Find the HEM(Hardware Entry Memory) entry */ @@ -1027,7 +1027,7 @@ void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) if (hr_dev->caps.cqc_timer_entry_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cqc_timer_table); - if (hr_dev->caps.sccc_entry_sz) + if (hr_dev->caps.sccc_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.sccc_table); if (hr_dev->caps.trrl_entry_sz) @@ -1404,7 +1404,7 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, { const struct hns_roce_buf_region *r; int ofs, end; - int ret = 0; + int ret; int unit; int i; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index aeb3a6fa7d47..5f4d8a32ed6d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -70,15 +70,15 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, struct hns_roce_qp *qp = to_hr_qp(ibqp); struct device *dev = &hr_dev->pdev->dev; struct hns_roce_sq_db sq_db = {}; - int ps_opcode = 0, i = 0; + int ps_opcode, i; unsigned long flags = 0; void *wqe = NULL; __le32 doorbell[2]; - u32 wqe_idx = 0; - int nreq = 0; int ret = 0; - u8 *smac; int loopback; + u32 wqe_idx; + int nreq; + u8 *smac; if (unlikely(ibqp->qp_type != IB_QPT_GSI && ibqp->qp_type != IB_QPT_RC)) { @@ -271,7 +271,6 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, ps_opcode = HNS_ROCE_WQE_OPCODE_SEND; break; case IB_WR_LOCAL_INV: - break; case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: case IB_WR_LSO: @@ -888,7 +887,7 @@ static int hns_roce_db_init(struct hns_roce_dev *hr_dev) u32 odb_ext_mod; u32 sdb_evt_mod; u32 odb_evt_mod; - int ret = 0; + int ret; memset(db, 0, sizeof(*db)); @@ -1148,8 +1147,8 @@ static int hns_roce_raq_init(struct hns_roce_dev *hr_dev) struct hns_roce_v1_priv *priv = hr_dev->priv; struct hns_roce_raq_table *raq = &priv->raq_table; struct device *dev = &hr_dev->pdev->dev; - int raq_shift = 0; dma_addr_t addr; + int raq_shift; __le32 tmp; u32 val; int ret; @@ -1360,7 +1359,7 @@ static int hns_roce_free_mr_init(struct hns_roce_dev *hr_dev) struct hns_roce_v1_priv *priv = hr_dev->priv; struct hns_roce_free_mr *free_mr = &priv->free_mr; struct device *dev = &hr_dev->pdev->dev; - int ret = 0; + int ret; free_mr->free_mr_wq = create_singlethread_workqueue("hns_roce_free_mr"); if (!free_mr->free_mr_wq) { @@ -1440,8 +1439,8 @@ static int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset) static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev) { - int i = 0; struct hns_roce_caps *caps = &hr_dev->caps; + int i; hr_dev->vendor_id = roce_read(hr_dev, ROCEE_VENDOR_ID_REG); hr_dev->vendor_part_id = roce_read(hr_dev, ROCEE_VENDOR_PART_ID_REG); @@ -1471,12 +1470,12 @@ static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev) caps->max_qp_dest_rdma = HNS_ROCE_V1_MAX_QP_DEST_RDMA; caps->max_sq_desc_sz = HNS_ROCE_V1_MAX_SQ_DESC_SZ; caps->max_rq_desc_sz = HNS_ROCE_V1_MAX_RQ_DESC_SZ; - caps->qpc_entry_sz = HNS_ROCE_V1_QPC_ENTRY_SIZE; + caps->qpc_sz = HNS_ROCE_V1_QPC_SIZE; caps->irrl_entry_sz = HNS_ROCE_V1_IRRL_ENTRY_SIZE; caps->cqc_entry_sz = HNS_ROCE_V1_CQC_ENTRY_SIZE; caps->mtpt_entry_sz = HNS_ROCE_V1_MTPT_ENTRY_SIZE; caps->mtt_entry_sz = HNS_ROCE_V1_MTT_ENTRY_SIZE; - caps->cq_entry_sz = HNS_ROCE_V1_CQE_ENTRY_SIZE; + caps->cqe_sz = HNS_ROCE_V1_CQE_SIZE; caps->page_size_cap = HNS_ROCE_V1_PAGE_SIZE_SUPPORT; caps->reserved_lkey = 0; caps->reserved_pds = 0; @@ -1643,7 +1642,7 @@ static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev, unsigned long timeout) { u8 __iomem *hcr = hr_dev->reg_base + ROCEE_MB1_REG; - unsigned long end = 0; + unsigned long end; u32 status = 0; end = msecs_to_jiffies(timeout) + jiffies; @@ -1671,7 +1670,7 @@ static int hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port, { unsigned long flags; u32 *p = NULL; - u8 gid_idx = 0; + u8 gid_idx; gid_idx = hns_get_gid_index(hr_dev, port, gid_index); @@ -1897,8 +1896,7 @@ static int hns_roce_v1_write_mtpt(struct hns_roce_dev *hr_dev, void *mb_buf, static void *get_cqe(struct hns_roce_cq *hr_cq, int n) { - return hns_roce_buf_offset(hr_cq->mtr.kmem, - n * HNS_ROCE_V1_CQE_ENTRY_SIZE); + return hns_roce_buf_offset(hr_cq->mtr.kmem, n * HNS_ROCE_V1_CQE_SIZE); } static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n) @@ -2445,7 +2443,7 @@ static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev, struct hns_roce_cmd_mailbox *mailbox; struct device *dev = &hr_dev->pdev->dev; - int ret = 0; + int ret; if (cur_state >= HNS_ROCE_QP_NUM_STATE || new_state >= HNS_ROCE_QP_NUM_STATE || @@ -3394,7 +3392,7 @@ static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct device *dev = &hr_dev->pdev->dev; struct hns_roce_qp_context *context; - int tmp_qp_state = 0; + int tmp_qp_state; int ret = 0; int state; @@ -3572,7 +3570,7 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) return 0; } -static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); @@ -3603,6 +3601,7 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) } wait_time++; } + return 0; } static void set_eq_cons_index_v1(struct hns_roce_eq *eq, int req_not) @@ -3775,8 +3774,7 @@ static void hns_roce_v1_db_overflow_handle(struct hns_roce_dev *hr_dev, static struct hns_roce_aeqe *get_aeqe_v1(struct hns_roce_eq *eq, u32 entry) { - unsigned long off = (entry & (eq->entries - 1)) * - HNS_ROCE_AEQ_ENTRY_SIZE; + unsigned long off = (entry & (eq->entries - 1)) * HNS_ROCE_AEQE_SIZE; return (struct hns_roce_aeqe *)((u8 *) (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) + @@ -3881,8 +3879,7 @@ static int hns_roce_v1_aeq_int(struct hns_roce_dev *hr_dev, static struct hns_roce_ceqe *get_ceqe_v1(struct hns_roce_eq *eq, u32 entry) { - unsigned long off = (entry & (eq->entries - 1)) * - HNS_ROCE_CEQ_ENTRY_SIZE; + unsigned long off = (entry & (eq->entries - 1)) * HNS_ROCE_CEQE_SIZE; return (struct hns_roce_ceqe *)((u8 *) (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) + @@ -3934,7 +3931,7 @@ static irqreturn_t hns_roce_v1_msix_interrupt_eq(int irq, void *eq_ptr) { struct hns_roce_eq *eq = eq_ptr; struct hns_roce_dev *hr_dev = eq->hr_dev; - int int_work = 0; + int int_work; if (eq->type_flag == HNS_ROCE_CEQ) /* CEQ irq routine, CEQ is pulse irq, not clear */ @@ -4132,9 +4129,9 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev, void __iomem *eqc = hr_dev->eq_table.eqc_base[eq->eqn]; struct device *dev = &hr_dev->pdev->dev; dma_addr_t tmp_dma_addr; - u32 eqconsindx_val = 0; u32 eqcuridx_val = 0; - u32 eqshift_val = 0; + u32 eqconsindx_val; + u32 eqshift_val; __le32 tmp2 = 0; __le32 tmp1 = 0; __le32 tmp = 0; @@ -4253,7 +4250,7 @@ static int hns_roce_v1_init_eq_table(struct hns_roce_dev *hr_dev) CEQ_REG_OFFSET * i; eq->entries = hr_dev->caps.ceqe_depth; eq->log_entries = ilog2(eq->entries); - eq->eqe_size = HNS_ROCE_CEQ_ENTRY_SIZE; + eq->eqe_size = HNS_ROCE_CEQE_SIZE; } else { /* AEQ */ eq_table->eqc_base[i] = hr_dev->reg_base + @@ -4263,7 +4260,7 @@ static int hns_roce_v1_init_eq_table(struct hns_roce_dev *hr_dev) ROCEE_CAEP_AEQE_CONS_IDX_REG; eq->entries = hr_dev->caps.aeqe_depth; eq->log_entries = ilog2(eq->entries); - eq->eqe_size = HNS_ROCE_AEQ_ENTRY_SIZE; + eq->eqe_size = HNS_ROCE_AEQE_SIZE; } } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h index 52307b2c7100..ffd0156080f5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -68,13 +68,13 @@ #define HNS_ROCE_V1_COMP_EQE_NUM 0x8000 #define HNS_ROCE_V1_ASYNC_EQE_NUM 0x400 -#define HNS_ROCE_V1_QPC_ENTRY_SIZE 256 +#define HNS_ROCE_V1_QPC_SIZE 256 #define HNS_ROCE_V1_IRRL_ENTRY_SIZE 8 #define HNS_ROCE_V1_CQC_ENTRY_SIZE 64 #define HNS_ROCE_V1_MTPT_ENTRY_SIZE 64 #define HNS_ROCE_V1_MTT_ENTRY_SIZE 64 -#define HNS_ROCE_V1_CQE_ENTRY_SIZE 32 +#define HNS_ROCE_V1_CQE_SIZE 32 #define HNS_ROCE_V1_PAGE_SIZE_SUPPORT 0xFFFFF000 #define HNS_ROCE_V1_TABLE_CHUNK_SIZE (1 << 17) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 4cda95ed1fbe..6d30850696c5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -153,6 +153,67 @@ static void set_atomic_seg(const struct ib_send_wr *wr, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); } +static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, + const struct ib_send_wr *wr, + unsigned int *sge_idx, u32 msg_len) +{ + struct ib_device *ibdev = &(to_hr_dev(qp->ibqp.device))->ib_dev; + unsigned int dseg_len = sizeof(struct hns_roce_v2_wqe_data_seg); + unsigned int ext_sge_sz = qp->sq.max_gs * dseg_len; + unsigned int left_len_in_pg; + unsigned int idx = *sge_idx; + unsigned int i = 0; + unsigned int len; + void *addr; + void *dseg; + + if (msg_len > ext_sge_sz) { + ibdev_err(ibdev, + "no enough extended sge space for inline data.\n"); + return -EINVAL; + } + + dseg = hns_roce_get_extend_sge(qp, idx & (qp->sge.sge_cnt - 1)); + left_len_in_pg = hr_hw_page_align((uintptr_t)dseg) - (uintptr_t)dseg; + len = wr->sg_list[0].length; + addr = (void *)(unsigned long)(wr->sg_list[0].addr); + + /* When copying data to extended sge space, the left length in page may + * not long enough for current user's sge. So the data should be + * splited into several parts, one in the first page, and the others in + * the subsequent pages. + */ + while (1) { + if (len <= left_len_in_pg) { + memcpy(dseg, addr, len); + + idx += len / dseg_len; + + i++; + if (i >= wr->num_sge) + break; + + left_len_in_pg -= len; + len = wr->sg_list[i].length; + addr = (void *)(unsigned long)(wr->sg_list[i].addr); + dseg += len; + } else { + memcpy(dseg, addr, left_len_in_pg); + + len -= left_len_in_pg; + addr += left_len_in_pg; + idx += left_len_in_pg / dseg_len; + dseg = hns_roce_get_extend_sge(qp, + idx & (qp->sge.sge_cnt - 1)); + left_len_in_pg = 1 << HNS_HW_PAGE_SHIFT; + } + } + + *sge_idx = idx; + + return 0; +} + static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, unsigned int *sge_ind, unsigned int valid_num_sge) { @@ -177,73 +238,115 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, *sge_ind = idx; } +static bool check_inl_data_len(struct hns_roce_qp *qp, unsigned int len) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device); + int mtu = ib_mtu_enum_to_int(qp->path_mtu); + + if (len > qp->max_inline_data || len > mtu) { + ibdev_err(&hr_dev->ib_dev, + "invalid length of data, data len = %u, max inline len = %u, path mtu = %d.\n", + len, qp->max_inline_data, mtu); + return false; + } + + return true; +} + +static int set_rc_inl(struct hns_roce_qp *qp, const struct ib_send_wr *wr, + struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, + unsigned int *sge_idx) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device); + u32 msg_len = le32_to_cpu(rc_sq_wqe->msg_len); + struct ib_device *ibdev = &hr_dev->ib_dev; + unsigned int curr_idx = *sge_idx; + void *dseg = rc_sq_wqe; + unsigned int i; + int ret; + + if (unlikely(wr->opcode == IB_WR_RDMA_READ)) { + ibdev_err(ibdev, "invalid inline parameters!\n"); + return -EINVAL; + } + + if (!check_inl_data_len(qp, msg_len)) + return -EINVAL; + + dseg += sizeof(struct hns_roce_v2_rc_send_wqe); + + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S, 1); + + if (msg_len <= HNS_ROCE_V2_MAX_RC_INL_INN_SZ) { + roce_set_bit(rc_sq_wqe->byte_20, + V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 0); + + for (i = 0; i < wr->num_sge; i++) { + memcpy(dseg, ((void *)wr->sg_list[i].addr), + wr->sg_list[i].length); + dseg += wr->sg_list[i].length; + } + } else { + roce_set_bit(rc_sq_wqe->byte_20, + V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 1); + + ret = fill_ext_sge_inl_data(qp, wr, &curr_idx, msg_len); + if (ret) + return ret; + + roce_set_field(rc_sq_wqe->byte_16, + V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, + V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, + curr_idx - *sge_idx); + } + + *sge_idx = curr_idx; + + return 0; +} + static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, unsigned int *sge_ind, unsigned int valid_num_sge) { - struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_v2_wqe_data_seg *dseg = (void *)rc_sq_wqe + sizeof(struct hns_roce_v2_rc_send_wqe); - struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_qp *qp = to_hr_qp(ibqp); - void *wqe = dseg; int j = 0; int i; - if (wr->send_flags & IB_SEND_INLINE && valid_num_sge) { - if (unlikely(le32_to_cpu(rc_sq_wqe->msg_len) > - hr_dev->caps.max_sq_inline)) { - ibdev_err(ibdev, "inline len(1-%d)=%d, illegal", - rc_sq_wqe->msg_len, - hr_dev->caps.max_sq_inline); - return -EINVAL; - } + roce_set_field(rc_sq_wqe->byte_20, + V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M, + V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, + (*sge_ind) & (qp->sge.sge_cnt - 1)); - if (unlikely(wr->opcode == IB_WR_RDMA_READ)) { - ibdev_err(ibdev, "Not support inline data!\n"); - return -EINVAL; - } + if (wr->send_flags & IB_SEND_INLINE) + return set_rc_inl(qp, wr, rc_sq_wqe, sge_ind); + if (valid_num_sge <= HNS_ROCE_SGE_IN_WQE) { for (i = 0; i < wr->num_sge; i++) { - memcpy(wqe, ((void *)wr->sg_list[i].addr), - wr->sg_list[i].length); - wqe += wr->sg_list[i].length; + if (likely(wr->sg_list[i].length)) { + set_data_seg_v2(dseg, wr->sg_list + i); + dseg++; + } } - - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S, - 1); } else { - if (valid_num_sge <= HNS_ROCE_SGE_IN_WQE) { - for (i = 0; i < wr->num_sge; i++) { - if (likely(wr->sg_list[i].length)) { - set_data_seg_v2(dseg, wr->sg_list + i); - dseg++; - } + for (i = 0; i < wr->num_sge && j < HNS_ROCE_SGE_IN_WQE; i++) { + if (likely(wr->sg_list[i].length)) { + set_data_seg_v2(dseg, wr->sg_list + i); + dseg++; + j++; } - } else { - roce_set_field(rc_sq_wqe->byte_20, - V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M, - V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, - (*sge_ind) & (qp->sge.sge_cnt - 1)); - - for (i = 0; i < wr->num_sge && j < HNS_ROCE_SGE_IN_WQE; - i++) { - if (likely(wr->sg_list[i].length)) { - set_data_seg_v2(dseg, wr->sg_list + i); - dseg++; - j++; - } - } - - set_extend_sge(qp, wr, sge_ind, valid_num_sge); } - roce_set_field(rc_sq_wqe->byte_16, - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); + set_extend_sge(qp, wr, sge_ind, valid_num_sge); } + roce_set_field(rc_sq_wqe->byte_16, + V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, + V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); + return 0; } @@ -292,6 +395,33 @@ static unsigned int calc_wr_sge_num(const struct ib_send_wr *wr, return valid_num; } +static __le32 get_immtdata(const struct ib_send_wr *wr) +{ + switch (wr->opcode) { + case IB_WR_SEND_WITH_IMM: + case IB_WR_RDMA_WRITE_WITH_IMM: + return cpu_to_le32(be32_to_cpu(wr->ex.imm_data)); + default: + return 0; + } +} + +static int set_ud_opcode(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe, + const struct ib_send_wr *wr) +{ + u32 ib_op = wr->opcode; + + if (ib_op != IB_WR_SEND && ib_op != IB_WR_SEND_WITH_IMM) + return -EINVAL; + + ud_sq_wqe->immtdata = get_immtdata(wr); + + roce_set_field(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OPCODE_M, + V2_UD_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op)); + + return 0; +} + static inline int set_ud_wqe(struct hns_roce_qp *qp, const struct ib_send_wr *wr, void *wqe, unsigned int *sge_idx, @@ -305,10 +435,15 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, u32 msg_len = 0; bool loopback; u8 *smac; + int ret; valid_num_sge = calc_wr_sge_num(wr, &msg_len); memset(ud_sq_wqe, 0, sizeof(*ud_sq_wqe)); + ret = set_ud_opcode(ud_sq_wqe, wr); + if (WARN_ON(ret)) + return ret; + roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_0_M, V2_UD_SEND_WQE_DMAC_0_S, ah->av.mac[0]); roce_set_field(ud_sq_wqe->dmac, V2_UD_SEND_WQE_DMAC_1_M, @@ -329,23 +464,8 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_LBI_S, loopback); - roce_set_field(ud_sq_wqe->byte_4, - V2_UD_SEND_WQE_BYTE_4_OPCODE_M, - V2_UD_SEND_WQE_BYTE_4_OPCODE_S, - HNS_ROCE_V2_WQE_OP_SEND); - ud_sq_wqe->msg_len = cpu_to_le32(msg_len); - switch (wr->opcode) { - case IB_WR_SEND_WITH_IMM: - case IB_WR_RDMA_WRITE_WITH_IMM: - ud_sq_wqe->immtdata = cpu_to_le32(be32_to_cpu(wr->ex.imm_data)); - break; - default: - ud_sq_wqe->immtdata = 0; - break; - } - /* Set sig attr */ roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_CQE_S, (wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0); @@ -369,7 +489,7 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, curr_idx & (qp->sge.sge_cnt - 1)); roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M, - V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, 0); + V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport); ud_sq_wqe->qkey = cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ? qp->qkey : ud_wr(wr)->remote_qkey); roce_set_field(ud_sq_wqe->byte_32, V2_UD_SEND_WQE_BYTE_32_DQPN_M, @@ -402,6 +522,46 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, return 0; } +static int set_rc_opcode(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, + const struct ib_send_wr *wr) +{ + u32 ib_op = wr->opcode; + + rc_sq_wqe->immtdata = get_immtdata(wr); + + switch (ib_op) { + case IB_WR_RDMA_READ: + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey); + rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr); + break; + case IB_WR_SEND: + case IB_WR_SEND_WITH_IMM: + break; + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + rc_sq_wqe->rkey = cpu_to_le32(atomic_wr(wr)->rkey); + rc_sq_wqe->va = cpu_to_le64(atomic_wr(wr)->remote_addr); + break; + case IB_WR_REG_MR: + set_frmr_seg(rc_sq_wqe, reg_wr(wr)); + break; + case IB_WR_LOCAL_INV: + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1); + fallthrough; + case IB_WR_SEND_WITH_INV: + rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey); + break; + default: + return -EINVAL; + } + + roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M, + V2_RC_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op)); + + return 0; +} static inline int set_rc_wqe(struct hns_roce_qp *qp, const struct ib_send_wr *wr, void *wqe, unsigned int *sge_idx, @@ -411,25 +571,16 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, unsigned int curr_idx = *sge_idx; unsigned int valid_num_sge; u32 msg_len = 0; - int ret = 0; + int ret; valid_num_sge = calc_wr_sge_num(wr, &msg_len); memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe)); rc_sq_wqe->msg_len = cpu_to_le32(msg_len); - switch (wr->opcode) { - case IB_WR_SEND_WITH_IMM: - case IB_WR_RDMA_WRITE_WITH_IMM: - rc_sq_wqe->immtdata = cpu_to_le32(be32_to_cpu(wr->ex.imm_data)); - break; - case IB_WR_SEND_WITH_INV: - rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey); - break; - default: - rc_sq_wqe->immtdata = 0; - break; - } + ret = set_rc_opcode(rc_sq_wqe, wr); + if (WARN_ON(ret)) + return ret; roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FENCE_S, (wr->send_flags & IB_SEND_FENCE) ? 1 : 0); @@ -443,33 +594,6 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S, owner_bit); - switch (wr->opcode) { - case IB_WR_RDMA_READ: - case IB_WR_RDMA_WRITE: - case IB_WR_RDMA_WRITE_WITH_IMM: - rc_sq_wqe->rkey = cpu_to_le32(rdma_wr(wr)->rkey); - rc_sq_wqe->va = cpu_to_le64(rdma_wr(wr)->remote_addr); - break; - case IB_WR_LOCAL_INV: - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1); - rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey); - break; - case IB_WR_REG_MR: - set_frmr_seg(rc_sq_wqe, reg_wr(wr)); - break; - case IB_WR_ATOMIC_CMP_AND_SWP: - case IB_WR_ATOMIC_FETCH_AND_ADD: - rc_sq_wqe->rkey = cpu_to_le32(atomic_wr(wr)->rkey); - rc_sq_wqe->va = cpu_to_le64(atomic_wr(wr)->remote_addr); - break; - default: - break; - } - - roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M, - V2_RC_SEND_WQE_BYTE_4_OPCODE_S, - to_hr_opcode(wr->opcode)); - if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) set_atomic_seg(wr, rc_sq_wqe, valid_num_sge); @@ -1682,7 +1806,7 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->max_sq_desc_sz = HNS_ROCE_V2_MAX_SQ_DESC_SZ; caps->max_rq_desc_sz = HNS_ROCE_V2_MAX_RQ_DESC_SZ; caps->max_srq_desc_sz = HNS_ROCE_V2_MAX_SRQ_DESC_SZ; - caps->qpc_entry_sz = HNS_ROCE_V2_QPC_ENTRY_SZ; + caps->qpc_sz = HNS_ROCE_V2_QPC_SZ; caps->irrl_entry_sz = HNS_ROCE_V2_IRRL_ENTRY_SZ; caps->trrl_entry_sz = HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ; caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ; @@ -1690,7 +1814,7 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->mtpt_entry_sz = HNS_ROCE_V2_MTPT_ENTRY_SZ; caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ; caps->idx_entry_sz = HNS_ROCE_V2_IDX_ENTRY_SZ; - caps->cq_entry_sz = HNS_ROCE_V2_CQE_ENTRY_SIZE; + caps->cqe_sz = HNS_ROCE_V2_CQE_SIZE; caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED; caps->reserved_lkey = 0; caps->reserved_pds = 0; @@ -1739,6 +1863,8 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM; caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM; caps->aeqe_depth = HNS_ROCE_V2_ASYNC_EQE_NUM; + caps->aeqe_size = HNS_ROCE_AEQE_SIZE; + caps->ceqe_size = HNS_ROCE_CEQE_SIZE; caps->local_ca_ack_delay = 0; caps->max_mtu = IB_MTU_4096; @@ -1760,19 +1886,26 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->cqc_timer_buf_pg_sz = 0; caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; - caps->sccc_entry_sz = HNS_ROCE_V2_SCCC_ENTRY_SZ; + caps->sccc_sz = HNS_ROCE_V2_SCCC_SZ; caps->sccc_ba_pg_sz = 0; caps->sccc_buf_pg_sz = 0; caps->sccc_hop_num = HNS_ROCE_SCCC_HOP_NUM; + + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { + caps->aeqe_size = HNS_ROCE_V3_EQE_SIZE; + caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE; + caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE; + caps->qpc_sz = HNS_ROCE_V3_QPC_SZ; + } } static void calc_pg_sz(int obj_num, int obj_size, int hop_num, int ctx_bt_num, int *buf_page_size, int *bt_page_size, u32 hem_type) { u64 obj_per_chunk; - int bt_chunk_size = 1 << PAGE_SHIFT; - int buf_chunk_size = 1 << PAGE_SHIFT; - int obj_per_chunk_default = buf_chunk_size / obj_size; + u64 bt_chunk_size = PAGE_SIZE; + u64 buf_chunk_size = PAGE_SIZE; + u64 obj_per_chunk_default = buf_chunk_size / obj_size; *buf_page_size = 0; *bt_page_size = 0; @@ -1855,7 +1988,7 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->max_sq_desc_sz = resp_a->max_sq_desc_sz; caps->max_rq_desc_sz = resp_a->max_rq_desc_sz; caps->max_srq_desc_sz = resp_a->max_srq_desc_sz; - caps->cq_entry_sz = resp_a->cq_entry_sz; + caps->cqe_sz = HNS_ROCE_V2_CQE_SIZE; caps->mtpt_entry_sz = resp_b->mtpt_entry_sz; caps->irrl_entry_sz = resp_b->irrl_entry_sz; @@ -1863,9 +1996,9 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->cqc_entry_sz = resp_b->cqc_entry_sz; caps->srqc_entry_sz = resp_b->srqc_entry_sz; caps->idx_entry_sz = resp_b->idx_entry_sz; - caps->sccc_entry_sz = resp_b->scc_ctx_entry_sz; + caps->sccc_sz = resp_b->sccc_sz; caps->max_mtu = resp_b->max_mtu; - caps->qpc_entry_sz = le16_to_cpu(resp_b->qpc_entry_sz); + caps->qpc_sz = HNS_ROCE_V2_QPC_SZ; caps->min_cqes = resp_b->min_cqes; caps->min_wqes = resp_b->min_wqes; caps->page_size_cap = le32_to_cpu(resp_b->page_size_cap); @@ -1958,6 +2091,8 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->cqc_timer_entry_sz = HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ; caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ; caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS; + caps->ceqe_size = HNS_ROCE_CEQE_SIZE; + caps->aeqe_size = HNS_ROCE_AEQE_SIZE; caps->mtt_ba_pg_sz = 0; caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS; caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS; @@ -1981,7 +2116,15 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M, V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S); - calc_pg_sz(caps->num_qps, caps->qpc_entry_sz, caps->qpc_hop_num, + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { + caps->ceqe_size = HNS_ROCE_V3_EQE_SIZE; + caps->aeqe_size = HNS_ROCE_V3_EQE_SIZE; + caps->cqe_sz = HNS_ROCE_V3_CQE_SIZE; + caps->qpc_sz = HNS_ROCE_V3_QPC_SZ; + caps->sccc_sz = HNS_ROCE_V3_SCCC_SZ; + } + + calc_pg_sz(caps->num_qps, caps->qpc_sz, caps->qpc_hop_num, caps->qpc_bt_num, &caps->qpc_buf_pg_sz, &caps->qpc_ba_pg_sz, HEM_TYPE_QPC); calc_pg_sz(caps->num_mtpts, caps->mtpt_entry_sz, caps->mpt_hop_num, @@ -1998,7 +2141,7 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; - calc_pg_sz(caps->num_qps, caps->sccc_entry_sz, + calc_pg_sz(caps->num_qps, caps->sccc_sz, caps->sccc_hop_num, caps->sccc_bt_num, &caps->sccc_buf_pg_sz, &caps->sccc_ba_pg_sz, HEM_TYPE_SCCC); @@ -2018,6 +2161,56 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) return 0; } +static int hns_roce_config_qpc_size(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_cmq_desc desc; + struct hns_roce_cfg_entry_size *cfg_size = + (struct hns_roce_cfg_entry_size *)desc.data; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_ENTRY_SIZE, + false); + + cfg_size->type = cpu_to_le32(HNS_ROCE_CFG_QPC_SIZE); + cfg_size->size = cpu_to_le32(hr_dev->caps.qpc_sz); + + return hns_roce_cmq_send(hr_dev, &desc, 1); +} + +static int hns_roce_config_sccc_size(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_cmq_desc desc; + struct hns_roce_cfg_entry_size *cfg_size = + (struct hns_roce_cfg_entry_size *)desc.data; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_ENTRY_SIZE, + false); + + cfg_size->type = cpu_to_le32(HNS_ROCE_CFG_SCCC_SIZE); + cfg_size->size = cpu_to_le32(hr_dev->caps.sccc_sz); + + return hns_roce_cmq_send(hr_dev, &desc, 1); +} + +static int hns_roce_config_entry_size(struct hns_roce_dev *hr_dev) +{ + int ret; + + if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP09) + return 0; + + ret = hns_roce_config_qpc_size(hr_dev); + if (ret) { + dev_err(hr_dev->dev, "failed to cfg qpc sz, ret = %d.\n", ret); + return ret; + } + + ret = hns_roce_config_sccc_size(hr_dev); + if (ret) + dev_err(hr_dev->dev, "failed to cfg sccc sz, ret = %d.\n", ret); + + return ret; +} + static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) { struct hns_roce_caps *caps = &hr_dev->caps; @@ -2090,9 +2283,14 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) } ret = hns_roce_v2_set_bt(hr_dev); - if (ret) - dev_err(hr_dev->dev, "Configure bt attribute fail, ret = %d.\n", - ret); + if (ret) { + dev_err(hr_dev->dev, + "Configure bt attribute fail, ret = %d.\n", ret); + return ret; + } + + /* Configure the size of QPC, SCCC, etc. */ + ret = hns_roce_config_entry_size(hr_dev); return ret; } @@ -2757,8 +2955,7 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw) static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n) { - return hns_roce_buf_offset(hr_cq->mtr.kmem, - n * HNS_ROCE_V2_CQE_ENTRY_SIZE); + return hns_roce_buf_offset(hr_cq->mtr.kmem, n * hr_cq->cqe_size); } static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n) @@ -2858,6 +3055,10 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQN_M, V2_CQC_BYTE_8_CQN_S, hr_cq->cqn); + roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQE_SIZE_M, + V2_CQC_BYTE_8_CQE_SIZE_S, hr_cq->cqe_size == + HNS_ROCE_V3_CQE_SIZE ? 1 : 0); + cq_context->cqe_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0])); roce_set_field(cq_context->byte_16_hop_addr, @@ -3025,7 +3226,8 @@ out: } static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, - struct hns_roce_v2_cqe *cqe, struct ib_wc *wc) + struct hns_roce_cq *cq, struct hns_roce_v2_cqe *cqe, + struct ib_wc *wc) { static const struct { u32 cqe_status; @@ -3066,7 +3268,7 @@ static void get_cqe_status(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, ibdev_err(&hr_dev->ib_dev, "error cqe status 0x%x:\n", cqe_status); print_hex_dump(KERN_ERR, "", DUMP_PREFIX_NONE, 16, 4, cqe, - sizeof(*cqe), false); + cq->cqe_size, false); /* * For hns ROCEE, GENERAL_ERR is an error type that is not defined in @@ -3163,7 +3365,7 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, ++wq->tail; } - get_cqe_status(hr_dev, *cur_qp, cqe, wc); + get_cqe_status(hr_dev, *cur_qp, hr_cq, cqe, wc); if (unlikely(wc->status != IB_WC_SUCCESS)) return 0; @@ -3514,16 +3716,21 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev, static int hns_roce_v2_qp_modify(struct hns_roce_dev *hr_dev, struct hns_roce_v2_qp_context *context, + struct hns_roce_v2_qp_context *qpc_mask, struct hns_roce_qp *hr_qp) { struct hns_roce_cmd_mailbox *mailbox; + int qpc_size; int ret; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); - memcpy(mailbox->buf, context, sizeof(*context) * 2); + /* The qpc size of HIP08 is only 256B, which is half of HIP09 */ + qpc_size = hr_dev->caps.qpc_sz; + memcpy(mailbox->buf, context, qpc_size); + memcpy(mailbox->buf + qpc_size, qpc_mask, qpc_size); ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_qp->qpn, 0, HNS_ROCE_CMD_MODIFY_QPC, @@ -3641,9 +3848,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, V2_QPC_BYTE_76_SRQ_EN_S, 1); } - roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_ACK_REQ_FREQ_M, - V2_QPC_BYTE_172_ACK_REQ_FREQ_S, 4); - roce_set_bit(context->byte_172_sq_psn, V2_QPC_BYTE_172_FRE_S, 1); hr_qp->access_flags = attr->qp_access_flags; @@ -3954,6 +4158,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, dma_addr_t trrl_ba; dma_addr_t irrl_ba; enum ib_mtu mtu; + u8 lp_pktn_ini; u8 port_num; u64 *mtts; u8 *dmac; @@ -4052,6 +4257,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, V2_QPC_BYTE_52_DMAC_S, 0); mtu = get_mtu(ibqp, attr); + hr_qp->path_mtu = mtu; if (attr_mask & IB_QP_PATH_MTU) { roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, @@ -4061,13 +4267,21 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, } #define MAX_LP_MSG_LEN 65536 - /* MTU*(2^LP_PKTN_INI) shouldn't be bigger than 64kb */ + /* MTU * (2 ^ LP_PKTN_INI) shouldn't be bigger than 64KB */ + lp_pktn_ini = ilog2(MAX_LP_MSG_LEN / ib_mtu_enum_to_int(mtu)); + roce_set_field(context->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M, - V2_QPC_BYTE_56_LP_PKTN_INI_S, - ilog2(MAX_LP_MSG_LEN / ib_mtu_enum_to_int(mtu))); + V2_QPC_BYTE_56_LP_PKTN_INI_S, lp_pktn_ini); roce_set_field(qpc_mask->byte_56_dqpn_err, V2_QPC_BYTE_56_LP_PKTN_INI_M, V2_QPC_BYTE_56_LP_PKTN_INI_S, 0); + /* ACK_REQ_FREQ should be larger than or equal to LP_PKTN_INI */ + roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_ACK_REQ_FREQ_M, + V2_QPC_BYTE_172_ACK_REQ_FREQ_S, lp_pktn_ini); + roce_set_field(qpc_mask->byte_172_sq_psn, + V2_QPC_BYTE_172_ACK_REQ_FREQ_M, + V2_QPC_BYTE_172_ACK_REQ_FREQ_S, 0); + roce_set_bit(qpc_mask->byte_108_rx_reqepsn, V2_QPC_BYTE_108_RX_REQ_PSN_ERR_S, 0); roce_set_field(qpc_mask->byte_96_rx_reqmsn, V2_QPC_BYTE_96_RX_REQ_MSN_M, @@ -4164,6 +4378,14 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, return 0; } +static inline u16 get_udp_sport(u32 fl, u32 lqpn, u32 rqpn) +{ + if (!fl) + fl = rdma_calc_flow_label(lqpn, rqpn); + + return rdma_flow_label_to_udp_sport(fl); +} + static int hns_roce_v2_set_path(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, @@ -4227,7 +4449,8 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, roce_set_field(context->byte_52_udpspn_dmac, V2_QPC_BYTE_52_UDPSPN_M, V2_QPC_BYTE_52_UDPSPN_S, - is_udp ? 0x12b7 : 0); + is_udp ? get_udp_sport(grh->flow_label, ibqp->qp_num, + attr->dest_qp_num) : 0); roce_set_field(qpc_mask->byte_52_udpspn_dmac, V2_QPC_BYTE_52_UDPSPN_M, V2_QPC_BYTE_52_UDPSPN_S, 0); @@ -4259,11 +4482,19 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp, V2_QPC_BYTE_28_FL_S, 0); memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw)); memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw)); + + hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); + if (unlikely(hr_qp->sl > MAX_SERVICE_LEVEL)) { + ibdev_err(ibdev, + "failed to fill QPC, sl (%d) shouldn't be larger than %d.\n", + hr_qp->sl, MAX_SERVICE_LEVEL); + return -EINVAL; + } + roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, - V2_QPC_BYTE_28_SL_S, rdma_ah_get_sl(&attr->ah_attr)); + V2_QPC_BYTE_28_SL_S, hr_qp->sl); roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, V2_QPC_BYTE_28_SL_S, 0); - hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); return 0; } @@ -4309,7 +4540,7 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp, } if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { - memset(qpc_mask, 0, sizeof(*qpc_mask)); + memset(qpc_mask, 0, hr_dev->caps.qpc_sz); modify_qp_reset_to_init(ibqp, attr, attr_mask, context, qpc_mask); } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { @@ -4532,8 +4763,9 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, * we should set all bits of the relevant fields in context mask to * 0 at the same time, else set them to 0x1. */ - memset(context, 0, sizeof(*context)); - memset(qpc_mask, 0xff, sizeof(*qpc_mask)); + memset(context, 0, hr_dev->caps.qpc_sz); + memset(qpc_mask, 0xff, hr_dev->caps.qpc_sz); + ret = hns_roce_v2_set_abs_fields(ibqp, attr, attr_mask, cur_state, new_state, context, qpc_mask); if (ret) @@ -4583,7 +4815,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, V2_QPC_BYTE_60_QP_ST_S, 0); /* SW pass context to HW */ - ret = hns_roce_v2_qp_modify(hr_dev, ctx, hr_qp); + ret = hns_roce_v2_qp_modify(hr_dev, context, qpc_mask, hr_qp); if (ret) { ibdev_err(ibdev, "failed to modify QP, ret = %d\n", ret); goto out; @@ -4646,7 +4878,7 @@ static int hns_roce_v2_query_qpc(struct hns_roce_dev *hr_dev, if (ret) goto out; - memcpy(hr_context, mailbox->buf, sizeof(*hr_context)); + memcpy(hr_context, mailbox->buf, hr_dev->caps.qpc_sz); out: hns_roce_free_cmd_mailbox(hr_dev, mailbox); @@ -4759,7 +4991,9 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_attr->retry_cnt = roce_get_field(context.byte_212_lsn, V2_QPC_BYTE_212_RETRY_CNT_M, V2_QPC_BYTE_212_RETRY_CNT_S); - qp_attr->rnr_retry = le32_to_cpu(context.rq_rnr_timer); + qp_attr->rnr_retry = roce_get_field(context.byte_244_rnr_rxack, + V2_QPC_BYTE_244_RNR_CNT_M, + V2_QPC_BYTE_244_RNR_CNT_S); done: qp_attr->cur_qp_state = qp_attr->qp_state; @@ -4775,6 +5009,7 @@ done: } qp_init_attr->cap = qp_attr->cap; + qp_init_attr->sq_sig_type = hr_qp->sq_signal_bits; out: mutex_unlock(&hr_qp->mutex); @@ -5004,6 +5239,10 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, struct hns_roce_cmd_mailbox *mailbox; int ret; + /* Resizing SRQs is not supported yet */ + if (srq_attr_mask & IB_SRQ_MAX_WR) + return -EINVAL; + if (srq_attr_mask & IB_SRQ_LIMIT) { if (srq_attr->srq_limit >= srq->wqe_cnt) return -EINVAL; @@ -5233,7 +5472,7 @@ static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq) aeqe = hns_roce_buf_offset(eq->mtr.kmem, (eq->cons_index & (eq->entries - 1)) * - HNS_ROCE_AEQ_ENTRY_SIZE); + eq->eqe_size); return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^ !!(eq->cons_index & eq->entries)) ? aeqe : NULL; @@ -5333,7 +5572,8 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq) ceqe = hns_roce_buf_offset(eq->mtr.kmem, (eq->cons_index & (eq->entries - 1)) * - HNS_ROCE_CEQ_ENTRY_SIZE); + eq->eqe_size); + return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^ (!!(eq->cons_index & eq->entries)) ? ceqe : NULL; } @@ -5374,7 +5614,7 @@ static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr) { struct hns_roce_eq *eq = eq_ptr; struct hns_roce_dev *hr_dev = eq->hr_dev; - int int_work = 0; + int int_work; if (eq->type_flag == HNS_ROCE_CEQ) /* Completion event interrupt */ @@ -5609,14 +5849,16 @@ static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq, roce_set_field(eqc->byte_36, HNS_ROCE_EQC_CONS_INDX_M, HNS_ROCE_EQC_CONS_INDX_S, HNS_ROCE_EQ_INIT_CONS_IDX); - /* set nex_eqe_ba[43:12] */ - roce_set_field(eqc->nxt_eqe_ba0, HNS_ROCE_EQC_NXT_EQE_BA_L_M, + roce_set_field(eqc->byte_40, HNS_ROCE_EQC_NXT_EQE_BA_L_M, HNS_ROCE_EQC_NXT_EQE_BA_L_S, eqe_ba[1] >> 12); - /* set nex_eqe_ba[63:44] */ - roce_set_field(eqc->nxt_eqe_ba1, HNS_ROCE_EQC_NXT_EQE_BA_H_M, + roce_set_field(eqc->byte_44, HNS_ROCE_EQC_NXT_EQE_BA_H_M, HNS_ROCE_EQC_NXT_EQE_BA_H_S, eqe_ba[1] >> 44); + roce_set_field(eqc->byte_44, HNS_ROCE_EQC_EQE_SIZE_M, + HNS_ROCE_EQC_EQE_SIZE_S, + eq->eqe_size == HNS_ROCE_V3_EQE_SIZE ? 1 : 0); + return 0; } @@ -5807,7 +6049,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev) eq_cmd = HNS_ROCE_CMD_CREATE_CEQC; eq->type_flag = HNS_ROCE_CEQ; eq->entries = hr_dev->caps.ceqe_depth; - eq->eqe_size = HNS_ROCE_CEQ_ENTRY_SIZE; + eq->eqe_size = hr_dev->caps.ceqe_size; eq->irq = hr_dev->irq[i + other_num + aeq_num]; eq->eq_max_cnt = HNS_ROCE_CEQ_DEFAULT_BURST_NUM; eq->eq_period = HNS_ROCE_CEQ_DEFAULT_INTERVAL; @@ -5816,7 +6058,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev) eq_cmd = HNS_ROCE_CMD_CREATE_AEQC; eq->type_flag = HNS_ROCE_AEQ; eq->entries = hr_dev->caps.aeqe_depth; - eq->eqe_size = HNS_ROCE_AEQ_ENTRY_SIZE; + eq->eqe_size = hr_dev->caps.aeqe_size; eq->irq = hr_dev->irq[i - comp_num + other_num]; eq->eq_max_cnt = HNS_ROCE_AEQ_DEFAULT_BURST_NUM; eq->eq_period = HNS_ROCE_AEQ_DEFAULT_INTERVAL; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index ac29be43b6bd..29c9dd4bcbc6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -60,6 +60,7 @@ #define HNS_ROCE_V2_MAX_SQ_SGE_NUM 64 #define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM 0x200000 #define HNS_ROCE_V2_MAX_SQ_INLINE 0x20 +#define HNS_ROCE_V2_MAX_RC_INL_INN_SZ 32 #define HNS_ROCE_V2_UAR_NUM 256 #define HNS_ROCE_V2_PHY_UAR_NUM 1 #define HNS_ROCE_V2_MAX_IRQ_NUM 65 @@ -77,7 +78,6 @@ #define HNS_ROCE_V2_MAX_SQ_DESC_SZ 64 #define HNS_ROCE_V2_MAX_RQ_DESC_SZ 16 #define HNS_ROCE_V2_MAX_SRQ_DESC_SZ 64 -#define HNS_ROCE_V2_QPC_ENTRY_SZ 256 #define HNS_ROCE_V2_IRRL_ENTRY_SZ 64 #define HNS_ROCE_V2_TRRL_ENTRY_SZ 48 #define HNS_ROCE_V2_EXT_ATOMIC_TRRL_ENTRY_SZ 100 @@ -86,8 +86,10 @@ #define HNS_ROCE_V2_MTPT_ENTRY_SZ 64 #define HNS_ROCE_V2_MTT_ENTRY_SZ 64 #define HNS_ROCE_V2_IDX_ENTRY_SZ 4 -#define HNS_ROCE_V2_CQE_ENTRY_SIZE 32 -#define HNS_ROCE_V2_SCCC_ENTRY_SZ 32 + +#define HNS_ROCE_V2_SCCC_SZ 32 +#define HNS_ROCE_V3_SCCC_SZ 64 + #define HNS_ROCE_V2_QPC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000 @@ -229,6 +231,7 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_CFG_TMOUT_LLM = 0x8404, HNS_ROCE_OPC_QUERY_PF_TIMER_RES = 0x8406, HNS_ROCE_OPC_QUERY_PF_CAPS_NUM = 0x8408, + HNS_ROCE_OPC_CFG_ENTRY_SIZE = 0x8409, HNS_ROCE_OPC_CFG_SGID_TB = 0x8500, HNS_ROCE_OPC_CFG_SMAC_TB = 0x8501, HNS_ROCE_OPC_POST_MB = 0x8504, @@ -309,6 +312,9 @@ struct hns_roce_v2_cq_context { #define V2_CQC_BYTE_8_CQN_S 0 #define V2_CQC_BYTE_8_CQN_M GENMASK(23, 0) +#define V2_CQC_BYTE_8_CQE_SIZE_S 27 +#define V2_CQC_BYTE_8_CQE_SIZE_M GENMASK(28, 27) + #define V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_S 0 #define V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_M GENMASK(19, 0) @@ -512,6 +518,7 @@ struct hns_roce_v2_qp_context { __le32 byte_248_ack_psn; __le32 byte_252_err_txcqn; __le32 byte_256_sqflush_rqcqe; + __le32 ext[64]; }; #define V2_QPC_BYTE_4_TST_S 0 @@ -896,6 +903,7 @@ struct hns_roce_v2_cqe { u8 smac[4]; __le32 byte_28; __le32 byte_32; + __le32 rsv[8]; }; #define V2_CQE_BYTE_4_OPCODE_S 0 @@ -1187,6 +1195,8 @@ struct hns_roce_v2_rc_send_wqe { #define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0 #define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0) +#define V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S 31 + struct hns_roce_wqe_frmr_seg { __le32 pbl_size; __le32 mode_buf_pg_sz; @@ -1537,6 +1547,18 @@ struct hns_roce_cfg_sgid_tb { __le32 vf_sgid_h; __le32 vf_sgid_type_rsv; }; + +enum { + HNS_ROCE_CFG_QPC_SIZE = BIT(0), + HNS_ROCE_CFG_SCCC_SIZE = BIT(1), +}; + +struct hns_roce_cfg_entry_size { + __le32 type; + __le32 rsv[4]; + __le32 size; +}; + #define CFG_SGID_TB_TABLE_IDX_S 0 #define CFG_SGID_TB_TABLE_IDX_M GENMASK(7, 0) @@ -1571,7 +1593,7 @@ struct hns_roce_query_pf_caps_a { u8 max_sq_desc_sz; u8 max_rq_desc_sz; u8 max_srq_desc_sz; - u8 cq_entry_sz; + u8 cqe_sz; }; struct hns_roce_query_pf_caps_b { @@ -1581,9 +1603,9 @@ struct hns_roce_query_pf_caps_b { u8 cqc_entry_sz; u8 srqc_entry_sz; u8 idx_entry_sz; - u8 scc_ctx_entry_sz; + u8 sccc_sz; u8 max_mtu; - __le16 qpc_entry_sz; + __le16 qpc_sz; __le16 qpc_timer_entry_sz; __le16 cqc_timer_entry_sz; u8 min_cqes; @@ -1777,8 +1799,8 @@ struct hns_roce_eq_context { __le32 byte_28; __le32 byte_32; __le32 byte_36; - __le32 nxt_eqe_ba0; - __le32 nxt_eqe_ba1; + __le32 byte_40; + __le32 byte_44; __le32 rsv[5]; }; @@ -1920,6 +1942,9 @@ struct hns_roce_eq_context { #define HNS_ROCE_EQC_NXT_EQE_BA_H_S 0 #define HNS_ROCE_EQC_NXT_EQE_BA_H_M GENMASK(19, 0) +#define HNS_ROCE_EQC_EQE_SIZE_S 20 +#define HNS_ROCE_EQC_EQE_SIZE_M GENMASK(21, 20) + #define HNS_ROCE_V2_CEQE_COMP_CQN_S 0 #define HNS_ROCE_V2_CEQE_COMP_CQN_M GENMASK(23, 0) @@ -1941,6 +1966,8 @@ struct hns_roce_eq_context { #define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0 #define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0) +#define MAX_SERVICE_LEVEL 0x7 + struct hns_roce_wqe_atomic_seg { __le64 fetchadd_swap_data; __le64 cmp_data; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 5907cfd878a6..afeffafc59f9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -141,8 +141,8 @@ static int hns_roce_netdev_event(struct notifier_block *self, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct hns_roce_ib_iboe *iboe = NULL; struct hns_roce_dev *hr_dev = NULL; - u8 port = 0; - int ret = 0; + int ret; + u8 port; hr_dev = container_of(self, struct hns_roce_dev, iboe.nb); iboe = &hr_dev->iboe; @@ -323,6 +323,8 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, mutex_init(&context->page_mutex); } + resp.cqe_size = hr_dev->caps.cqe_sz; + ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); if (ret) goto error_fail_copy_to_udata; @@ -454,6 +456,8 @@ static const struct ib_device_ops hns_roce_dev_mr_ops = { static const struct ib_device_ops hns_roce_dev_mw_ops = { .alloc_mw = hns_roce_alloc_mw, .dealloc_mw = hns_roce_dealloc_mw, + + INIT_RDMA_OBJ_SIZE(ib_mw, hns_roce_mw, ibmw), }; static const struct ib_device_ops hns_roce_dev_frmr_ops = { @@ -545,7 +549,8 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) if (ret) return ret; } - ret = ib_register_device(ib_dev, "hns_%d"); + dma_set_max_seg_size(dev, UINT_MAX); + ret = ib_register_device(ib_dev, "hns_%d", dev); if (ret) { dev_err(dev, "ib_register_device failed!\n"); return ret; @@ -587,7 +592,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) } ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.qp_table, - HEM_TYPE_QPC, hr_dev->caps.qpc_entry_sz, + HEM_TYPE_QPC, hr_dev->caps.qpc_sz, hr_dev->caps.num_qps, 1); if (ret) { dev_err(dev, "Failed to init QP context memory, aborting.\n"); @@ -638,11 +643,11 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) } } - if (hr_dev->caps.sccc_entry_sz) { + if (hr_dev->caps.sccc_sz) { ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.sccc_table, HEM_TYPE_SCCC, - hr_dev->caps.sccc_entry_sz, + hr_dev->caps.sccc_sz, hr_dev->caps.num_qps, 1); if (ret) { dev_err(dev, @@ -682,7 +687,7 @@ err_unmap_qpc_timer: hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qpc_timer_table); err_unmap_ctx: - if (hr_dev->caps.sccc_entry_sz) + if (hr_dev->caps.sccc_sz) hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.sccc_table); err_unmap_srq: diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index e5df3884b41d..7f81a695e9af 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -589,28 +589,22 @@ err_table: return ret; } -struct ib_mw *hns_roce_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type, - struct ib_udata *udata) +int hns_roce_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) { - struct hns_roce_dev *hr_dev = to_hr_dev(ib_pd->device); - struct hns_roce_mw *mw; + struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device); + struct hns_roce_mw *mw = to_hr_mw(ibmw); unsigned long index = 0; int ret; - mw = kmalloc(sizeof(*mw), GFP_KERNEL); - if (!mw) - return ERR_PTR(-ENOMEM); - /* Allocate a key for mw from bitmap */ ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index); if (ret) - goto err_bitmap; + return ret; mw->rkey = hw_index_to_key(index); - mw->ibmw.rkey = mw->rkey; - mw->ibmw.type = type; - mw->pdn = to_hr_pd(ib_pd)->pdn; + ibmw->rkey = mw->rkey; + mw->pdn = to_hr_pd(ibmw->pd)->pdn; mw->pbl_hop_num = hr_dev->caps.pbl_hop_num; mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz; mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz; @@ -619,15 +613,11 @@ struct ib_mw *hns_roce_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type, if (ret) goto err_mw; - return &mw->ibmw; + return 0; err_mw: hns_roce_mw_free(hr_dev, mw); - -err_bitmap: - kfree(mw); - - return ERR_PTR(ret); + return ret; } int hns_roce_dealloc_mw(struct ib_mw *ibmw) @@ -636,8 +626,6 @@ int hns_roce_dealloc_mw(struct ib_mw *ibmw) struct hns_roce_mw *mw = to_hr_mw(ibmw); hns_roce_mw_free(hr_dev, mw); - kfree(mw); - return 0; } @@ -707,19 +695,6 @@ static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr) return size; } -static inline int mtr_umem_page_count(struct ib_umem *umem, - unsigned int page_shift) -{ - int count = ib_umem_page_count(umem); - - if (page_shift >= PAGE_SHIFT) - count >>= page_shift - PAGE_SHIFT; - else - count <<= PAGE_SHIFT - page_shift; - - return count; -} - static inline size_t mtr_kmem_direct_size(bool is_direct, size_t alloc_size, unsigned int page_shift) { @@ -767,13 +742,11 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, struct ib_udata *udata, unsigned long user_addr) { struct ib_device *ibdev = &hr_dev->ib_dev; - unsigned int max_pg_shift = buf_attr->page_shift; - unsigned int best_pg_shift = 0; + unsigned int best_pg_shift; int all_pg_count = 0; size_t direct_size; size_t total_size; - unsigned long tmp; - int ret = 0; + int ret; total_size = mtr_bufs_size(buf_attr); if (total_size < 1) { @@ -782,6 +755,9 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, } if (udata) { + unsigned long pgsz_bitmap; + unsigned long page_size; + mtr->kmem = NULL; mtr->umem = ib_umem_get(ibdev, user_addr, total_size, buf_attr->user_access); @@ -790,15 +766,17 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, PTR_ERR(mtr->umem)); return -ENOMEM; } - if (buf_attr->fixed_page) { - best_pg_shift = max_pg_shift; - } else { - tmp = GENMASK(max_pg_shift, 0); - ret = ib_umem_find_best_pgsz(mtr->umem, tmp, user_addr); - best_pg_shift = (ret <= PAGE_SIZE) ? - PAGE_SHIFT : ilog2(ret); - } - all_pg_count = mtr_umem_page_count(mtr->umem, best_pg_shift); + if (buf_attr->fixed_page) + pgsz_bitmap = 1 << buf_attr->page_shift; + else + pgsz_bitmap = GENMASK(buf_attr->page_shift, PAGE_SHIFT); + + page_size = ib_umem_find_best_pgsz(mtr->umem, pgsz_bitmap, + user_addr); + if (!page_size) + return -EINVAL; + best_pg_shift = order_base_2(page_size); + all_pg_count = ib_umem_num_dma_blocks(mtr->umem, page_size); ret = 0; } else { mtr->umem = NULL; @@ -808,16 +786,15 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, return -ENOMEM; } direct_size = mtr_kmem_direct_size(is_direct, total_size, - max_pg_shift); + buf_attr->page_shift); ret = hns_roce_buf_alloc(hr_dev, total_size, direct_size, - mtr->kmem, max_pg_shift); + mtr->kmem, buf_attr->page_shift); if (ret) { ibdev_err(ibdev, "Failed to alloc kmem, ret %d\n", ret); goto err_alloc_mem; - } else { - best_pg_shift = max_pg_shift; - all_pg_count = mtr->kmem->npages; } + best_pg_shift = buf_attr->page_shift; + all_pg_count = mtr->kmem->npages; } /* must bigger than minimum hardware page shift */ @@ -967,7 +944,7 @@ static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev, unsigned int *buf_page_shift) { struct hns_roce_buf_region *r; - unsigned int page_shift = 0; + unsigned int page_shift; int page_cnt = 0; size_t buf_size; int region_cnt; diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index b10c50b8736e..98f69496adb4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -82,9 +82,10 @@ int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) return 0; } -void hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) +int hns_roce_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { hns_roce_pd_free(to_hr_dev(pd->device), to_hr_pd(pd)->pdn); + return 0; } int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index c063c450c715..6c081dd985fc 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -41,8 +41,6 @@ #include "hns_roce_hem.h" #include -#define SQP_NUM (2 * HNS_ROCE_MAX_PORTS) - static void flush_work_handle(struct work_struct *work) { struct hns_roce_work *flush_work = container_of(work, @@ -288,7 +286,7 @@ static int alloc_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) } } - if (hr_dev->caps.sccc_entry_sz) { + if (hr_dev->caps.sccc_sz) { /* Alloc memory for SCC CTX */ ret = hns_roce_table_get(hr_dev, &qp_table->sccc_table, hr_qp->qpn); @@ -551,10 +549,9 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, int ret; if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes || - cap->max_send_sge > hr_dev->caps.max_sq_sg || - cap->max_inline_data > hr_dev->caps.max_sq_inline) { + cap->max_send_sge > hr_dev->caps.max_sq_sg) { ibdev_err(ibdev, - "failed to check SQ WR, SGE or inline num, ret = %d.\n", + "failed to check SQ WR or SGE num, ret = %d.\n", -EINVAL); return -EINVAL; } @@ -577,9 +574,6 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, cap->max_send_wr = cnt; cap->max_send_sge = hr_qp->sq.max_gs; - /* We don't support inline sends for kernel QPs (yet) */ - cap->max_inline_data = 0; - return 0; } @@ -847,6 +841,11 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, hr_qp->ibqp.qp_type = init_attr->qp_type; + if (init_attr->cap.max_inline_data > hr_dev->caps.max_sq_inline) + init_attr->cap.max_inline_data = hr_dev->caps.max_sq_inline; + + hr_qp->max_inline_data = init_attr->cap.max_inline_data; + if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR; else @@ -1014,53 +1013,32 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, int ret; switch (init_attr->qp_type) { - case IB_QPT_RC: { - hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL); - if (!hr_qp) - return ERR_PTR(-ENOMEM); - - ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, - hr_qp); - if (ret) { - ibdev_err(ibdev, "Create QP 0x%06lx failed(%d)\n", - hr_qp->qpn, ret); - kfree(hr_qp); - return ERR_PTR(ret); - } - + case IB_QPT_RC: + case IB_QPT_GSI: break; - } - case IB_QPT_GSI: { - /* Userspace is not allowed to create special QPs: */ - if (udata) { - ibdev_err(ibdev, "not support usr space GSI\n"); - return ERR_PTR(-EINVAL); - } - - hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL); - if (!hr_qp) - return ERR_PTR(-ENOMEM); - - hr_qp->port = init_attr->port_num - 1; - hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port]; - - ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, - hr_qp); - if (ret) { - ibdev_err(ibdev, "Create GSI QP failed!\n"); - kfree(hr_qp); - return ERR_PTR(ret); - } - - break; - } - default:{ + default: ibdev_err(ibdev, "not support QP type %d\n", init_attr->qp_type); return ERR_PTR(-EOPNOTSUPP); } + + hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL); + if (!hr_qp) + return ERR_PTR(-ENOMEM); + + if (init_attr->qp_type == IB_QPT_GSI) { + hr_qp->port = init_attr->port_num - 1; + hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port]; } + ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, hr_qp); + if (ret) { + ibdev_err(ibdev, "Create QP type 0x%x failed(%d)\n", + init_attr->qp_type, ret); + ibdev_err(ibdev, "Create GSI QP failed!\n"); + kfree(hr_qp); + return ERR_PTR(ret); + } return &hr_qp->ibqp; } @@ -1161,8 +1139,10 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, mutex_lock(&hr_qp->mutex); - cur_state = attr_mask & IB_QP_CUR_STATE ? - attr->cur_qp_state : (enum ib_qp_state)hr_qp->state; + if (attr_mask & IB_QP_CUR_STATE && attr->cur_qp_state != hr_qp->state) + goto out; + + cur_state = hr_qp->state; new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; if (ibqp->uobject && diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index b9e2dbd372b6..8caf74e44efd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -285,7 +285,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, struct hns_roce_srq *srq = to_hr_srq(ib_srq); struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_ib_create_srq ucmd = {}; - int ret = 0; + int ret; u32 cqn; /* Check the actual SRQ wqe and SRQ sge num */ @@ -363,7 +363,7 @@ err_buf_alloc: return ret; } -void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) +int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); struct hns_roce_srq *srq = to_hr_srq(ibsrq); @@ -372,6 +372,7 @@ void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) free_srq_idx(hr_dev, srq); free_srq_wrid(srq); free_srq_buf(hr_dev, srq); + return 0; } int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index 25747b85a79c..832b80de004f 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -409,8 +409,8 @@ static inline struct i40iw_qp *to_iwqp(struct ib_qp *ibqp) } /* i40iw.c */ -void i40iw_add_ref(struct ib_qp *); -void i40iw_rem_ref(struct ib_qp *); +void i40iw_qp_add_ref(struct ib_qp *ibqp); +void i40iw_qp_rem_ref(struct ib_qp *ibqp); struct ib_qp *i40iw_get_qp(struct ib_device *, int); void i40iw_flush_wqes(struct i40iw_device *iwdev, @@ -554,9 +554,8 @@ enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev, bool wait); void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf); void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp); -void i40iw_free_qp_resources(struct i40iw_device *iwdev, - struct i40iw_qp *iwqp, - u32 qp_num); +void i40iw_free_qp_resources(struct i40iw_qp *iwqp); + enum i40iw_status_code i40iw_obj_aligned_mem(struct i40iw_device *iwdev, struct i40iw_dma_mem *memptr, u32 size, u32 mask); diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index a3b95805c154..3053c345a5a3 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -2322,7 +2322,7 @@ static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *cm_node) iwqp = cm_node->iwqp; if (iwqp) { iwqp->cm_node = NULL; - i40iw_rem_ref(&iwqp->ibqp); + i40iw_qp_rem_ref(&iwqp->ibqp); cm_node->iwqp = NULL; } else if (cm_node->qhash_set) { i40iw_get_addr_info(cm_node, &nfo); @@ -3452,7 +3452,7 @@ void i40iw_cm_disconn(struct i40iw_qp *iwqp) kfree(work); return; } - i40iw_add_ref(&iwqp->ibqp); + i40iw_qp_add_ref(&iwqp->ibqp); spin_unlock_irqrestore(&iwdev->qptable_lock, flags); work->iwqp = iwqp; @@ -3623,7 +3623,7 @@ static void i40iw_disconnect_worker(struct work_struct *work) kfree(dwork); i40iw_cm_disconn_true(iwqp); - i40iw_rem_ref(&iwqp->ibqp); + i40iw_qp_rem_ref(&iwqp->ibqp); } /** @@ -3745,7 +3745,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_node->lsmm_size = accept.size + conn_param->private_data_len; i40iw_cm_init_tsa_conn(iwqp, cm_node); cm_id->add_ref(cm_id); - i40iw_add_ref(&iwqp->ibqp); + i40iw_qp_add_ref(&iwqp->ibqp); attr.qp_state = IB_QPS_RTS; cm_node->qhash_set = false; @@ -3908,7 +3908,7 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) iwqp->cm_node = cm_node; cm_node->iwqp = iwqp; iwqp->cm_id = cm_id; - i40iw_add_ref(&iwqp->ibqp); + i40iw_qp_add_ref(&iwqp->ibqp); if (cm_node->state != I40IW_CM_STATE_OFFLOADED) { cm_node->state = I40IW_CM_STATE_SYN_SENT; diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index e1085634b8d9..56fdc161f6f8 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -313,7 +313,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) __func__, info->qp_cq_id); continue; } - i40iw_add_ref(&iwqp->ibqp); + i40iw_qp_add_ref(&iwqp->ibqp); spin_unlock_irqrestore(&iwdev->qptable_lock, flags); qp = &iwqp->sc_qp; spin_lock_irqsave(&iwqp->lock, flags); @@ -426,7 +426,7 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) break; } if (info->qp) - i40iw_rem_ref(&iwqp->ibqp); + i40iw_qp_rem_ref(&iwqp->ibqp); } while (1); if (aeqcnt) diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 58a433135a03..2408b279e4c2 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -192,9 +192,9 @@ static void i40iw_enable_intr(struct i40iw_sc_dev *dev, u32 msix_id) * i40iw_dpc - tasklet for aeq and ceq 0 * @data: iwarp device */ -static void i40iw_dpc(unsigned long data) +static void i40iw_dpc(struct tasklet_struct *t) { - struct i40iw_device *iwdev = (struct i40iw_device *)data; + struct i40iw_device *iwdev = from_tasklet(iwdev, t, dpc_tasklet); if (iwdev->msix_shared) i40iw_process_ceq(iwdev, iwdev->ceqlist); @@ -206,9 +206,9 @@ static void i40iw_dpc(unsigned long data) * i40iw_ceq_dpc - dpc handler for CEQ * @data: data points to CEQ */ -static void i40iw_ceq_dpc(unsigned long data) +static void i40iw_ceq_dpc(struct tasklet_struct *t) { - struct i40iw_ceq *iwceq = (struct i40iw_ceq *)data; + struct i40iw_ceq *iwceq = from_tasklet(iwceq, t, dpc_tasklet); struct i40iw_device *iwdev = iwceq->iwdev; i40iw_process_ceq(iwdev, iwceq); @@ -689,10 +689,10 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw enum i40iw_status_code status; if (iwdev->msix_shared && !ceq_id) { - tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev); + tasklet_setup(&iwdev->dpc_tasklet, i40iw_dpc); status = request_irq(msix_vec->irq, i40iw_irq_handler, 0, "AEQCEQ", iwdev); } else { - tasklet_init(&iwceq->dpc_tasklet, i40iw_ceq_dpc, (unsigned long)iwceq); + tasklet_setup(&iwceq->dpc_tasklet, i40iw_ceq_dpc); status = request_irq(msix_vec->irq, i40iw_ceq_handler, 0, "CEQ", iwceq); } @@ -841,7 +841,7 @@ static enum i40iw_status_code i40iw_configure_aeq_vector(struct i40iw_device *iw u32 ret = 0; if (!iwdev->msix_shared) { - tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev); + tasklet_setup(&iwdev->dpc_tasklet, i40iw_dpc); ret = request_irq(msix_vec->irq, i40iw_irq_handler, 0, "i40iw", iwdev); } if (ret) { @@ -1573,7 +1573,7 @@ static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl, status = i40iw_save_msix_info(iwdev, ldev); if (status) return status; - iwdev->hw.dev_context = (void *)ldev->pcidev; + iwdev->hw.pcidev = ldev->pcidev; iwdev->hw.hw_addr = ldev->hw_addr; status = i40iw_allocate_dma_mem(&iwdev->hw, &iwdev->obj_mem, 8192, 4096); diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.c b/drivers/infiniband/hw/i40iw/i40iw_pble.c index 540aab5e502d..5f97643e22e5 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_pble.c +++ b/drivers/infiniband/hw/i40iw/i40iw_pble.c @@ -167,7 +167,7 @@ static enum i40iw_status_code add_sd_direct(struct i40iw_sc_dev *dev, */ static void i40iw_free_vmalloc_mem(struct i40iw_hw *hw, struct i40iw_chunk *chunk) { - struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context; + struct pci_dev *pcidev = hw->pcidev; int i; if (!chunk->pg_cnt) @@ -193,7 +193,7 @@ static enum i40iw_status_code i40iw_get_vmalloc_mem(struct i40iw_hw *hw, struct i40iw_chunk *chunk, int pg_cnt) { - struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context; + struct pci_dev *pcidev = hw->pcidev; struct page *page; u8 *addr; u32 size; diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h index 54c323c40d96..c3babf3cbb8e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_type.h +++ b/drivers/infiniband/hw/i40iw/i40iw_type.h @@ -73,6 +73,7 @@ struct i40iw_pd_ops; struct i40iw_priv_qp_ops; struct i40iw_priv_cq_ops; struct i40iw_hmc_ops; +struct pci_dev; enum i40iw_page_size { I40IW_PAGE_SIZE_4K, @@ -261,7 +262,7 @@ struct i40iw_vsi_pestat { struct i40iw_hw { u8 __iomem *hw_addr; - void *dev_context; + struct pci_dev *pcidev; struct i40iw_hmc_info hmc; }; diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index e07fb37af086..644f8c641aa0 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -477,25 +477,6 @@ void i40iw_cleanup_pending_cqp_op(struct i40iw_device *iwdev) } } -/** - * i40iw_free_qp - callback after destroy cqp completes - * @cqp_request: cqp request for destroy qp - * @num: not used - */ -static void i40iw_free_qp(struct i40iw_cqp_request *cqp_request, u32 num) -{ - struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)cqp_request->param; - struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp; - struct i40iw_device *iwdev; - u32 qp_num = iwqp->ibqp.qp_num; - - iwdev = iwqp->iwdev; - - i40iw_rem_pdusecount(iwqp->iwpd, iwdev); - i40iw_free_qp_resources(iwdev, iwqp, qp_num); - i40iw_rem_devusecount(iwdev); -} - /** * i40iw_wait_event - wait for completion * @iwdev: iwarp device @@ -616,26 +597,23 @@ void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev) } /** - * i40iw_add_ref - add refcount for qp + * i40iw_qp_add_ref - add refcount for qp * @ibqp: iqarp qp */ -void i40iw_add_ref(struct ib_qp *ibqp) +void i40iw_qp_add_ref(struct ib_qp *ibqp) { struct i40iw_qp *iwqp = (struct i40iw_qp *)ibqp; - atomic_inc(&iwqp->refcount); + refcount_inc(&iwqp->refcount); } /** - * i40iw_rem_ref - rem refcount for qp and free if 0 + * i40iw_qp_rem_ref - rem refcount for qp and free if 0 * @ibqp: iqarp qp */ -void i40iw_rem_ref(struct ib_qp *ibqp) +void i40iw_qp_rem_ref(struct ib_qp *ibqp) { struct i40iw_qp *iwqp; - enum i40iw_status_code status; - struct i40iw_cqp_request *cqp_request; - struct cqp_commands_info *cqp_info; struct i40iw_device *iwdev; u32 qp_num; unsigned long flags; @@ -643,7 +621,7 @@ void i40iw_rem_ref(struct ib_qp *ibqp) iwqp = to_iwqp(ibqp); iwdev = iwqp->iwdev; spin_lock_irqsave(&iwdev->qptable_lock, flags); - if (!atomic_dec_and_test(&iwqp->refcount)) { + if (!refcount_dec_and_test(&iwqp->refcount)) { spin_unlock_irqrestore(&iwdev->qptable_lock, flags); return; } @@ -651,25 +629,8 @@ void i40iw_rem_ref(struct ib_qp *ibqp) qp_num = iwqp->ibqp.qp_num; iwdev->qp_table[qp_num] = NULL; spin_unlock_irqrestore(&iwdev->qptable_lock, flags); - cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false); - if (!cqp_request) - return; + complete(&iwqp->free_qp); - cqp_request->callback_fcn = i40iw_free_qp; - cqp_request->param = (void *)&iwqp->sc_qp; - cqp_info = &cqp_request->info; - cqp_info->cqp_cmd = OP_QP_DESTROY; - cqp_info->post_sq = 1; - cqp_info->in.u.qp_destroy.qp = &iwqp->sc_qp; - cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request; - cqp_info->in.u.qp_destroy.remove_hash_idx = true; - status = i40iw_handle_cqp_op(iwdev, cqp_request); - if (!status) - return; - - i40iw_rem_pdusecount(iwqp->iwpd, iwdev); - i40iw_free_qp_resources(iwdev, iwqp, qp_num); - i40iw_rem_devusecount(iwdev); } /** @@ -751,7 +712,7 @@ enum i40iw_status_code i40iw_allocate_dma_mem(struct i40iw_hw *hw, u64 size, u32 alignment) { - struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context; + struct pci_dev *pcidev = hw->pcidev; if (!mem) return I40IW_ERR_PARAM; @@ -770,7 +731,7 @@ enum i40iw_status_code i40iw_allocate_dma_mem(struct i40iw_hw *hw, */ void i40iw_free_dma_mem(struct i40iw_hw *hw, struct i40iw_dma_mem *mem) { - struct pci_dev *pcidev = (struct pci_dev *)hw->dev_context; + struct pci_dev *pcidev = hw->pcidev; if (!mem || !mem->va) return; @@ -936,7 +897,7 @@ static void i40iw_terminate_timeout(struct timer_list *t) struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)&iwqp->sc_qp; i40iw_terminate_done(qp, 1); - i40iw_rem_ref(&iwqp->ibqp); + i40iw_qp_rem_ref(&iwqp->ibqp); } /** @@ -948,7 +909,7 @@ void i40iw_terminate_start_timer(struct i40iw_sc_qp *qp) struct i40iw_qp *iwqp; iwqp = (struct i40iw_qp *)qp->back_qp; - i40iw_add_ref(&iwqp->ibqp); + i40iw_qp_add_ref(&iwqp->ibqp); timer_setup(&iwqp->terminate_timer, i40iw_terminate_timeout, 0); iwqp->terminate_timer.expires = jiffies + HZ; add_timer(&iwqp->terminate_timer); @@ -964,7 +925,7 @@ void i40iw_terminate_del_timer(struct i40iw_sc_qp *qp) iwqp = (struct i40iw_qp *)qp->back_qp; if (del_timer(&iwqp->terminate_timer)) - i40iw_rem_ref(&iwqp->ibqp); + i40iw_qp_rem_ref(&iwqp->ibqp); } /** diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index b51339328a51..581ecbadf586 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -328,12 +328,13 @@ error: * @ibpd: ptr of pd to be deallocated * @udata: user data or null for kernel object */ -static void i40iw_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +static int i40iw_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct i40iw_pd *iwpd = to_iwpd(ibpd); struct i40iw_device *iwdev = to_iwdev(ibpd->device); i40iw_rem_pdusecount(iwpd, iwdev); + return 0; } /** @@ -363,11 +364,11 @@ static struct i40iw_pbl *i40iw_get_pbl(unsigned long va, * @iwqp: qp ptr (user or kernel) * @qp_num: qp number assigned */ -void i40iw_free_qp_resources(struct i40iw_device *iwdev, - struct i40iw_qp *iwqp, - u32 qp_num) +void i40iw_free_qp_resources(struct i40iw_qp *iwqp) { struct i40iw_pbl *iwpbl = &iwqp->iwpbl; + struct i40iw_device *iwdev = iwqp->iwdev; + u32 qp_num = iwqp->ibqp.qp_num; i40iw_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp); i40iw_dealloc_push_page(iwdev, &iwqp->sc_qp); @@ -379,7 +380,7 @@ void i40iw_free_qp_resources(struct i40iw_device *iwdev, i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->kqp.dma_mem); kfree(iwqp->kqp.wrid_mem); iwqp->kqp.wrid_mem = NULL; - kfree(iwqp->allocated_buffer); + kfree(iwqp); } /** @@ -401,6 +402,10 @@ static void i40iw_clean_cqes(struct i40iw_qp *iwqp, struct i40iw_cq *iwcq) static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct i40iw_qp *iwqp = to_iwqp(ibqp); + struct ib_qp_attr attr; + struct i40iw_device *iwdev = iwqp->iwdev; + + memset(&attr, 0, sizeof(attr)); iwqp->destroyed = 1; @@ -415,7 +420,15 @@ static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) } } - i40iw_rem_ref(&iwqp->ibqp); + attr.qp_state = IB_QPS_ERR; + i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL); + i40iw_qp_rem_ref(&iwqp->ibqp); + wait_for_completion(&iwqp->free_qp); + i40iw_cqp_qp_destroy_cmd(&iwdev->sc_dev, &iwqp->sc_qp); + i40iw_rem_pdusecount(iwqp->iwpd, iwdev); + i40iw_free_qp_resources(iwqp); + i40iw_rem_devusecount(iwdev); + return 0; } @@ -524,7 +537,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, struct i40iw_create_qp_req req; struct i40iw_create_qp_resp uresp; u32 qp_num = 0; - void *mem; enum i40iw_status_code ret; int err_code; int sq_size; @@ -566,16 +578,15 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge; init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data; - mem = kzalloc(sizeof(*iwqp), GFP_KERNEL); - if (!mem) + iwqp = kzalloc(sizeof(*iwqp), GFP_KERNEL); + if (!iwqp) return ERR_PTR(-ENOMEM); - iwqp = (struct i40iw_qp *)mem; - iwqp->allocated_buffer = mem; qp = &iwqp->sc_qp; qp->back_qp = (void *)iwqp; qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX; + iwqp->iwdev = iwdev; iwqp->ctx_info.iwarp_info = &iwqp->iwarp_info; if (i40iw_allocate_dma_mem(dev->hw, @@ -600,7 +611,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, goto error; } - iwqp->iwdev = iwdev; iwqp->iwpd = iwpd; iwqp->ibqp.qp_num = qp_num; qp = &iwqp->sc_qp; @@ -714,7 +724,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, goto error; } - i40iw_add_ref(&iwqp->ibqp); + refcount_set(&iwqp->refcount, 1); spin_lock_init(&iwqp->lock); iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0; iwdev->qp_table[qp_num] = iwqp; @@ -736,10 +746,11 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, } init_completion(&iwqp->sq_drained); init_completion(&iwqp->rq_drained); + init_completion(&iwqp->free_qp); return &iwqp->ibqp; error: - i40iw_free_qp_resources(iwdev, iwqp, qp_num); + i40iw_free_qp_resources(iwqp); return ERR_PTR(err_code); } @@ -1052,7 +1063,7 @@ void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq) * @ib_cq: cq pointer * @udata: user data or NULL for kernel object */ -static void i40iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) +static int i40iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct i40iw_cq *iwcq; struct i40iw_device *iwdev; @@ -1064,6 +1075,7 @@ static void i40iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) i40iw_cq_wq_destroy(iwdev, cq); cq_free_resources(iwdev, iwcq); i40iw_rem_devusecount(iwdev); + return 0; } /** @@ -1320,8 +1332,7 @@ static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr, if (iwmr->type == IW_MEMREG_TYPE_QP) iwpbl->qp_mr.sq_page = sg_page(region->sg_head.sgl); - rdma_for_each_block(region->sg_head.sgl, &biter, region->nmap, - iwmr->page_size) { + rdma_umem_for_each_dma_block(region, &biter, iwmr->page_size) { *pbl = rdma_block_iter_dma_address(&biter); pbl = i40iw_next_pbl_addr(pbl, &pinfo, &idx); } @@ -1744,15 +1755,12 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, struct i40iw_mr *iwmr; struct ib_umem *region; struct i40iw_mem_reg_req req; - u64 pbl_depth = 0; u32 stag = 0; u16 access; - u64 region_length; bool use_pbles = false; unsigned long flags; int err = -ENOSYS; int ret; - int pg_shift; if (!udata) return ERR_PTR(-EOPNOTSUPP); @@ -1787,18 +1795,13 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd, if (req.reg_type == IW_MEMREG_TYPE_MEM) iwmr->page_size = ib_umem_find_best_pgsz(region, SZ_4K | SZ_2M, virt); - - region_length = region->length + (start & (iwmr->page_size - 1)); - pg_shift = ffs(iwmr->page_size) - 1; - pbl_depth = region_length >> pg_shift; - pbl_depth += (region_length & (iwmr->page_size - 1)) ? 1 : 0; iwmr->length = region->length; iwpbl->user_base = virt; palloc = &iwpbl->pble_alloc; iwmr->type = req.reg_type; - iwmr->page_cnt = (u32)pbl_depth; + iwmr->page_cnt = ib_umem_num_dma_blocks(region, iwmr->page_size); switch (req.reg_type) { case IW_MEMREG_TYPE_QP: @@ -2636,13 +2639,13 @@ static const struct ib_device_ops i40iw_dev_ops = { .get_hw_stats = i40iw_get_hw_stats, .get_port_immutable = i40iw_port_immutable, .iw_accept = i40iw_accept, - .iw_add_ref = i40iw_add_ref, + .iw_add_ref = i40iw_qp_add_ref, .iw_connect = i40iw_connect, .iw_create_listen = i40iw_create_listen, .iw_destroy_listen = i40iw_destroy_listen, .iw_get_qp = i40iw_get_qp, .iw_reject = i40iw_reject, - .iw_rem_ref = i40iw_rem_ref, + .iw_rem_ref = i40iw_qp_rem_ref, .map_mr_sg = i40iw_map_mr_sg, .mmap = i40iw_mmap, .modify_qp = i40iw_modify_qp, @@ -2668,7 +2671,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev { struct i40iw_ib_device *iwibdev; struct net_device *netdev = iwdev->netdev; - struct pci_dev *pcidev = (struct pci_dev *)iwdev->hw.dev_context; + struct pci_dev *pcidev = iwdev->hw.pcidev; iwibdev = ib_alloc_device(i40iw_ib_device, ibdev); if (!iwibdev) { @@ -2758,7 +2761,8 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev) if (ret) goto error; - ret = ib_register_device(&iwibdev->ibdev, "i40iw%d"); + dma_set_max_seg_size(&iwdev->hw.pcidev->dev, UINT_MAX); + ret = ib_register_device(&iwibdev->ibdev, "i40iw%d", &iwdev->hw.pcidev->dev); if (ret) goto error; diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h index 331bc21cbcc7..bab71f3e5637 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h @@ -139,7 +139,7 @@ struct i40iw_qp { struct i40iw_qp_host_ctx_info ctx_info; struct i40iwarp_offload_info iwarp_info; void *allocated_buffer; - atomic_t refcount; + refcount_t refcount; struct iw_cm_id *cm_id; void *cm_node; struct ib_mr *lsmm_mr; @@ -174,5 +174,6 @@ struct i40iw_qp { struct i40iw_dma_mem ietf_mem; struct completion sq_drained; struct completion rq_drained; + struct completion free_qp; }; #endif diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 5f8f8d5c0ce0..7321d6ab5fe1 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -232,8 +232,3 @@ int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } - -void mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags) -{ - return; -} diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c index b591861934b3..4aff1c8298b1 100644 --- a/drivers/infiniband/hw/mlx4/cm.c +++ b/drivers/infiniband/hw/mlx4/cm.c @@ -54,11 +54,20 @@ struct id_map_entry { struct delayed_work timeout; }; +struct rej_tmout_entry { + int slave; + u32 rem_pv_cm_id; + struct delayed_work timeout; + struct xarray *xa_rej_tmout; +}; + struct cm_generic_msg { struct ib_mad_hdr hdr; __be32 local_comm_id; __be32 remote_comm_id; + unsigned char unused[2]; + __be16 rej_reason; }; struct cm_sidr_generic_msg { @@ -280,11 +289,15 @@ static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id) if (!sriov->is_going_down && !id->scheduled_delete) { id->scheduled_delete = 1; schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT); + } else if (id->scheduled_delete) { + /* Adjust timeout if already scheduled */ + mod_delayed_work(system_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT); } spin_unlock_irqrestore(&sriov->going_down_lock, flags); spin_unlock(&sriov->id_map_lock); } +#define REJ_REASON(m) be16_to_cpu(((struct cm_generic_msg *)(m))->rej_reason) int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id, struct ib_mad *mad) { @@ -293,8 +306,10 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id int pv_cm_id = -1; if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID || - mad->mad_hdr.attr_id == CM_REP_ATTR_ID || - mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { + mad->mad_hdr.attr_id == CM_REP_ATTR_ID || + mad->mad_hdr.attr_id == CM_MRA_ATTR_ID || + mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID || + (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID && REJ_REASON(mad) == IB_CM_REJ_TIMEOUT)) { sl_cm_id = get_local_comm_id(mad); id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id); if (id) @@ -314,8 +329,8 @@ int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id } if (!id) { - pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL!\n", - slave_id, sl_cm_id); + pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL! attr_id: 0x%x\n", + slave_id, sl_cm_id, be16_to_cpu(mad->mad_hdr.attr_id)); return -EINVAL; } @@ -327,11 +342,94 @@ cont: return 0; } +static void rej_tmout_timeout(struct work_struct *work) +{ + struct delayed_work *delay = to_delayed_work(work); + struct rej_tmout_entry *item = container_of(delay, struct rej_tmout_entry, timeout); + struct rej_tmout_entry *deleted; + + deleted = xa_cmpxchg(item->xa_rej_tmout, item->rem_pv_cm_id, item, NULL, 0); + + if (deleted != item) + pr_debug("deleted(%p) != item(%p)\n", deleted, item); + + kfree(item); +} + +static int alloc_rej_tmout(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id, int slave) +{ + struct rej_tmout_entry *item; + struct rej_tmout_entry *old; + int ret = 0; + + xa_lock(&sriov->xa_rej_tmout); + item = xa_load(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id); + + if (item) { + if (xa_err(item)) + ret = xa_err(item); + else + /* If a retry, adjust delayed work */ + mod_delayed_work(system_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT); + goto err_or_exists; + } + xa_unlock(&sriov->xa_rej_tmout); + + item = kmalloc(sizeof(*item), GFP_KERNEL); + if (!item) + return -ENOMEM; + + INIT_DELAYED_WORK(&item->timeout, rej_tmout_timeout); + item->slave = slave; + item->rem_pv_cm_id = rem_pv_cm_id; + item->xa_rej_tmout = &sriov->xa_rej_tmout; + + old = xa_cmpxchg(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id, NULL, item, GFP_KERNEL); + if (old) { + pr_debug( + "Non-null old entry (%p) or error (%d) when inserting\n", + old, xa_err(old)); + kfree(item); + return xa_err(old); + } + + schedule_delayed_work(&item->timeout, CM_CLEANUP_CACHE_TIMEOUT); + + return 0; + +err_or_exists: + xa_unlock(&sriov->xa_rej_tmout); + return ret; +} + +static int lookup_rej_tmout_slave(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id) +{ + struct rej_tmout_entry *item; + int slave; + + xa_lock(&sriov->xa_rej_tmout); + item = xa_load(&sriov->xa_rej_tmout, (unsigned long)rem_pv_cm_id); + + if (!item || xa_err(item)) { + pr_debug("Could not find slave. rem_pv_cm_id 0x%x error: %d\n", + rem_pv_cm_id, xa_err(item)); + slave = !item ? -ENOENT : xa_err(item); + } else { + slave = item->slave; + } + xa_unlock(&sriov->xa_rej_tmout); + + return slave; +} + int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, struct ib_mad *mad) { + struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov; + u32 rem_pv_cm_id = get_local_comm_id(mad); u32 pv_cm_id; struct id_map_entry *id; + int sts; if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID || mad->mad_hdr.attr_id == CM_SIDR_REQ_ATTR_ID) { @@ -347,6 +445,13 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, be64_to_cpu(gid.global.interface_id)); return -ENOENT; } + + sts = alloc_rej_tmout(sriov, rem_pv_cm_id, *slave); + if (sts) + /* Even if this fails, we pass on the REQ to the slave */ + pr_debug("Could not allocate rej_tmout entry. rem_pv_cm_id 0x%x slave %d status %d\n", + rem_pv_cm_id, *slave, sts); + return 0; } @@ -354,7 +459,14 @@ int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave, id = id_map_get(ibdev, (int *)&pv_cm_id, -1, -1); if (!id) { - pr_debug("Couldn't find an entry for pv_cm_id 0x%x\n", pv_cm_id); + if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID && + REJ_REASON(mad) == IB_CM_REJ_TIMEOUT && slave) { + *slave = lookup_rej_tmout_slave(sriov, rem_pv_cm_id); + + return (*slave < 0) ? *slave : 0; + } + pr_debug("Couldn't find an entry for pv_cm_id 0x%x, attr_id 0x%x\n", + pv_cm_id, be16_to_cpu(mad->mad_hdr.attr_id)); return -ENOENT; } @@ -375,6 +487,34 @@ void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev) INIT_LIST_HEAD(&dev->sriov.cm_list); dev->sriov.sl_id_map = RB_ROOT; xa_init_flags(&dev->sriov.pv_id_table, XA_FLAGS_ALLOC); + xa_init(&dev->sriov.xa_rej_tmout); +} + +static void rej_tmout_xa_cleanup(struct mlx4_ib_sriov *sriov, int slave) +{ + struct rej_tmout_entry *item; + bool flush_needed = false; + unsigned long id; + int cnt = 0; + + xa_lock(&sriov->xa_rej_tmout); + xa_for_each(&sriov->xa_rej_tmout, id, item) { + if (slave < 0 || slave == item->slave) { + mod_delayed_work(system_wq, &item->timeout, 0); + flush_needed = true; + ++cnt; + } + } + xa_unlock(&sriov->xa_rej_tmout); + + if (flush_needed) { + flush_scheduled_work(); + pr_debug("Deleted %d entries in xarray for slave %d during cleanup\n", + cnt, slave); + } + + if (slave < 0) + WARN_ON(!xa_empty(&sriov->xa_rej_tmout)); } /* slave = -1 ==> all slaves */ @@ -444,4 +584,6 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave) list_del(&map->list); kfree(map); } + + rej_tmout_xa_cleanup(sriov, slave); } diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 8a3436994f80..e9b5a4d57fb1 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -149,7 +149,6 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_udata *udata, if (IS_ERR(*umem)) return PTR_ERR(*umem); - n = ib_umem_page_count(*umem); shift = mlx4_ib_umem_calc_optimal_mtt_size(*umem, 0, &n); err = mlx4_mtt_init(dev->dev, n, shift, &buf->mtt); @@ -475,7 +474,7 @@ out: return err; } -void mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) +int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(cq->device); struct mlx4_ib_cq *mcq = to_mcq(cq); @@ -495,6 +494,7 @@ void mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) mlx4_db_free(dev->dev, &mcq->db); } ib_umem_release(mcq->umem); + return 0; } static void dump_cqe(void *cqe) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index abe68708d6d6..8bd16474708f 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -500,6 +500,13 @@ static int get_gids_from_l3_hdr(struct ib_grh *grh, union ib_gid *sgid, sgid, dgid); } +static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave) +{ + int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave; + + return (qpn >= proxy_start && qpn <= proxy_start + 1); +} + int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type dest_qpt, struct ib_wc *wc, struct ib_grh *grh, struct ib_mad *mad) @@ -520,8 +527,10 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, u16 cached_pkey; u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH; - if (dest_qpt > IB_QPT_GSI) + if (dest_qpt > IB_QPT_GSI) { + pr_debug("dest_qpt (%d) > IB_QPT_GSI\n", dest_qpt); return -EINVAL; + } tun_ctx = dev->sriov.demux[port-1].tun[slave]; @@ -538,12 +547,20 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, if (dest_qpt) { u16 pkey_ix; ret = ib_get_cached_pkey(&dev->ib_dev, port, wc->pkey_index, &cached_pkey); - if (ret) + if (ret) { + pr_debug("unable to get %s cached pkey for index %d, ret %d\n", + is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI", + wc->pkey_index, ret); return -EINVAL; + } ret = find_slave_port_pkey_ix(dev, slave, port, cached_pkey, &pkey_ix); - if (ret) + if (ret) { + pr_debug("unable to get %s pkey ix for pkey 0x%x, ret %d\n", + is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI", + cached_pkey, ret); return -EINVAL; + } tun_pkey_ix = pkey_ix; } else tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0]; @@ -715,7 +732,8 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad); if (err) - pr_debug("failed sending to slave %d via tunnel qp (%d)\n", + pr_debug("failed sending %s to slave %d via tunnel qp (%d)\n", + is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI", slave, err); return 0; } @@ -794,7 +812,8 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad); if (err) - pr_debug("failed sending to slave %d via tunnel qp (%d)\n", + pr_debug("failed sending %s to slave %d via tunnel qp (%d)\n", + is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI", slave, err); return 0; } @@ -807,27 +826,6 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, int err; struct ib_port_attr pattr; - if (in_wc && in_wc->qp) { - pr_debug("received MAD: port:%d slid:%d sqpn:%d " - "dlid_bits:%d dqpn:%d wc_flags:0x%x tid:%016llx cls:%x mtd:%x atr:%x\n", - port_num, - in_wc->slid, in_wc->src_qp, - in_wc->dlid_path_bits, - in_wc->qp->qp_num, - in_wc->wc_flags, - be64_to_cpu(in_mad->mad_hdr.tid), - in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method, - be16_to_cpu(in_mad->mad_hdr.attr_id)); - if (in_wc->wc_flags & IB_WC_GRH) { - pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n", - be64_to_cpu(in_grh->sgid.global.subnet_prefix), - be64_to_cpu(in_grh->sgid.global.interface_id)); - pr_debug("dgid_hi:0x%016llx dgid_lo:0x%016llx\n", - be64_to_cpu(in_grh->dgid.global.subnet_prefix), - be64_to_cpu(in_grh->dgid.global.interface_id)); - } - } - slid = in_wc ? ib_lid_cpu16(in_wc->slid) : be16_to_cpu(IB_LID_PERMISSIVE); if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) { @@ -1299,6 +1297,18 @@ static void mlx4_ib_tunnel_comp_handler(struct ib_cq *cq, void *arg) spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); } +static void mlx4_ib_wire_comp_handler(struct ib_cq *cq, void *arg) +{ + unsigned long flags; + struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context; + struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev); + + spin_lock_irqsave(&dev->sriov.going_down_lock, flags); + if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE) + queue_work(ctx->wi_wq, &ctx->work); + spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); +} + static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx, struct mlx4_ib_demux_pv_qp *tun_qp, int index) @@ -1341,14 +1351,6 @@ static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port, return ret; } -static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave) -{ - int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave; - - return (qpn >= proxy_start && qpn <= proxy_start + 1); -} - - int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn, u32 qkey, struct rdma_ah_attr *attr, @@ -1401,10 +1403,10 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, spin_lock(&sqp->tx_lock); if (sqp->tx_ix_head - sqp->tx_ix_tail >= - (MLX4_NUM_TUNNEL_BUFS - 1)) + (MLX4_NUM_WIRE_BUFS - 1)) ret = -EAGAIN; else - wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1); + wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_WIRE_BUFS - 1); spin_unlock(&sqp->tx_lock); if (ret) goto out; @@ -1484,6 +1486,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc u16 vlan_id; u8 qos; u8 *dmac; + int sts; /* Get slave that sent this packet */ if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn || @@ -1580,13 +1583,17 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc &vlan_id, &qos)) rdma_ah_set_sl(&ah_attr, qos); - mlx4_ib_send_to_wire(dev, slave, ctx->port, - is_proxy_qp0(dev, wc->src_qp, slave) ? - IB_QPT_SMI : IB_QPT_GSI, - be16_to_cpu(tunnel->hdr.pkey_index), - be32_to_cpu(tunnel->hdr.remote_qpn), - be32_to_cpu(tunnel->hdr.qkey), - &ah_attr, wc->smac, vlan_id, &tunnel->mad); + sts = mlx4_ib_send_to_wire(dev, slave, ctx->port, + is_proxy_qp0(dev, wc->src_qp, slave) ? + IB_QPT_SMI : IB_QPT_GSI, + be16_to_cpu(tunnel->hdr.pkey_index), + be32_to_cpu(tunnel->hdr.remote_qpn), + be32_to_cpu(tunnel->hdr.qkey), + &ah_attr, wc->smac, vlan_id, &tunnel->mad); + if (sts) + pr_debug("failed sending %s to wire on behalf of slave %d (%d)\n", + is_proxy_qp0(dev, wc->src_qp, slave) ? "SMI" : "GSI", + slave, sts); } static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, @@ -1595,19 +1602,20 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, int i; struct mlx4_ib_demux_pv_qp *tun_qp; int rx_buf_size, tx_buf_size; + const int nmbr_bufs = is_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS; if (qp_type > IB_QPT_GSI) return -EINVAL; tun_qp = &ctx->qp[qp_type]; - tun_qp->ring = kcalloc(MLX4_NUM_TUNNEL_BUFS, + tun_qp->ring = kcalloc(nmbr_bufs, sizeof(struct mlx4_ib_buf), GFP_KERNEL); if (!tun_qp->ring) return -ENOMEM; - tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS, + tun_qp->tx_ring = kcalloc(nmbr_bufs, sizeof (struct mlx4_ib_tun_tx_buf), GFP_KERNEL); if (!tun_qp->tx_ring) { @@ -1624,7 +1632,7 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, tx_buf_size = sizeof (struct mlx4_mad_snd_buf); } - for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + for (i = 0; i < nmbr_bufs; i++) { tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL); if (!tun_qp->ring[i].addr) goto err; @@ -1638,7 +1646,7 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, } } - for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + for (i = 0; i < nmbr_bufs; i++) { tun_qp->tx_ring[i].buf.addr = kmalloc(tx_buf_size, GFP_KERNEL); if (!tun_qp->tx_ring[i].buf.addr) @@ -1669,7 +1677,7 @@ tx_err: tx_buf_size, DMA_TO_DEVICE); kfree(tun_qp->tx_ring[i].buf.addr); } - i = MLX4_NUM_TUNNEL_BUFS; + i = nmbr_bufs; err: while (i > 0) { --i; @@ -1690,6 +1698,7 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx, int i; struct mlx4_ib_demux_pv_qp *tun_qp; int rx_buf_size, tx_buf_size; + const int nmbr_bufs = is_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS; if (qp_type > IB_QPT_GSI) return; @@ -1704,13 +1713,13 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx, } - for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + for (i = 0; i < nmbr_bufs; i++) { ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map, rx_buf_size, DMA_FROM_DEVICE); kfree(tun_qp->ring[i].addr); } - for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + for (i = 0; i < nmbr_bufs; i++) { ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map, tx_buf_size, DMA_TO_DEVICE); kfree(tun_qp->tx_ring[i].buf.addr); @@ -1744,9 +1753,6 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work) "buf:%lld\n", wc.wr_id); break; case IB_WC_SEND: - pr_debug("received tunnel send completion:" - "wrid=0x%llx, status=0x%x\n", - wc.wr_id, wc.status); rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah @@ -1793,6 +1799,7 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx, struct mlx4_ib_qp_tunnel_init_attr qp_init_attr; struct ib_qp_attr attr; int qp_attr_mask_INIT; + const int nmbr_bufs = create_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS; if (qp_type > IB_QPT_GSI) return -EINVAL; @@ -1803,8 +1810,8 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx, qp_init_attr.init_attr.send_cq = ctx->cq; qp_init_attr.init_attr.recv_cq = ctx->cq; qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; - qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS; - qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS; + qp_init_attr.init_attr.cap.max_send_wr = nmbr_bufs; + qp_init_attr.init_attr.cap.max_recv_wr = nmbr_bufs; qp_init_attr.init_attr.cap.max_send_sge = 1; qp_init_attr.init_attr.cap.max_recv_sge = 1; if (create_tun) { @@ -1866,7 +1873,7 @@ static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx, goto err_qp; } - for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + for (i = 0; i < nmbr_bufs; i++) { ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i); if (ret) { pr_err(" mlx4_ib_post_pv_buf error" @@ -1902,8 +1909,8 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) switch (wc.opcode) { case IB_WC_SEND: kfree(sqp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); - sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah + (MLX4_NUM_WIRE_BUFS - 1)].ah); + sqp->tx_ring[wc.wr_id & (MLX4_NUM_WIRE_BUFS - 1)].ah = NULL; spin_lock(&sqp->tx_lock); sqp->tx_ix_tail++; @@ -1912,13 +1919,13 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) case IB_WC_RECV: mad = (struct ib_mad *) &(((struct mlx4_mad_rcv_buf *) (sqp->ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->payload); + (MLX4_NUM_WIRE_BUFS - 1)].addr))->payload); grh = &(((struct mlx4_mad_rcv_buf *) (sqp->ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->grh); + (MLX4_NUM_WIRE_BUFS - 1)].addr))->grh); mlx4_ib_demux_mad(ctx->ib_dev, ctx->port, &wc, grh, mad); if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1))) + (MLX4_NUM_WIRE_BUFS - 1))) pr_err("Failed reposting SQP " "buf:%lld\n", wc.wr_id); break; @@ -1931,8 +1938,8 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) ctx->slave, wc.status, wc.wr_id); if (!MLX4_TUN_IS_RECV(wc.wr_id)) { kfree(sqp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); - sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah + (MLX4_NUM_WIRE_BUFS - 1)].ah); + sqp->tx_ring[wc.wr_id & (MLX4_NUM_WIRE_BUFS - 1)].ah = NULL; spin_lock(&sqp->tx_lock); sqp->tx_ix_tail++; @@ -1972,6 +1979,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, { int ret, cq_size; struct ib_cq_init_attr cq_attr = {}; + const int nmbr_bufs = create_tun ? MLX4_NUM_TUNNEL_BUFS : MLX4_NUM_WIRE_BUFS; if (ctx->state != DEMUX_PV_STATE_DOWN) return -EEXIST; @@ -1996,12 +2004,13 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, goto err_out_qp0; } - cq_size = 2 * MLX4_NUM_TUNNEL_BUFS; + cq_size = 2 * nmbr_bufs; if (ctx->has_smi) cq_size *= 2; cq_attr.cqe = cq_size; - ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler, + ctx->cq = ib_create_cq(ctx->ib_dev, + create_tun ? mlx4_ib_tunnel_comp_handler : mlx4_ib_wire_comp_handler, NULL, ctx, &cq_attr); if (IS_ERR(ctx->cq)) { ret = PTR_ERR(ctx->cq); @@ -2038,6 +2047,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, INIT_WORK(&ctx->work, mlx4_ib_sqp_comp_worker); ctx->wq = to_mdev(ibdev)->sriov.demux[port - 1].wq; + ctx->wi_wq = to_mdev(ibdev)->sriov.demux[port - 1].wi_wq; ret = ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP); if (ret) { @@ -2181,7 +2191,7 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, goto err_mcg; } - snprintf(name, sizeof name, "mlx4_ibt%d", port); + snprintf(name, sizeof(name), "mlx4_ibt%d", port); ctx->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); if (!ctx->wq) { pr_err("Failed to create tunnelling WQ for port %d\n", port); @@ -2189,7 +2199,15 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, goto err_wq; } - snprintf(name, sizeof name, "mlx4_ibud%d", port); + snprintf(name, sizeof(name), "mlx4_ibwi%d", port); + ctx->wi_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); + if (!ctx->wi_wq) { + pr_err("Failed to create wire WQ for port %d\n", port); + ret = -ENOMEM; + goto err_wiwq; + } + + snprintf(name, sizeof(name), "mlx4_ibud%d", port); ctx->ud_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); if (!ctx->ud_wq) { pr_err("Failed to create up/down WQ for port %d\n", port); @@ -2200,6 +2218,10 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, return 0; err_udwq: + destroy_workqueue(ctx->wi_wq); + ctx->wi_wq = NULL; + +err_wiwq: destroy_workqueue(ctx->wq); ctx->wq = NULL; @@ -2247,12 +2269,14 @@ static void mlx4_ib_free_demux_ctx(struct mlx4_ib_demux_ctx *ctx) ctx->tun[i]->state = DEMUX_PV_STATE_DOWNING; } flush_workqueue(ctx->wq); + flush_workqueue(ctx->wi_wq); for (i = 0; i < dev->dev->caps.sqp_demux; i++) { destroy_pv_resources(dev, i, ctx->port, ctx->tun[i], 0); free_pv_object(dev, i, ctx->port); } kfree(ctx->tun); destroy_workqueue(ctx->ud_wq); + destroy_workqueue(ctx->wi_wq); destroy_workqueue(ctx->wq); } } diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index bd4f975e7f9a..cd0fba6b0964 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1215,9 +1215,10 @@ static int mlx4_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) return 0; } -static void mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) +static int mlx4_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn); + return 0; } static int mlx4_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata) @@ -1256,11 +1257,12 @@ err2: return err; } -static void mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) +static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) { ib_destroy_cq(to_mxrcd(xrcd)->cq); ib_dealloc_pd(to_mxrcd(xrcd)->pd); mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn); + return 0; } static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid) @@ -1533,23 +1535,11 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att struct mlx4_net_trans_rule_hw_ctrl *ctrl; int default_flow; - static const u16 __mlx4_domain[] = { - [IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS, - [IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL, - [IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS, - [IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC, - }; - if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) { pr_err("Invalid priority value %d\n", flow_attr->priority); return -EINVAL; } - if (domain >= IB_FLOW_DOMAIN_NUM) { - pr_err("Invalid domain value %d\n", domain); - return -EINVAL; - } - if (mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0) return -EINVAL; @@ -1558,8 +1548,7 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att return PTR_ERR(mailbox); ctrl = mailbox->buf; - ctrl->prio = cpu_to_be16(__mlx4_domain[domain] | - flow_attr->priority); + ctrl->prio = cpu_to_be16(domain | flow_attr->priority); ctrl->type = mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type); ctrl->port = flow_attr->port; ctrl->qpn = cpu_to_be32(qp->qp_num); @@ -1701,8 +1690,8 @@ static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev, } static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, - struct ib_flow_attr *flow_attr, - int domain, struct ib_udata *udata) + struct ib_flow_attr *flow_attr, + struct ib_udata *udata) { int err = 0, i = 0, j = 0; struct mlx4_ib_flow *mflow; @@ -1768,8 +1757,8 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, } while (i < ARRAY_SIZE(type) && type[i]) { - err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i], - &mflow->reg_id[i].id); + err = __mlx4_ib_create_flow(qp, flow_attr, MLX4_DOMAIN_UVERBS, + type[i], &mflow->reg_id[i].id); if (err) goto err_create_flow; if (is_bonded) { @@ -1778,7 +1767,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, */ flow_attr->port = 2; err = __mlx4_ib_create_flow(qp, flow_attr, - domain, type[j], + MLX4_DOMAIN_UVERBS, type[j], &mflow->reg_id[j].mirror); flow_attr->port = 1; if (err) @@ -2589,11 +2578,16 @@ static const struct ib_device_ops mlx4_ib_dev_wq_ops = { .destroy_rwq_ind_table = mlx4_ib_destroy_rwq_ind_table, .destroy_wq = mlx4_ib_destroy_wq, .modify_wq = mlx4_ib_modify_wq, + + INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mlx4_ib_rwq_ind_table, + ib_rwq_ind_tbl), }; static const struct ib_device_ops mlx4_ib_dev_mw_ops = { .alloc_mw = mlx4_ib_alloc_mw, .dealloc_mw = mlx4_ib_dealloc_mw, + + INIT_RDMA_OBJ_SIZE(ib_mw, mlx4_ib_mw, ibmw), }; static const struct ib_device_ops mlx4_ib_dev_xrc_ops = { @@ -2847,7 +2841,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) goto err_steer_free_bitmap; rdma_set_device_sysfs_group(&ibdev->ib_dev, &mlx4_attr_group); - if (ib_register_device(&ibdev->ib_dev, "mlx4_%d")) + if (ib_register_device(&ibdev->ib_dev, "mlx4_%d", + &dev->persist->pdev->dev)) goto err_diag_counters; if (mlx4_ib_mad_init(ibdev)) @@ -2989,10 +2984,8 @@ int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, /* Add an empty rule for IB L2 */ memset(&ib_spec->mask, 0, sizeof(ib_spec->mask)); - err = __mlx4_ib_create_flow(&mqp->ibqp, flow, - IB_FLOW_DOMAIN_NIC, - MLX4_FS_REGULAR, - &mqp->reg_id); + err = __mlx4_ib_create_flow(&mqp->ibqp, flow, MLX4_DOMAIN_NIC, + MLX4_FS_REGULAR, &mqp->reg_id); } else { err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id); } diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 38e87a700a2a..58df06492d69 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -233,7 +233,8 @@ enum mlx4_ib_mad_ifc_flags { }; enum { - MLX4_NUM_TUNNEL_BUFS = 256, + MLX4_NUM_TUNNEL_BUFS = 512, + MLX4_NUM_WIRE_BUFS = 2048, }; struct mlx4_ib_tunnel_header { @@ -298,6 +299,26 @@ struct mlx4_ib_rss { u8 rss_key[MLX4_EN_RSS_KEY_SIZE]; }; +enum { + /* + * Largest possible UD header: send with GRH and immediate + * data plus 18 bytes for an Ethernet header with VLAN/802.1Q + * tag. (LRH would only use 8 bytes, so Ethernet is the + * biggest case) + */ + MLX4_IB_UD_HEADER_SIZE = 82, + MLX4_IB_LSO_HEADER_SPARE = 128, +}; + +struct mlx4_ib_sqp { + int pkey_index; + u32 qkey; + u32 send_psn; + struct ib_ud_header ud_header; + u8 header_buf[MLX4_IB_UD_HEADER_SIZE]; + struct ib_qp *roce_v2_gsi; +}; + struct mlx4_ib_qp { union { struct ib_qp ibqp; @@ -343,7 +364,10 @@ struct mlx4_ib_qp { struct mlx4_wqn_range *wqn_range; /* Number of RSS QP parents that uses this WQ */ u32 rss_usecnt; - struct mlx4_ib_rss *rss_ctx; + union { + struct mlx4_ib_rss *rss_ctx; + struct mlx4_ib_sqp *sqp; + }; }; struct mlx4_ib_srq { @@ -366,6 +390,10 @@ struct mlx4_ib_ah { union mlx4_ext_av av; }; +struct mlx4_ib_rwq_ind_table { + struct ib_rwq_ind_table ib_rwq_ind_tbl; +}; + /****************************************/ /* alias guid support */ /****************************************/ @@ -454,6 +482,7 @@ struct mlx4_ib_demux_pv_ctx { struct ib_pd *pd; struct work_struct work; struct workqueue_struct *wq; + struct workqueue_struct *wi_wq; struct mlx4_ib_demux_pv_qp qp[2]; }; @@ -461,6 +490,7 @@ struct mlx4_ib_demux_ctx { struct ib_device *ib_dev; int port; struct workqueue_struct *wq; + struct workqueue_struct *wi_wq; struct workqueue_struct *ud_wq; spinlock_t ud_lock; atomic64_t subnet_prefix; @@ -492,6 +522,7 @@ struct mlx4_ib_sriov { spinlock_t id_map_lock; struct rb_root sl_id_map; struct list_head cm_list; + struct xarray xa_rej_tmout; }; struct gid_cache_context { @@ -725,8 +756,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); int mlx4_ib_dereg_mr(struct ib_mr *mr, struct ib_udata *udata); -struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, - struct ib_udata *udata); +int mlx4_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); int mlx4_ib_dealloc_mw(struct ib_mw *mw); struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, u32 max_num_sg); @@ -736,7 +766,7 @@ int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata); int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); -void mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); +int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); @@ -747,14 +777,17 @@ int mlx4_ib_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, int mlx4_ib_create_ah_slave(struct ib_ah *ah, struct rdma_ah_attr *ah_attr, int slave_sgid_index, u8 *s_mac, u16 vlan_tag); int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -void mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags); +static inline int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags) +{ + return 0; +} int mlx4_ib_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr, struct ib_udata *udata); int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); -void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); +int mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index); int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); @@ -890,15 +923,18 @@ void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port); struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata); -void mlx4_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); +int mlx4_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); int mlx4_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, u32 wq_attr_mask, struct ib_udata *udata); -struct ib_rwq_ind_table -*mlx4_ib_create_rwq_ind_table(struct ib_device *device, - struct ib_rwq_ind_table_init_attr *init_attr, - struct ib_udata *udata); -int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); +int mlx4_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata); +static inline int +mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table) +{ + return 0; +} int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, int *num_of_mtts); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 1d5ef0de12c9..426fed005d53 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -271,6 +271,8 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, u64 total_len = 0; int i; + *num_of_mtts = ib_umem_num_dma_blocks(umem, PAGE_SIZE); + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) { /* * Initialization - save the first chunk start as the @@ -421,7 +423,6 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto err_free; } - n = ib_umem_page_count(mr->umem); shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n); err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length, @@ -511,7 +512,7 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, mmr->umem = NULL; goto release_mpt_entry; } - n = ib_umem_page_count(mmr->umem); + n = ib_umem_num_dma_blocks(mmr->umem, PAGE_SIZE); shift = PAGE_SHIFT; err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr, @@ -610,37 +611,27 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) return 0; } -struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, - struct ib_udata *udata) +int mlx4_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) { - struct mlx4_ib_dev *dev = to_mdev(pd->device); - struct mlx4_ib_mw *mw; + struct mlx4_ib_dev *dev = to_mdev(ibmw->device); + struct mlx4_ib_mw *mw = to_mmw(ibmw); int err; - mw = kmalloc(sizeof(*mw), GFP_KERNEL); - if (!mw) - return ERR_PTR(-ENOMEM); - - err = mlx4_mw_alloc(dev->dev, to_mpd(pd)->pdn, - to_mlx4_type(type), &mw->mmw); + err = mlx4_mw_alloc(dev->dev, to_mpd(ibmw->pd)->pdn, + to_mlx4_type(ibmw->type), &mw->mmw); if (err) - goto err_free; + return err; err = mlx4_mw_enable(dev->dev, &mw->mmw); if (err) goto err_mw; - mw->ibmw.rkey = mw->mmw.key; - - return &mw->ibmw; + ibmw->rkey = mw->mmw.key; + return 0; err_mw: mlx4_mw_free(dev->dev, &mw->mmw); - -err_free: - kfree(mw); - - return ERR_PTR(err); + return err; } int mlx4_ib_dealloc_mw(struct ib_mw *ibmw) @@ -648,8 +639,6 @@ int mlx4_ib_dealloc_mw(struct ib_mw *ibmw) struct mlx4_ib_mw *mw = to_mmw(ibmw); mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw); - kfree(mw); - return 0; } diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 2975f350b9fd..5cb8e602294c 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -67,27 +67,6 @@ enum { MLX4_IB_LINK_TYPE_ETH = 1 }; -enum { - /* - * Largest possible UD header: send with GRH and immediate - * data plus 18 bytes for an Ethernet header with VLAN/802.1Q - * tag. (LRH would only use 8 bytes, so Ethernet is the - * biggest case) - */ - MLX4_IB_UD_HEADER_SIZE = 82, - MLX4_IB_LSO_HEADER_SPARE = 128, -}; - -struct mlx4_ib_sqp { - struct mlx4_ib_qp qp; - int pkey_index; - u32 qkey; - u32 send_psn; - struct ib_ud_header ud_header; - u8 header_buf[MLX4_IB_UD_HEADER_SIZE]; - struct ib_qp *roce_v2_gsi; -}; - enum { MLX4_IB_MIN_SQ_STRIDE = 6, MLX4_IB_CACHE_LINE_SIZE = 64, @@ -123,11 +102,6 @@ enum mlx4_ib_source_type { MLX4_IB_RWQ_SRC = 1, }; -static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) -{ - return container_of(mqp, struct mlx4_ib_sqp, qp); -} - static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) { if (!mlx4_is_master(dev->dev)) @@ -656,8 +630,6 @@ static int create_qp_rss(struct mlx4_ib_dev *dev, if (err) goto err_qpn; - mutex_init(&qp->mutex); - INIT_LIST_HEAD(&qp->gid_list); INIT_LIST_HEAD(&qp->steering_rules); @@ -696,80 +668,72 @@ err_qpn: return err; } -static struct ib_qp *_mlx4_ib_create_qp_rss(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) +static int _mlx4_ib_create_qp_rss(struct ib_pd *pd, struct mlx4_ib_qp *qp, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) { - struct mlx4_ib_qp *qp; struct mlx4_ib_create_qp_rss ucmd = {}; size_t required_cmd_sz; int err; if (!udata) { pr_debug("RSS QP with NULL udata\n"); - return ERR_PTR(-EINVAL); + return -EINVAL; } if (udata->outlen) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; required_cmd_sz = offsetof(typeof(ucmd), reserved1) + sizeof(ucmd.reserved1); if (udata->inlen < required_cmd_sz) { pr_debug("invalid inlen\n"); - return ERR_PTR(-EINVAL); + return -EINVAL; } if (ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen))) { pr_debug("copy failed\n"); - return ERR_PTR(-EFAULT); + return -EFAULT; } if (memchr_inv(ucmd.reserved, 0, sizeof(ucmd.reserved))) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; if (ucmd.comp_mask || ucmd.reserved1) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; if (udata->inlen > sizeof(ucmd) && !ib_is_udata_cleared(udata, sizeof(ucmd), udata->inlen - sizeof(ucmd))) { pr_debug("inlen is not supported\n"); - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } if (init_attr->qp_type != IB_QPT_RAW_PACKET) { pr_debug("RSS QP with unsupported QP type %d\n", init_attr->qp_type); - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } if (init_attr->create_flags) { pr_debug("RSS QP doesn't support create flags\n"); - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } if (init_attr->send_cq || init_attr->cap.max_send_wr) { pr_debug("RSS QP with unsupported send attributes\n"); - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } - qp = kzalloc(sizeof(*qp), GFP_KERNEL); - if (!qp) - return ERR_PTR(-ENOMEM); - qp->pri.vid = 0xFFFF; qp->alt.vid = 0xFFFF; err = create_qp_rss(to_mdev(pd->device), init_attr, &ucmd, qp); - if (err) { - kfree(qp); - return ERR_PTR(err); - } + if (err) + return err; qp->ibqp.qp_num = qp->mqp.qpn; - - return &qp->ibqp; + return 0; } /* @@ -873,7 +837,6 @@ static int create_rq(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, qp->mlx4_ib_qp_type = MLX4_IB_QPT_RAW_PACKET; - mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); INIT_LIST_HEAD(&qp->gid_list); @@ -922,7 +885,6 @@ static int create_rq(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, goto err; } - n = ib_umem_page_count(qp->umem); shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n); err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt); @@ -989,13 +951,11 @@ err: static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, int sqpn, - struct mlx4_ib_qp **caller_qp) + struct mlx4_ib_qp *qp) { struct mlx4_ib_dev *dev = to_mdev(pd->device); int qpn; int err; - struct mlx4_ib_sqp *sqp = NULL; - struct mlx4_ib_qp *qp; struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context( udata, struct mlx4_ib_ucontext, ibucontext); enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type; @@ -1043,27 +1003,18 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, sqpn = qpn; } - if (!*caller_qp) { - if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI || - (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER | - MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) { - sqp = kzalloc(sizeof(struct mlx4_ib_sqp), GFP_KERNEL); - if (!sqp) - return -ENOMEM; - qp = &sqp->qp; - } else { - qp = kzalloc(sizeof(struct mlx4_ib_qp), GFP_KERNEL); - if (!qp) - return -ENOMEM; - } - qp->pri.vid = 0xFFFF; - qp->alt.vid = 0xFFFF; - } else - qp = *caller_qp; + if (init_attr->qp_type == IB_QPT_SMI || + init_attr->qp_type == IB_QPT_GSI || qp_type == MLX4_IB_QPT_SMI || + qp_type == MLX4_IB_QPT_GSI || + (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER | + MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) { + qp->sqp = kzalloc(sizeof(struct mlx4_ib_sqp), GFP_KERNEL); + if (!qp->sqp) + return -ENOMEM; + } qp->mlx4_ib_qp_type = qp_type; - mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); INIT_LIST_HEAD(&qp->gid_list); @@ -1117,7 +1068,6 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, goto err; } - n = ib_umem_page_count(qp->umem); shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n); err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt); @@ -1239,9 +1189,6 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, qp->mqp.event = mlx4_ib_qp_event; - if (!*caller_qp) - *caller_qp = qp; - spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); mlx4_ib_lock_cqs(to_mcq(init_attr->send_cq), to_mcq(init_attr->recv_cq)); @@ -1293,10 +1240,7 @@ err_db: mlx4_db_free(dev->dev, &qp->db); err: - if (!sqp && !*caller_qp) - kfree(qp); - kfree(sqp); - + kfree(qp->sqp); return err; } @@ -1410,7 +1354,6 @@ static void destroy_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) mlx4_qp_free(dev->dev, &qp->mqp); mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); del_gid_entries(qp); - kfree(qp->rss_ctx); } static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, @@ -1529,17 +1472,16 @@ static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr) return dev->dev->caps.spec_qps[attr->port_num - 1].qp1_proxy; } -static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr, - struct ib_udata *udata) +static int _mlx4_ib_create_qp(struct ib_pd *pd, struct mlx4_ib_qp *qp, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) { - struct mlx4_ib_qp *qp = NULL; int err; int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK; u16 xrcdn = 0; if (init_attr->rwq_ind_tbl) - return _mlx4_ib_create_qp_rss(pd, init_attr, udata); + return _mlx4_ib_create_qp_rss(pd, qp, init_attr, udata); /* * We only support LSO, vendor flag1, and multicast loopback blocking, @@ -1551,16 +1493,16 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, MLX4_IB_SRIOV_SQP | MLX4_IB_QP_NETIF | MLX4_IB_QP_CREATE_ROCE_V2_GSI)) - return ERR_PTR(-EINVAL); + return -EINVAL; if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) { if (init_attr->qp_type != IB_QPT_UD) - return ERR_PTR(-EINVAL); + return -EINVAL; } if (init_attr->create_flags) { if (udata && init_attr->create_flags & ~(sup_u_create_flags)) - return ERR_PTR(-EINVAL); + return -EINVAL; if ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | MLX4_IB_QP_CREATE_ROCE_V2_GSI | @@ -1570,7 +1512,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, init_attr->qp_type > IB_QPT_GSI) || (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI && init_attr->qp_type != IB_QPT_GSI)) - return ERR_PTR(-EINVAL); + return -EINVAL; } switch (init_attr->qp_type) { @@ -1581,53 +1523,43 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, fallthrough; case IB_QPT_XRC_INI: if (!(to_mdev(pd->device)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) - return ERR_PTR(-ENOSYS); + return -ENOSYS; init_attr->recv_cq = init_attr->send_cq; fallthrough; case IB_QPT_RC: case IB_QPT_UC: case IB_QPT_RAW_PACKET: - qp = kzalloc(sizeof(*qp), GFP_KERNEL); - if (!qp) - return ERR_PTR(-ENOMEM); + case IB_QPT_UD: qp->pri.vid = 0xFFFF; qp->alt.vid = 0xFFFF; - fallthrough; - case IB_QPT_UD: - { - err = create_qp_common(pd, init_attr, udata, 0, &qp); - if (err) { - kfree(qp); - return ERR_PTR(err); - } + err = create_qp_common(pd, init_attr, udata, 0, qp); + if (err) + return err; qp->ibqp.qp_num = qp->mqp.qpn; qp->xrcdn = xrcdn; - break; - } case IB_QPT_SMI: case IB_QPT_GSI: { int sqpn; - /* Userspace is not allowed to create special QPs: */ - if (udata) - return ERR_PTR(-EINVAL); if (init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI) { int res = mlx4_qp_reserve_range(to_mdev(pd->device)->dev, 1, 1, &sqpn, 0, MLX4_RES_USAGE_DRIVER); if (res) - return ERR_PTR(res); + return res; } else { sqpn = get_sqp_num(to_mdev(pd->device), init_attr); } - err = create_qp_common(pd, init_attr, udata, sqpn, &qp); + qp->pri.vid = 0xFFFF; + qp->alt.vid = 0xFFFF; + err = create_qp_common(pd, init_attr, udata, sqpn, qp); if (err) - return ERR_PTR(err); + return err; qp->port = init_attr->port_num; qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : @@ -1636,25 +1568,33 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, } default: /* Don't support raw QPs */ - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; } - - return &qp->ibqp; + return 0; } struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { struct ib_device *device = pd ? pd->device : init_attr->xrcd->device; - struct ib_qp *ibqp; struct mlx4_ib_dev *dev = to_mdev(device); + struct mlx4_ib_qp *qp; + int ret; - ibqp = _mlx4_ib_create_qp(pd, init_attr, udata); + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) + return ERR_PTR(-ENOMEM); - if (!IS_ERR(ibqp) && - (init_attr->qp_type == IB_QPT_GSI) && + mutex_init(&qp->mutex); + ret = _mlx4_ib_create_qp(pd, qp, init_attr, udata); + if (ret) { + kfree(qp); + return ERR_PTR(ret); + } + + if (init_attr->qp_type == IB_QPT_GSI && !(init_attr->create_flags & MLX4_IB_QP_CREATE_ROCE_V2_GSI)) { - struct mlx4_ib_sqp *sqp = to_msqp((to_mqp(ibqp))); + struct mlx4_ib_sqp *sqp = qp->sqp; int is_eth = rdma_cap_eth_ah(&dev->ib_dev, init_attr->port_num); if (is_eth && @@ -1666,14 +1606,14 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, pr_err("Failed to create GSI QP for RoCEv2 (%ld)\n", PTR_ERR(sqp->roce_v2_gsi)); sqp->roce_v2_gsi = NULL; } else { - sqp = to_msqp(to_mqp(sqp->roce_v2_gsi)); - sqp->qp.flags |= MLX4_IB_ROCE_V2_GSI_QP; + to_mqp(sqp->roce_v2_gsi)->flags |= + MLX4_IB_ROCE_V2_GSI_QP; } init_attr->create_flags &= ~MLX4_IB_QP_CREATE_ROCE_V2_GSI; } } - return ibqp; + return &qp->ibqp; } static int _mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) @@ -1700,10 +1640,8 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, udata); } - if (is_sqp(dev, mqp)) - kfree(to_msqp(mqp)); - else - kfree(mqp); + kfree(mqp->sqp); + kfree(mqp); return 0; } @@ -1713,7 +1651,7 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) struct mlx4_ib_qp *mqp = to_mqp(qp); if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) { - struct mlx4_ib_sqp *sqp = to_msqp(mqp); + struct mlx4_ib_sqp *sqp = mqp->sqp; if (sqp->roce_v2_gsi) ib_destroy_qp(sqp->roce_v2_gsi); @@ -2575,7 +2513,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type, qp->alt_port = attr->alt_port_num; if (is_sqp(dev, qp)) - store_sqp_attrs(to_msqp(qp), attr, attr_mask); + store_sqp_attrs(qp->sqp, attr, attr_mask); /* * If we moved QP0 to RTR, bring the IB link up; if we moved @@ -2852,7 +2790,7 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, ret = _mlx4_ib_modify_qp(ibqp, attr, attr_mask, udata); if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) { - struct mlx4_ib_sqp *sqp = to_msqp(mqp); + struct mlx4_ib_sqp *sqp = mqp->sqp; int err = 0; if (sqp->roce_v2_gsi) @@ -2877,12 +2815,13 @@ static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey) return -EINVAL; } -static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, +static int build_sriov_qp0_header(struct mlx4_ib_qp *qp, const struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) { - struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device); - struct ib_device *ib_dev = &mdev->ib_dev; + struct mlx4_ib_dev *mdev = to_mdev(qp->ibqp.device); + struct mlx4_ib_sqp *sqp = qp->sqp; + struct ib_device *ib_dev = qp->ibqp.device; struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; struct mlx4_ib_ah *ah = to_mah(wr->ah); @@ -2904,12 +2843,12 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, /* for proxy-qp0 sends, need to add in size of tunnel header */ /* for tunnel-qp0 sends, tunnel header is already in s/g list */ - if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) + if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) send_size += sizeof (struct mlx4_ib_tunnel_header); ib_ud_header_init(send_size, 1, 0, 0, 0, 0, 0, 0, &sqp->ud_header); - if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) { + if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) { sqp->ud_header.lrh.service_level = be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; sqp->ud_header.lrh.destination_lid = @@ -2926,26 +2865,26 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, sqp->ud_header.lrh.virtual_lane = 0; sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED); - err = ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey); + err = ib_get_cached_pkey(ib_dev, qp->port, 0, &pkey); if (err) return err; sqp->ud_header.bth.pkey = cpu_to_be16(pkey); - if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER) + if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER) sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn); else sqp->ud_header.bth.destination_qpn = - cpu_to_be32(mdev->dev->caps.spec_qps[sqp->qp.port - 1].qp0_tunnel); + cpu_to_be32(mdev->dev->caps.spec_qps[qp->port - 1].qp0_tunnel); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); if (mlx4_is_master(mdev->dev)) { - if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) + if (mlx4_get_parav_qkey(mdev->dev, qp->mqp.qpn, &qkey)) return -EINVAL; } else { - if (vf_get_qp0_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) + if (vf_get_qp0_qkey(mdev->dev, qp->mqp.qpn, &qkey)) return -EINVAL; } sqp->ud_header.deth.qkey = cpu_to_be32(qkey); - sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn); + sqp->ud_header.deth.source_qpn = cpu_to_be32(qp->mqp.qpn); sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; sqp->ud_header.immediate_present = 0; @@ -3029,10 +2968,11 @@ static int fill_gid_by_hw_index(struct mlx4_ib_dev *ibdev, u8 port_num, } #define MLX4_ROCEV2_QP1_SPORT 0xC000 -static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, +static int build_mlx_header(struct mlx4_ib_qp *qp, const struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) { - struct ib_device *ib_dev = sqp->qp.ibqp.device; + struct mlx4_ib_sqp *sqp = qp->sqp; + struct ib_device *ib_dev = qp->ibqp.device; struct mlx4_ib_dev *ibdev = to_mdev(ib_dev); struct mlx4_wqe_mlx_seg *mlx = wqe; struct mlx4_wqe_ctrl_seg *ctrl = wqe; @@ -3056,7 +2996,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, for (i = 0; i < wr->wr.num_sge; ++i) send_size += wr->wr.sg_list[i].length; - is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; + is_eth = rdma_port_get_link_layer(qp->ibqp.device, qp->port) == IB_LINK_LAYER_ETHERNET; is_grh = mlx4_ib_ah_grh_present(ah); if (is_eth) { enum ib_gid_type gid_type; @@ -3070,9 +3010,9 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, if (err) return err; } else { - err = fill_gid_by_hw_index(ibdev, sqp->qp.port, - ah->av.ib.gid_index, - &sgid, &gid_type); + err = fill_gid_by_hw_index(ibdev, qp->port, + ah->av.ib.gid_index, &sgid, + &gid_type); if (!err) { is_udp = gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP; if (is_udp) { @@ -3117,13 +3057,18 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, * indexes don't necessarily match the hw ones, so * we must use our own cache */ - sqp->ud_header.grh.source_gid.global.subnet_prefix = - cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov. - demux[sqp->qp.port - 1]. - subnet_prefix))); - sqp->ud_header.grh.source_gid.global.interface_id = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - guid_cache[ah->av.ib.gid_index]; + sqp->ud_header.grh.source_gid.global + .subnet_prefix = + cpu_to_be64(atomic64_read( + &(to_mdev(ib_dev) + ->sriov + .demux[qp->port - 1] + .subnet_prefix))); + sqp->ud_header.grh.source_gid.global + .interface_id = + to_mdev(ib_dev) + ->sriov.demux[qp->port - 1] + .guid_cache[ah->av.ib.gid_index]; } else { sqp->ud_header.grh.source_gid = ah->ibah.sgid_attr->gid; @@ -3155,10 +3100,13 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); if (!is_eth) { - mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | - (sqp->ud_header.lrh.destination_lid == - IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | - (sqp->ud_header.lrh.service_level << 8)); + mlx->flags |= + cpu_to_be32((!qp->ibqp.qp_num ? MLX4_WQE_MLX_VL15 : 0) | + (sqp->ud_header.lrh.destination_lid == + IB_LID_PERMISSIVE ? + MLX4_WQE_MLX_SLR : + 0) | + (sqp->ud_header.lrh.service_level << 8)); if (ah->av.ib.port_pd & cpu_to_be32(0x80000000)) mlx->flags |= cpu_to_be32(0x1); /* force loopback */ mlx->rlid = sqp->ud_header.lrh.destination_lid; @@ -3204,21 +3152,23 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp); } } else { - sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : - sl_to_vl(to_mdev(ib_dev), - sqp->ud_header.lrh.service_level, - sqp->qp.port); - if (sqp->qp.ibqp.qp_num && sqp->ud_header.lrh.virtual_lane == 15) + sqp->ud_header.lrh.virtual_lane = + !qp->ibqp.qp_num ? + 15 : + sl_to_vl(to_mdev(ib_dev), + sqp->ud_header.lrh.service_level, + qp->port); + if (qp->ibqp.qp_num && sqp->ud_header.lrh.virtual_lane == 15) return -EINVAL; if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; } sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED); - if (!sqp->qp.ibqp.qp_num) - err = ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, + if (!qp->ibqp.qp_num) + err = ib_get_cached_pkey(ib_dev, qp->port, sqp->pkey_index, &pkey); else - err = ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index, + err = ib_get_cached_pkey(ib_dev, qp->port, wr->pkey_index, &pkey); if (err) return err; @@ -3228,7 +3178,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, const struct ib_ud_wr *wr, sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ? sqp->qkey : wr->remote_qkey); - sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); + sqp->ud_header.deth.source_qpn = cpu_to_be32(qp->ibqp.qp_num); header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf); @@ -3551,14 +3501,14 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI) { - struct mlx4_ib_sqp *sqp = to_msqp(qp); + struct mlx4_ib_sqp *sqp = qp->sqp; if (sqp->roce_v2_gsi) { struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah); enum ib_gid_type gid_type; union ib_gid gid; - if (!fill_gid_by_hw_index(mdev, sqp->qp.port, + if (!fill_gid_by_hw_index(mdev, qp->port, ah->av.ib.gid_index, &gid, &gid_type)) qp = (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ? @@ -3678,8 +3628,8 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, break; case MLX4_IB_QPT_TUN_SMI_OWNER: - err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr), - ctrl, &seglen); + err = build_sriov_qp0_header(qp, ud_wr(wr), ctrl, + &seglen); if (unlikely(err)) { *bad_wr = wr; goto out; @@ -3715,8 +3665,8 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, break; case MLX4_IB_QPT_PROXY_SMI_OWNER: - err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr), - ctrl, &seglen); + err = build_sriov_qp0_header(qp, ud_wr(wr), ctrl, + &seglen); if (unlikely(err)) { *bad_wr = wr; goto out; @@ -3749,8 +3699,7 @@ static int _mlx4_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, case MLX4_IB_QPT_SMI: case MLX4_IB_QPT_GSI: - err = build_mlx_header(to_msqp(qp), ud_wr(wr), ctrl, - &seglen); + err = build_mlx_header(qp, ud_wr(wr), ctrl, &seglen); if (unlikely(err)) { *bad_wr = wr; goto out; @@ -4172,6 +4121,7 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, if (!qp) return ERR_PTR(-ENOMEM); + mutex_init(&qp->mutex); qp->pri.vid = 0xFFFF; qp->alt.vid = 0xFFFF; @@ -4327,7 +4277,7 @@ int mlx4_ib_modify_wq(struct ib_wq *ibwq, struct ib_wq_attr *wq_attr, return err; } -void mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata) +int mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(ibwq->device); struct mlx4_ib_qp *qp = to_mqp((struct ib_qp *)ibwq); @@ -4338,36 +4288,35 @@ void mlx4_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata) destroy_qp_common(dev, qp, MLX4_IB_RWQ_SRC, udata); kfree(qp); + return 0; } -struct ib_rwq_ind_table -*mlx4_ib_create_rwq_ind_table(struct ib_device *device, - struct ib_rwq_ind_table_init_attr *init_attr, - struct ib_udata *udata) +int mlx4_ib_create_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata) { - struct ib_rwq_ind_table *rwq_ind_table; struct mlx4_ib_create_rwq_ind_tbl_resp resp = {}; unsigned int ind_tbl_size = 1 << init_attr->log_ind_tbl_size; + struct ib_device *device = rwq_ind_table->device; unsigned int base_wqn; size_t min_resp_len; - int i; - int err; + int i, err = 0; if (udata->inlen > 0 && !ib_is_udata_cleared(udata, 0, udata->inlen)) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); if (udata->outlen && udata->outlen < min_resp_len) - return ERR_PTR(-EINVAL); + return -EINVAL; if (ind_tbl_size > device->attrs.rss_caps.max_rwq_indirection_table_size) { pr_debug("log_ind_tbl_size = %d is bigger than supported = %d\n", ind_tbl_size, device->attrs.rss_caps.max_rwq_indirection_table_size); - return ERR_PTR(-EINVAL); + return -EINVAL; } base_wqn = init_attr->ind_tbl[0]->wq_num; @@ -4375,39 +4324,23 @@ struct ib_rwq_ind_table if (base_wqn % ind_tbl_size) { pr_debug("WQN=0x%x isn't aligned with indirection table size\n", base_wqn); - return ERR_PTR(-EINVAL); + return -EINVAL; } for (i = 1; i < ind_tbl_size; i++) { if (++base_wqn != init_attr->ind_tbl[i]->wq_num) { pr_debug("indirection table's WQNs aren't consecutive\n"); - return ERR_PTR(-EINVAL); + return -EINVAL; } } - rwq_ind_table = kzalloc(sizeof(*rwq_ind_table), GFP_KERNEL); - if (!rwq_ind_table) - return ERR_PTR(-ENOMEM); - if (udata->outlen) { resp.response_length = offsetof(typeof(resp), response_length) + sizeof(resp.response_length); err = ib_copy_to_udata(udata, &resp, resp.response_length); - if (err) - goto err; } - return rwq_ind_table; - -err: - kfree(rwq_ind_table); - return ERR_PTR(err); -} - -int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) -{ - kfree(ib_rwq_ind_tbl); - return 0; + return err; } struct mlx4_ib_drain_cqe { diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 8f9d5035142d..bf618529e734 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -115,8 +115,9 @@ int mlx4_ib_create_srq(struct ib_srq *ib_srq, if (IS_ERR(srq->umem)) return PTR_ERR(srq->umem); - err = mlx4_mtt_init(dev->dev, ib_umem_page_count(srq->umem), - PAGE_SHIFT, &srq->mtt); + err = mlx4_mtt_init( + dev->dev, ib_umem_num_dma_blocks(srq->umem, PAGE_SIZE), + PAGE_SHIFT, &srq->mtt); if (err) goto err_buf; @@ -260,7 +261,7 @@ int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) return 0; } -void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) +int mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { struct mlx4_ib_dev *dev = to_mdev(srq->device); struct mlx4_ib_srq *msrq = to_msrq(srq); @@ -282,6 +283,7 @@ void mlx4_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) mlx4_db_free(dev->dev, &msrq->db); } ib_umem_release(msrq->umem); + return 0; } void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index) diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 59e5ec39b447..505bc47fd575 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -106,8 +106,8 @@ int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, if (ah_type == RDMA_AH_ATTR_TYPE_ROCE && udata) { int err; struct mlx5_ib_create_ah_resp resp = {}; - u32 min_resp_len = offsetof(typeof(resp), dmac) + - sizeof(resp.dmac); + u32 min_resp_len = + offsetofend(struct mlx5_ib_create_ah_resp, dmac); if (udata->outlen < min_resp_len) return -EINVAL; @@ -147,8 +147,3 @@ int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } - -void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags) -{ - return; -} diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index ebb2f108b64f..234f29912ba9 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -168,14 +168,14 @@ void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid) mlx5_cmd_exec_in(dev, destroy_tis, in); } -void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid) +int mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid) { u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {}; MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT); MLX5_SET(destroy_rqt_in, in, rqtn, rqtn); MLX5_SET(destroy_rqt_in, in, uid, uid); - mlx5_cmd_exec_in(dev, destroy_rqt, in); + return mlx5_cmd_exec_in(dev, destroy_rqt, in); } int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn, @@ -209,14 +209,14 @@ void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn, mlx5_cmd_exec_in(dev, dealloc_transport_domain, in); } -void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid) +int mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid) { u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {}; MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD); MLX5_SET(dealloc_pd_in, in, pd, pdn); MLX5_SET(dealloc_pd_in, in, uid, uid); - mlx5_cmd_exec_in(dev, dealloc_pd, in); + return mlx5_cmd_exec_in(dev, dealloc_pd, in); } int mlx5_cmd_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 1d192a8ca87d..88ea6ef8f2cb 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -44,10 +44,10 @@ int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point, int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr, u64 length, u32 alignment); void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length); -void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid); +int mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid); void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid); void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid); -void mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid); +int mlx5_cmd_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn, u16 uid); int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn, u16 uid); void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn, diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 145f3cb40ccb..70c8fd67ee2f 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -117,7 +117,7 @@ err_bound: return ret; } -static void mlx5_ib_destroy_counters(struct ib_counters *counters) +static int mlx5_ib_destroy_counters(struct ib_counters *counters) { struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); @@ -125,6 +125,7 @@ static void mlx5_ib_destroy_counters(struct ib_counters *counters) if (mcounters->hw_cntrs_hndl) mlx5_fc_destroy(to_mdev(counters->device)->mdev, mcounters->hw_cntrs_hndl); + return 0; } static int mlx5_ib_create_counters(struct ib_counters *counters, @@ -456,12 +457,12 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev, cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts); num_counters += ARRAY_SIZE(ext_ppcnt_cnts); } - cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL); + cnts->names = kcalloc(num_counters, sizeof(*cnts->names), GFP_KERNEL); if (!cnts->names) return -ENOMEM; cnts->offsets = kcalloc(num_counters, - sizeof(cnts->offsets), GFP_KERNEL); + sizeof(*cnts->offsets), GFP_KERNEL); if (!cnts->offsets) goto err_names; diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index dceb0eb2bed1..fb62f1d04afa 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -168,7 +168,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, { enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1); struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device); - struct mlx5_ib_srq *srq; + struct mlx5_ib_srq *srq = NULL; struct mlx5_ib_wq *wq; u16 wqe_ctr; u8 roce_packet_type; @@ -180,7 +180,8 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, if (qp->ibqp.xrcd) { msrq = mlx5_cmd_get_srq(dev, be32_to_cpu(cqe->srqn)); - srq = to_mibsrq(msrq); + if (msrq) + srq = to_mibsrq(msrq); } else { srq = to_msrq(qp->ibqp.srq); } @@ -254,7 +255,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, switch (roce_packet_type) { case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH: - wc->network_hdr_type = RDMA_NETWORK_IB; + wc->network_hdr_type = RDMA_NETWORK_ROCE_V1; break; case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6: wc->network_hdr_type = RDMA_NETWORK_IPV6; @@ -1023,16 +1024,21 @@ err_cqb: return err; } -void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) +int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(cq->device); struct mlx5_ib_cq *mcq = to_mcq(cq); + int ret; + + ret = mlx5_core_destroy_cq(dev->mdev, &mcq->mcq); + if (ret) + return ret; - mlx5_core_destroy_cq(dev->mdev, &mcq->mcq); if (udata) destroy_cq_user(mcq, udata); else destroy_cq_kernel(dev, mcq); + return 0; } static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn) diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index e9cfb9a2ef41..492cfe063bca 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -136,12 +136,9 @@ static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask) #define LAST_COUNTERS_FIELD counters /* Field is the last supported field */ -#define FIELDS_NOT_SUPPORTED(filter, field)\ - memchr_inv((void *)&filter.field +\ - sizeof(filter.field), 0,\ - sizeof(filter) -\ - offsetof(typeof(filter), field) -\ - sizeof(filter.field)) +#define FIELDS_NOT_SUPPORTED(filter, field) \ + memchr_inv((void *)&filter.field + sizeof(filter.field), 0, \ + sizeof(filter) - offsetofend(typeof(filter), field)) int parse_flow_flow_action(struct mlx5_ib_flow_action *maction, bool is_egress, @@ -767,6 +764,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, { bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP; struct mlx5_flow_namespace *ns = NULL; + enum mlx5_flow_namespace_type fn_type; struct mlx5_ib_flow_prio *prio; struct mlx5_flow_table *ft; int max_table_size; @@ -780,11 +778,9 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, log_max_ft_size)); esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != DEVLINK_ESWITCH_ENCAP_MODE_NONE; - if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { - enum mlx5_flow_namespace_type fn_type; - - if (flow_is_multicast_only(flow_attr) && - !dont_trap) + switch (flow_attr->type) { + case IB_FLOW_ATTR_NORMAL: + if (flow_is_multicast_only(flow_attr) && !dont_trap) priority = MLX5_IB_FLOW_MCAST_PRIO; else priority = ib_prio_to_core_prio(flow_attr->priority, @@ -797,12 +793,11 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; if (!dev->is_rep && !esw_encap && MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - reformat_l3_tunnel_to_l2)) + reformat_l3_tunnel_to_l2)) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; } else { - max_table_size = - BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, - log_max_ft_size)); + max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_TX( + dev->mdev, log_max_ft_size)); fn_type = MLX5_FLOW_NAMESPACE_EGRESS; prio = &dev->flow_db->egress_prios[priority]; if (!dev->is_rep && !esw_encap && @@ -812,27 +807,31 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, ns = mlx5_get_flow_namespace(dev->mdev, fn_type); num_entries = MLX5_FS_MAX_ENTRIES; num_groups = MLX5_FS_MAX_TYPES; - } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { + break; + case IB_FLOW_ATTR_ALL_DEFAULT: + case IB_FLOW_ATTR_MC_DEFAULT: ns = mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_LEFTOVERS); - build_leftovers_ft_param(&priority, - &num_entries, - &num_groups); + build_leftovers_ft_param(&priority, &num_entries, &num_groups); prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO]; - } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { + break; + case IB_FLOW_ATTR_SNIFFER: if (!MLX5_CAP_FLOWTABLE(dev->mdev, allow_sniffer_and_nic_rx_shared_tir)) return ERR_PTR(-EOPNOTSUPP); - ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ? - MLX5_FLOW_NAMESPACE_SNIFFER_RX : - MLX5_FLOW_NAMESPACE_SNIFFER_TX); + ns = mlx5_get_flow_namespace( + dev->mdev, ft_type == MLX5_IB_FT_RX ? + MLX5_FLOW_NAMESPACE_SNIFFER_RX : + MLX5_FLOW_NAMESPACE_SNIFFER_TX); prio = &dev->flow_db->sniffer[ft_type]; priority = 0; num_entries = 1; num_groups = 1; + break; + default: + break; } if (!ns) @@ -954,7 +953,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, if (!flow_is_multicast_only(flow_attr)) set_underlay_qp(dev, spec, underlay_qpn); - if (dev->is_rep) { + if (dev->is_rep && flow_attr->type != IB_FLOW_ATTR_SNIFFER) { struct mlx5_eswitch_rep *rep; rep = dev->port[flow_attr->port - 1].rep; @@ -1116,6 +1115,7 @@ static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev, int err; static const struct ib_flow_attr flow_attr = { .num_of_specs = 0, + .type = IB_FLOW_ATTR_SNIFFER, .size = sizeof(flow_attr) }; @@ -1143,10 +1143,8 @@ err: return ERR_PTR(err); } - static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, - int domain, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(qp->device); @@ -1162,8 +1160,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, int underlay_qpn; if (udata && udata->inlen) { - min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) + - sizeof(ucmd_hdr.reserved); + min_ucmd_sz = offsetofend(struct mlx5_ib_create_flow, reserved); if (udata->inlen < min_ucmd_sz) return ERR_PTR(-EOPNOTSUPP); @@ -1197,10 +1194,9 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, goto free_ucmd; } - if (domain != IB_FLOW_DOMAIN_USER || - flow_attr->port > dev->num_ports || - (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | - IB_FLOW_ATTR_FLAGS_EGRESS))) { + if (flow_attr->port > dev->num_ports || + (flow_attr->flags & + ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | IB_FLOW_ATTR_FLAGS_EGRESS))) { err = -EINVAL; goto free_ucmd; } @@ -1245,19 +1241,22 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, dst->tir_num = mqp->raw_packet_qp.rq.tirn; } - if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { + switch (flow_attr->type) { + case IB_FLOW_ATTR_NORMAL: underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ? mqp->underlay_qpn : 0; handler = _create_flow_rule(dev, ft_prio, flow_attr, dst, underlay_qpn, ucmd); - } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { - handler = create_leftovers_rule(dev, ft_prio, flow_attr, - dst); - } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { + break; + case IB_FLOW_ATTR_ALL_DEFAULT: + case IB_FLOW_ATTR_MC_DEFAULT: + handler = create_leftovers_rule(dev, ft_prio, flow_attr, dst); + break; + case IB_FLOW_ATTR_SNIFFER: handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst); - } else { + break; + default: err = -EINVAL; goto destroy_ft; } @@ -1305,39 +1304,47 @@ _get_flow_table(struct mlx5_ib_dev *dev, esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != DEVLINK_ESWITCH_ENCAP_MODE_NONE; - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { - max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - log_max_ft_size)); + switch (fs_matcher->ns_type) { + case MLX5_FLOW_NAMESPACE_BYPASS: + max_table_size = BIT( + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, log_max_ft_size)); if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, reformat_l3_tunnel_to_l2) && !esw_encap) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) { + break; + case MLX5_FLOW_NAMESPACE_EGRESS: max_table_size = BIT( MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size)); - if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap) + if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && + !esw_encap) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) { + break; + case MLX5_FLOW_NAMESPACE_FDB: max_table_size = BIT( MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size)); if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; - if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) && + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, + reformat_l3_tunnel_to_l2) && esw_encap) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; priority = FDB_BYPASS_PATH; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) { - max_table_size = - BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, - log_max_ft_size)); + break; + case MLX5_FLOW_NAMESPACE_RDMA_RX: + max_table_size = BIT( + MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, log_max_ft_size)); priority = fs_matcher->priority; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) { - max_table_size = - BIT(MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, - log_max_ft_size)); + break; + case MLX5_FLOW_NAMESPACE_RDMA_TX: + max_table_size = BIT( + MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size)); priority = fs_matcher->priority; + break; + default: + break; } max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES); @@ -1346,16 +1353,24 @@ _get_flow_table(struct mlx5_ib_dev *dev, if (!ns) return ERR_PTR(-EOPNOTSUPP); - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) + switch (fs_matcher->ns_type) { + case MLX5_FLOW_NAMESPACE_BYPASS: prio = &dev->flow_db->prios[priority]; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) + break; + case MLX5_FLOW_NAMESPACE_EGRESS: prio = &dev->flow_db->egress_prios[priority]; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) + break; + case MLX5_FLOW_NAMESPACE_FDB: prio = &dev->flow_db->fdb; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) + break; + case MLX5_FLOW_NAMESPACE_RDMA_RX: prio = &dev->flow_db->rdma_rx[priority]; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) + break; + case MLX5_FLOW_NAMESPACE_RDMA_TX: prio = &dev->flow_db->rdma_tx[priority]; + break; + default: return ERR_PTR(-EINVAL); + } if (!prio) return ERR_PTR(-EINVAL); @@ -1488,20 +1503,25 @@ static struct mlx5_ib_flow_handler *raw_fs_rule_add( goto unlock; } - if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) { + switch (dest_type) { + case MLX5_FLOW_DESTINATION_TYPE_TIR: dst[dst_num].type = dest_type; dst[dst_num++].tir_num = dest_id; flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) { + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; dst[dst_num++].ft_num = dest_id; flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_PORT) { + break; + case MLX5_FLOW_DESTINATION_TYPE_PORT: dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT; flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; + break; + default: + break; } - if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; dst[dst_num].counter_id = counter_id; diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 40d418153891..7fcad9135276 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -35,44 +35,19 @@ struct mlx5_ib_gsi_wr { struct ib_cqe cqe; struct ib_wc wc; - int send_flags; bool completed:1; }; -struct mlx5_ib_gsi_qp { - struct ib_qp ibqp; - struct ib_qp *rx_qp; - u8 port_num; - struct ib_qp_cap cap; - enum ib_sig_type sq_sig_type; - /* Serialize qp state modifications */ - struct mutex mutex; - struct ib_cq *cq; - struct mlx5_ib_gsi_wr *outstanding_wrs; - u32 outstanding_pi, outstanding_ci; - int num_qps; - /* Protects access to the tx_qps. Post send operations synchronize - * with tx_qp creation in setup_qp(). Also protects the - * outstanding_wrs array and indices. - */ - spinlock_t lock; - struct ib_qp **tx_qps; -}; - -static struct mlx5_ib_gsi_qp *gsi_qp(struct ib_qp *qp) -{ - return container_of(qp, struct mlx5_ib_gsi_qp, ibqp); -} - static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev) { return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn); } /* Call with gsi->lock locked */ -static void generate_completions(struct mlx5_ib_gsi_qp *gsi) +static void generate_completions(struct mlx5_ib_qp *mqp) { - struct ib_cq *gsi_cq = gsi->ibqp.send_cq; + struct mlx5_ib_gsi_qp *gsi = &mqp->gsi; + struct ib_cq *gsi_cq = mqp->ibqp.send_cq; struct mlx5_ib_gsi_wr *wr; u32 index; @@ -83,10 +58,7 @@ static void generate_completions(struct mlx5_ib_gsi_qp *gsi) if (!wr->completed) break; - if (gsi->sq_sig_type == IB_SIGNAL_ALL_WR || - wr->send_flags & IB_SEND_SIGNALED) - WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc)); - + WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc)); wr->completed = false; } @@ -98,6 +70,7 @@ static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc) struct mlx5_ib_gsi_qp *gsi = cq->cq_context; struct mlx5_ib_gsi_wr *wr = container_of(wc->wr_cqe, struct mlx5_ib_gsi_wr, cqe); + struct mlx5_ib_qp *mqp = container_of(gsi, struct mlx5_ib_qp, gsi); u64 wr_id; unsigned long flags; @@ -106,19 +79,19 @@ static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc) wr_id = wr->wc.wr_id; wr->wc = *wc; wr->wc.wr_id = wr_id; - wr->wc.qp = &gsi->ibqp; + wr->wc.qp = &mqp->ibqp; - generate_completions(gsi); + generate_completions(mqp); spin_unlock_irqrestore(&gsi->lock, flags); } -struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr) +int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp, + struct ib_qp_init_attr *attr) { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_gsi_qp *gsi; - struct ib_qp_init_attr hw_init_attr = *init_attr; - const u8 port_num = init_attr->port_num; + struct ib_qp_init_attr hw_init_attr = *attr; + const u8 port_num = attr->port_num; int num_qps = 0; int ret; @@ -130,26 +103,19 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, num_qps = MLX5_MAX_PORTS; } - gsi = kzalloc(sizeof(*gsi), GFP_KERNEL); - if (!gsi) - return ERR_PTR(-ENOMEM); - + gsi = &mqp->gsi; gsi->tx_qps = kcalloc(num_qps, sizeof(*gsi->tx_qps), GFP_KERNEL); - if (!gsi->tx_qps) { - ret = -ENOMEM; - goto err_free; - } + if (!gsi->tx_qps) + return -ENOMEM; - gsi->outstanding_wrs = kcalloc(init_attr->cap.max_send_wr, - sizeof(*gsi->outstanding_wrs), - GFP_KERNEL); + gsi->outstanding_wrs = + kcalloc(attr->cap.max_send_wr, sizeof(*gsi->outstanding_wrs), + GFP_KERNEL); if (!gsi->outstanding_wrs) { ret = -ENOMEM; goto err_free_tx; } - mutex_init(&gsi->mutex); - mutex_lock(&dev->devr.mutex); if (dev->devr.ports[port_num - 1].gsi) { @@ -161,12 +127,10 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, gsi->num_qps = num_qps; spin_lock_init(&gsi->lock); - gsi->cap = init_attr->cap; - gsi->sq_sig_type = init_attr->sq_sig_type; - gsi->ibqp.qp_num = 1; + gsi->cap = attr->cap; gsi->port_num = port_num; - gsi->cq = ib_alloc_cq(pd->device, gsi, init_attr->cap.max_send_wr, 0, + gsi->cq = ib_alloc_cq(pd->device, gsi, attr->cap.max_send_wr, 0, IB_POLL_SOFTIRQ); if (IS_ERR(gsi->cq)) { mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. error %ld\n", @@ -182,19 +146,31 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, hw_init_attr.cap.max_send_sge = 0; hw_init_attr.cap.max_inline_data = 0; } - gsi->rx_qp = ib_create_qp(pd, &hw_init_attr); + + gsi->rx_qp = mlx5_ib_create_qp(pd, &hw_init_attr, NULL); if (IS_ERR(gsi->rx_qp)) { mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n", PTR_ERR(gsi->rx_qp)); ret = PTR_ERR(gsi->rx_qp); goto err_destroy_cq; } + gsi->rx_qp->device = pd->device; + gsi->rx_qp->pd = pd; + gsi->rx_qp->real_qp = gsi->rx_qp; - dev->devr.ports[init_attr->port_num - 1].gsi = gsi; + gsi->rx_qp->qp_type = hw_init_attr.qp_type; + gsi->rx_qp->send_cq = hw_init_attr.send_cq; + gsi->rx_qp->recv_cq = hw_init_attr.recv_cq; + gsi->rx_qp->event_handler = hw_init_attr.event_handler; + spin_lock_init(&gsi->rx_qp->mr_lock); + INIT_LIST_HEAD(&gsi->rx_qp->rdma_mrs); + INIT_LIST_HEAD(&gsi->rx_qp->sig_mrs); + + dev->devr.ports[attr->port_num - 1].gsi = gsi; mutex_unlock(&dev->devr.mutex); - return &gsi->ibqp; + return 0; err_destroy_cq: ib_free_cq(gsi->cq); @@ -203,23 +179,19 @@ err_free_wrs: kfree(gsi->outstanding_wrs); err_free_tx: kfree(gsi->tx_qps); -err_free: - kfree(gsi); - return ERR_PTR(ret); + return ret; } -int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp) +int mlx5_ib_destroy_gsi(struct mlx5_ib_qp *mqp) { - struct mlx5_ib_dev *dev = to_mdev(qp->device); - struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + struct mlx5_ib_dev *dev = to_mdev(mqp->ibqp.device); + struct mlx5_ib_gsi_qp *gsi = &mqp->gsi; const int port_num = gsi->port_num; int qp_index; int ret; - mlx5_ib_dbg(dev, "destroying GSI QP\n"); - mutex_lock(&dev->devr.mutex); - ret = ib_destroy_qp(gsi->rx_qp); + ret = mlx5_ib_destroy_qp(gsi->rx_qp, NULL); if (ret) { mlx5_ib_warn(dev, "unable to destroy hardware GSI QP. error %d\n", ret); @@ -241,7 +213,7 @@ int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp) kfree(gsi->outstanding_wrs); kfree(gsi->tx_qps); - kfree(gsi); + kfree(mqp); return 0; } @@ -259,7 +231,6 @@ static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi) .max_send_sge = gsi->cap.max_send_sge, .max_inline_data = gsi->cap.max_inline_data, }, - .sq_sig_type = gsi->sq_sig_type, .qp_type = IB_QPT_UD, .create_flags = MLX5_IB_QP_CREATE_SQPN_QP1, }; @@ -370,56 +341,54 @@ err_destroy_qp: static void setup_qps(struct mlx5_ib_gsi_qp *gsi) { + struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device); u16 qp_index; + mutex_lock(&dev->devr.mutex); for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) setup_qp(gsi, qp_index); + mutex_unlock(&dev->devr.mutex); } int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, int attr_mask) { struct mlx5_ib_dev *dev = to_mdev(qp->device); - struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + struct mlx5_ib_qp *mqp = to_mqp(qp); + struct mlx5_ib_gsi_qp *gsi = &mqp->gsi; int ret; mlx5_ib_dbg(dev, "modifying GSI QP to state %d\n", attr->qp_state); - mutex_lock(&gsi->mutex); ret = ib_modify_qp(gsi->rx_qp, attr, attr_mask); if (ret) { mlx5_ib_warn(dev, "unable to modify GSI rx QP: %d\n", ret); - goto unlock; + return ret; } if (to_mqp(gsi->rx_qp)->state == IB_QPS_RTS) setup_qps(gsi); - -unlock: - mutex_unlock(&gsi->mutex); - - return ret; + return 0; } int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) { - struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + struct mlx5_ib_qp *mqp = to_mqp(qp); + struct mlx5_ib_gsi_qp *gsi = &mqp->gsi; int ret; - mutex_lock(&gsi->mutex); ret = ib_query_qp(gsi->rx_qp, qp_attr, qp_attr_mask, qp_init_attr); qp_init_attr->cap = gsi->cap; - mutex_unlock(&gsi->mutex); - return ret; } /* Call with gsi->lock locked */ -static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi, +static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_qp *mqp, struct ib_ud_wr *wr, struct ib_wc *wc) { + struct mlx5_ib_gsi_qp *gsi = &mqp->gsi; struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device); struct mlx5_ib_gsi_wr *gsi_wr; @@ -448,22 +417,21 @@ static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi, } /* Call with gsi->lock locked */ -static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi, - struct ib_ud_wr *wr) +static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_qp *mqp, struct ib_ud_wr *wr) { struct ib_wc wc = { { .wr_id = wr->wr.wr_id }, .status = IB_WC_SUCCESS, .opcode = IB_WC_SEND, - .qp = &gsi->ibqp, + .qp = &mqp->ibqp, }; int ret; - ret = mlx5_ib_add_outstanding_wr(gsi, wr, &wc); + ret = mlx5_ib_add_outstanding_wr(mqp, wr, &wc); if (ret) return ret; - generate_completions(gsi); + generate_completions(mqp); return 0; } @@ -490,7 +458,8 @@ static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr) int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) { - struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + struct mlx5_ib_qp *mqp = to_mqp(qp); + struct mlx5_ib_gsi_qp *gsi = &mqp->gsi; struct ib_qp *tx_qp; unsigned long flags; int ret; @@ -503,14 +472,14 @@ int mlx5_ib_gsi_post_send(struct ib_qp *qp, const struct ib_send_wr *wr, spin_lock_irqsave(&gsi->lock, flags); tx_qp = get_tx_qp(gsi, &cur_wr); if (!tx_qp) { - ret = mlx5_ib_gsi_silent_drop(gsi, &cur_wr); + ret = mlx5_ib_gsi_silent_drop(mqp, &cur_wr); if (ret) goto err; spin_unlock_irqrestore(&gsi->lock, flags); continue; } - ret = mlx5_ib_add_outstanding_wr(gsi, &cur_wr, NULL); + ret = mlx5_ib_add_outstanding_wr(mqp, &cur_wr, NULL); if (ret) goto err; @@ -534,7 +503,8 @@ err: int mlx5_ib_gsi_post_recv(struct ib_qp *qp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { - struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp); + struct mlx5_ib_qp *mqp = to_mqp(qp); + struct mlx5_ib_gsi_qp *gsi = &mqp->gsi; return ib_post_recv(gsi->rx_qp, wr, bad_wr); } @@ -544,7 +514,5 @@ void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi) if (!gsi) return; - mutex_lock(&gsi->mutex); setup_qps(gsi); - mutex_unlock(&gsi->mutex); } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index d60d63221b14..89e04ca62ae0 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -326,8 +326,8 @@ out: spin_unlock(&port->mp.mpi_lock); } -static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, u8 *active_speed, - u8 *active_width) +static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, + u16 *active_speed, u8 *active_width) { switch (eth_proto_oper) { case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII): @@ -384,7 +384,7 @@ static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, u8 *active_speed, return 0; } -static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed, +static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed, u8 *active_width) { switch (eth_proto_oper) { @@ -436,7 +436,7 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed, return 0; } -static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed, +static int translate_eth_proto_oper(u32 eth_proto_oper, u16 *active_speed, u8 *active_width, bool ext) { return ext ? @@ -546,7 +546,7 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, unsigned int index, const union ib_gid *gid, const struct ib_gid_attr *attr) { - enum ib_gid_type gid_type = IB_GID_TYPE_IB; + enum ib_gid_type gid_type = IB_GID_TYPE_ROCE; u16 vlan_id = 0xffff; u8 roce_version = 0; u8 roce_l3_type = 0; @@ -561,7 +561,7 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, } switch (gid_type) { - case IB_GID_TYPE_IB: + case IB_GID_TYPE_ROCE: roce_version = MLX5_ROCE_VERSION_1; break; case IB_GID_TYPE_ROCE_UDP_ENCAP: @@ -840,7 +840,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, /* We support 'Gappy' memory registration too */ props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG; } - props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; + /* IB_WR_REG_MR always requires changing the entity size with UMR */ + if (!MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) + props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; if (MLX5_CAP_GEN(mdev, sho)) { props->device_cap_flags |= IB_DEVICE_INTEGRITY_HANDOVER; /* At this stage no support for signature handover */ @@ -1175,32 +1177,24 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, return 0; } -enum mlx5_ib_width { - MLX5_IB_WIDTH_1X = 1 << 0, - MLX5_IB_WIDTH_2X = 1 << 1, - MLX5_IB_WIDTH_4X = 1 << 2, - MLX5_IB_WIDTH_8X = 1 << 3, - MLX5_IB_WIDTH_12X = 1 << 4 -}; - -static void translate_active_width(struct ib_device *ibdev, u8 active_width, - u8 *ib_width) +static void translate_active_width(struct ib_device *ibdev, u16 active_width, + u8 *ib_width) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - if (active_width & MLX5_IB_WIDTH_1X) + if (active_width & MLX5_PTYS_WIDTH_1X) *ib_width = IB_WIDTH_1X; - else if (active_width & MLX5_IB_WIDTH_2X) + else if (active_width & MLX5_PTYS_WIDTH_2X) *ib_width = IB_WIDTH_2X; - else if (active_width & MLX5_IB_WIDTH_4X) + else if (active_width & MLX5_PTYS_WIDTH_4X) *ib_width = IB_WIDTH_4X; - else if (active_width & MLX5_IB_WIDTH_8X) + else if (active_width & MLX5_PTYS_WIDTH_8X) *ib_width = IB_WIDTH_8X; - else if (active_width & MLX5_IB_WIDTH_12X) + else if (active_width & MLX5_PTYS_WIDTH_12X) *ib_width = IB_WIDTH_12X; else { mlx5_ib_dbg(dev, "Invalid active_width %d, setting width to default value: 4x\n", - (int)active_width); + active_width); *ib_width = IB_WIDTH_4X; } @@ -1277,7 +1271,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, u16 max_mtu; u16 oper_mtu; int err; - u8 ib_link_width_oper; + u16 ib_link_width_oper; u8 vl_hw_cap; rep = kzalloc(sizeof(*rep), GFP_KERNEL); @@ -1310,16 +1304,13 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP) props->port_cap_flags2 = rep->cap_mask2; - err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port); + err = mlx5_query_ib_port_oper(mdev, &ib_link_width_oper, + &props->active_speed, port); if (err) goto out; translate_active_width(ibdev, ib_link_width_oper, &props->active_width); - err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port); - if (err) - goto out; - mlx5_query_port_max_mtu(mdev, &max_mtu, port); props->max_mtu = mlx5_mtu_to_ib_mtu(max_mtu); @@ -2354,7 +2345,9 @@ static inline int check_dm_type_support(struct mlx5_ib_dev *dev, return -EPERM; if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) || - MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner))) + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner) || + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2) || + MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner_v2))) return -EOPNOTSUPP; break; } @@ -2569,12 +2562,12 @@ static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) return 0; } -static void mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) +static int mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { struct mlx5_ib_dev *mdev = to_mdev(pd->device); struct mlx5_ib_pd *mpd = to_mpd(pd); - mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid); + return mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid); } static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) @@ -2699,9 +2692,7 @@ static void pkey_change_handler(struct work_struct *work) container_of(work, struct mlx5_ib_port_resources, pkey_change_work); - mutex_lock(&ports->devr->mutex); mlx5_ib_gsi_pkey_change(ports->gsi); - mutex_unlock(&ports->devr->mutex); } static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev) @@ -3127,11 +3118,9 @@ static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev) atomic_inc(&devr->p0->usecnt); atomic_set(&devr->s1->usecnt, 0); - for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) { + for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) INIT_WORK(&devr->ports[port].pkey_change_work, pkey_change_handler); - devr->ports[port].devr = devr; - } return 0; @@ -4098,6 +4087,8 @@ static const struct ib_device_ops mlx5_ib_dev_sriov_ops = { static const struct ib_device_ops mlx5_ib_dev_mw_ops = { .alloc_mw = mlx5_ib_alloc_mw, .dealloc_mw = mlx5_ib_dealloc_mw, + + INIT_RDMA_OBJ_SIZE(ib_mw, mlx5_ib_mw, ibmw), }; static const struct ib_device_ops mlx5_ib_dev_xrc_ops = { @@ -4268,6 +4259,9 @@ static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = { .destroy_wq = mlx5_ib_destroy_wq, .get_netdev = mlx5_ib_get_netdev, .modify_wq = mlx5_ib_modify_wq, + + INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mlx5_ib_rwq_ind_table, + ib_rwq_ind_tbl), }; static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev) @@ -4386,7 +4380,7 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) name = "mlx5_%d"; else name = "mlx5_bond_%d"; - return ib_register_device(&dev->ib_dev, name); + return ib_register_device(&dev->ib_dev, name, &dev->mdev->pdev->dev); } static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index c19ec9fd8a63..13de3d2edd34 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -169,8 +169,8 @@ void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, int page_shift, __be64 *pas, int access_flags) { return __mlx5_ib_populate_pas(dev, umem, page_shift, 0, - ib_umem_num_pages(umem), pas, - access_flags); + ib_umem_num_dma_blocks(umem, PAGE_SIZE), + pas, access_flags); } int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset) { diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 5287fc868662..b1f2b34e5955 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -384,6 +384,22 @@ struct mlx5_ib_dct { u32 *in; }; +struct mlx5_ib_gsi_qp { + struct ib_qp *rx_qp; + u8 port_num; + struct ib_qp_cap cap; + struct ib_cq *cq; + struct mlx5_ib_gsi_wr *outstanding_wrs; + u32 outstanding_pi, outstanding_ci; + int num_qps; + /* Protects access to the tx_qps. Post send operations synchronize + * with tx_qp creation in setup_qp(). Also protects the + * outstanding_wrs array and indices. + */ + spinlock_t lock; + struct ib_qp **tx_qps; +}; + struct mlx5_ib_qp { struct ib_qp ibqp; union { @@ -391,6 +407,7 @@ struct mlx5_ib_qp { struct mlx5_ib_raw_packet_qp raw_packet_qp; struct mlx5_ib_rss_qp rss_qp; struct mlx5_ib_dct dct; + struct mlx5_ib_gsi_qp gsi; }; struct mlx5_frag_buf buf; @@ -693,10 +710,7 @@ struct mlx5_mr_cache { unsigned long last_add; }; -struct mlx5_ib_gsi_qp; - struct mlx5_ib_port_resources { - struct mlx5_ib_resources *devr; struct mlx5_ib_gsi_qp *gsi; struct work_struct pkey_change_work; }; @@ -1119,13 +1133,16 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); int mlx5_ib_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -void mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags); +static inline int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags) +{ + return 0; +} int mlx5_ib_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr, struct ib_udata *udata); int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr); -void mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); +int mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp); @@ -1148,7 +1165,7 @@ int mlx5_ib_read_wqe_srq(struct mlx5_ib_srq *srq, int wqe_index, void *buffer, size_t buflen, size_t *bc); int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); -void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); +int mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period); @@ -1163,8 +1180,7 @@ int mlx5_ib_advise_mr(struct ib_pd *pd, struct ib_sge *sg_list, u32 num_sge, struct uverbs_attr_bundle *attrs); -struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, - struct ib_udata *udata); +int mlx5_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); int mlx5_ib_dealloc_mw(struct ib_mw *mw); int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, int page_shift, int flags); @@ -1193,7 +1209,7 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_mad *in, struct ib_mad *out, size_t *out_mad_size, u16 *out_mad_pkey_index); int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); -void mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); +int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset); int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, @@ -1229,7 +1245,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev); int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev); struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - unsigned int entry); + unsigned int entry, int access_flags); void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr); @@ -1238,12 +1254,12 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata); -void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); +int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, u32 wq_attr_mask, struct ib_udata *udata); -struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, - struct ib_rwq_ind_table_init_attr *init_attr, - struct ib_udata *udata); +int mlx5_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata); int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, struct ib_ucontext *context, @@ -1267,6 +1283,7 @@ void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries, int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge); +int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable); #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { @@ -1288,6 +1305,10 @@ mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, { return -EOPNOTSUPP; } +static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ extern const struct mmu_interval_notifier_ops mlx5_mn_ops; @@ -1318,9 +1339,9 @@ void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); void mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num); /* GSI QP helper functions */ -struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr); -int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp); +int mlx5_ib_create_gsi(struct ib_pd *pd, struct mlx5_ib_qp *mqp, + struct ib_qp_init_attr *attr); +int mlx5_ib_destroy_gsi(struct mlx5_ib_qp *mqp); int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr, int attr_mask); int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, @@ -1358,7 +1379,7 @@ static inline void init_query_mad(struct ib_smp *mad) static inline int is_qp1(enum ib_qp_type qp_type) { - return qp_type == MLX5_IB_QPT_HW_GSI; + return qp_type == MLX5_IB_QPT_HW_GSI || qp_type == IB_QPT_GSI; } #define MLX5_MAX_UMR_SHIFT 16 @@ -1442,25 +1463,54 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, u32 bfregn, bool dyn_bfreg); -static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev, - bool do_modify_atomic, int access_flags) +static inline bool mlx5_ib_can_load_pas_with_umr(struct mlx5_ib_dev *dev, + size_t length) { + /* + * umr_check_mkey_mask() rejects MLX5_MKEY_MASK_PAGE_SIZE which is + * always set if MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (aka + * MLX5_IB_UPD_XLT_ADDR and MLX5_IB_UPD_XLT_ENABLE) is set. Thus, a mkey + * can never be enabled without this capability. Simplify this weird + * quirky hardware by just saying it can't use PAS lists with UMR at + * all. + */ if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) return false; - if (do_modify_atomic && + /* + * length is the size of the MR in bytes when mlx5_ib_update_xlt() is + * used. + */ + if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) && + length >= MLX5_MAX_UMR_PAGES * PAGE_SIZE) + return false; + return true; +} + +/* + * true if an existing MR can be reconfigured to new access_flags using UMR. + * Older HW cannot use UMR to update certain elements of the MKC. See + * umr_check_mkey_mask(), get_umr_update_access_mask() and umr_check_mkey_mask() + */ +static inline bool mlx5_ib_can_reconfig_with_umr(struct mlx5_ib_dev *dev, + unsigned int current_access_flags, + unsigned int target_access_flags) +{ + unsigned int diffs = current_access_flags ^ target_access_flags; + + if ((diffs & IB_ACCESS_REMOTE_ATOMIC) && MLX5_CAP_GEN(dev->mdev, atomic) && MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) return false; - if (access_flags & IB_ACCESS_RELAXED_ORDERING && + if ((diffs & IB_ACCESS_RELAXED_ORDERING) && MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) && !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) return false; - if (access_flags & IB_ACCESS_RELAXED_ORDERING && - MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && - !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) + if ((diffs & IB_ACCESS_RELAXED_ORDERING) && + MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) && + !MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) return false; return true; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 3e6f2f9c6655..b261797b258f 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -50,6 +50,29 @@ enum { static void create_mkey_callback(int status, struct mlx5_async_work *context); +static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, + struct ib_pd *pd) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + + MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); + MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); + MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); + MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); + MLX5_SET(mkc, mkc, lr, 1); + + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) + MLX5_SET(mkc, mkc, relaxed_ordering_write, + !!(acc & IB_ACCESS_RELAXED_ORDERING)); + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) + MLX5_SET(mkc, mkc, relaxed_ordering_read, + !!(acc & IB_ACCESS_RELAXED_ORDERING)); + + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET64(mkc, mkc, start_addr, start_addr); +} + static void assign_mkey_variant(struct mlx5_ib_dev *dev, struct mlx5_core_mkey *mkey, u32 *in) @@ -100,7 +123,8 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); } -static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length) +static inline bool mlx5_ib_pas_fits_in_mr(struct mlx5_ib_mr *mr, u64 start, + u64 length) { return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >= length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); @@ -152,12 +176,12 @@ static struct mlx5_ib_mr *alloc_cache_mr(struct mlx5_cache_ent *ent, void *mkc) mr->cache_ent = ent; mr->dev = ent->dev; + set_mkc_access_pd_addr_fields(mkc, 0, 0, ent->dev->umrc.pd); MLX5_SET(mkc, mkc, free, 1); MLX5_SET(mkc, mkc, umr_en, 1); MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3); MLX5_SET(mkc, mkc, access_mode_4_2, (ent->access_mode >> 2) & 0x7); - MLX5_SET(mkc, mkc, qpn, 0xffffff); MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt); MLX5_SET(mkc, mkc, log_page_size, ent->page); return mr; @@ -534,7 +558,7 @@ static void cache_work_func(struct work_struct *work) /* Allocate a special entry from the cache */ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, - unsigned int entry) + unsigned int entry, int access_flags) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent; @@ -544,6 +568,10 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, entry >= ARRAY_SIZE(cache->ent))) return ERR_PTR(-EINVAL); + /* Matches access in alloc_cache_mr() */ + if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) + return ERR_PTR(-EOPNOTSUPP); + ent = &cache->ent[entry]; spin_lock_irq(&ent->lock); if (list_empty(&ent->head)) { @@ -558,6 +586,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, queue_adjust_cache_locked(ent); spin_unlock_irq(&ent->lock); } + mr->access_flags = access_flags; return mr; } @@ -730,8 +759,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) MLX5_IB_UMR_OCTOWORD; ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) && - !dev->is_rep && - mlx5_core_is_pf(dev->mdev)) + !dev->is_rep && mlx5_core_is_pf(dev->mdev) && + mlx5_ib_can_load_pas_with_umr(dev, 0)) ent->limit = dev->mdev->profile->mr_cache[i].limit; else ent->limit = 0; @@ -774,29 +803,6 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) return 0; } -static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, - struct ib_pd *pd) -{ - struct mlx5_ib_dev *dev = to_mdev(pd->device); - - MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); - MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); - MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); - MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); - MLX5_SET(mkc, mkc, lr, 1); - - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) - MLX5_SET(mkc, mkc, relaxed_ordering_write, - !!(acc & IB_ACCESS_RELAXED_ORDERING)); - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) - MLX5_SET(mkc, mkc, relaxed_ordering_read, - !!(acc & IB_ACCESS_RELAXED_ORDERING)); - - MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); - MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET64(mkc, mkc, start_addr, start_addr); -} - struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) { struct mlx5_ib_dev *dev = to_mdev(pd->device); @@ -979,6 +985,11 @@ alloc_mr_from_cache(struct ib_pd *pd, struct ib_umem *umem, u64 virt_addr, if (!ent) return ERR_PTR(-E2BIG); + + /* Matches access in alloc_cache_mr() */ + if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) + return ERR_PTR(-EOPNOTSUPP); + mr = get_cache_mr(ent); if (!mr) { mr = create_cache_mr(ent); @@ -1181,38 +1192,31 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, goto err_1; } pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); - if (populate && !(access_flags & IB_ACCESS_ON_DEMAND)) + if (populate) { + if (WARN_ON(access_flags & IB_ACCESS_ON_DEMAND)) { + err = -EINVAL; + goto err_2; + } mlx5_ib_populate_pas(dev, umem, page_shift, pas, pg_cap ? MLX5_IB_MTT_PRESENT : 0); + } /* The pg_access bit allows setting the access flags * in the page list submitted with the command. */ MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap)); mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + set_mkc_access_pd_addr_fields(mkc, access_flags, virt_addr, + populate ? pd : dev->umrc.pd); MLX5_SET(mkc, mkc, free, !populate); MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) - MLX5_SET(mkc, mkc, relaxed_ordering_write, - !!(access_flags & IB_ACCESS_RELAXED_ORDERING)); - if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) - MLX5_SET(mkc, mkc, relaxed_ordering_read, - !!(access_flags & IB_ACCESS_RELAXED_ORDERING)); - MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); - MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE)); - MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ)); - MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE)); - MLX5_SET(mkc, mkc, lr, 1); MLX5_SET(mkc, mkc, umr_en, 1); - MLX5_SET64(mkc, mkc, start_addr, virt_addr); MLX5_SET64(mkc, mkc, len, length); - MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); MLX5_SET(mkc, mkc, bsf_octword_size, 0); MLX5_SET(mkc, mkc, translations_octword_size, get_octo_len(virt_addr, length, page_shift)); MLX5_SET(mkc, mkc, log_page_size, page_shift); - MLX5_SET(mkc, mkc, qpn, 0xffffff); if (populate) { MLX5_SET(create_mkey_in, in, translations_octword_actual_size, get_octo_len(virt_addr, length, page_shift)); @@ -1308,7 +1312,8 @@ int mlx5_ib_advise_mr(struct ib_pd *pd, struct uverbs_attr_bundle *attrs) { if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH && - advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE) + advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE && + advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT) return -EOPNOTSUPP; return mlx5_ib_advise_mr_prefetch(pd, advice, flags, @@ -1353,7 +1358,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_ib_mr *mr = NULL; - bool use_umr; + bool xlt_with_umr; struct ib_umem *umem; int page_shift; int npages; @@ -1367,6 +1372,11 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", start, virt_addr, length, access_flags); + xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, length); + /* ODP requires xlt update via umr to work. */ + if (!xlt_with_umr && (access_flags & IB_ACCESS_ON_DEMAND)) + return ERR_PTR(-EINVAL); + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start && length == U64_MAX) { if (virt_addr != start) @@ -1387,28 +1397,17 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (err < 0) return ERR_PTR(err); - use_umr = mlx5_ib_can_use_umr(dev, true, access_flags); - - if (order <= mr_cache_max_order(dev) && use_umr) { + if (xlt_with_umr) { mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont, page_shift, order, access_flags); - if (PTR_ERR(mr) == -EAGAIN) { - mlx5_ib_dbg(dev, "cache empty for order %d\n", order); + if (IS_ERR(mr)) mr = NULL; - } - } else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) { - if (access_flags & IB_ACCESS_ON_DEMAND) { - err = -EINVAL; - pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n"); - goto error; - } - use_umr = false; } if (!mr) { mutex_lock(&dev->slow_path_mutex); mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, - page_shift, access_flags, !use_umr); + page_shift, access_flags, !xlt_with_umr); mutex_unlock(&dev->slow_path_mutex); } @@ -1422,15 +1421,16 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->umem = umem; set_mr_fields(dev, mr, npages, length, access_flags); - if (use_umr) { + if (xlt_with_umr && !(access_flags & IB_ACCESS_ON_DEMAND)) { + /* + * If the MR was created with reg_create then it will be + * configured properly but left disabled. It is safe to go ahead + * and configure it again via UMR while enabling it. + */ int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; - if (access_flags & IB_ACCESS_ON_DEMAND) - update_xlt_flags |= MLX5_IB_UPD_XLT_ZAP; - err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift, update_xlt_flags); - if (err) { dereg_mr(dev, mr); return ERR_PTR(err); @@ -1448,6 +1448,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, dereg_mr(dev, mr); return ERR_PTR(err); } + + err = mlx5_ib_init_odp_mr(mr, xlt_with_umr); + if (err) { + dereg_mr(dev, mr); + return ERR_PTR(err); + } } return &mr->ibmr; @@ -1555,8 +1561,11 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, goto err; } - if (!mlx5_ib_can_use_umr(dev, true, access_flags) || - (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len))) { + if (!mlx5_ib_can_reconfig_with_umr(dev, mr->access_flags, + access_flags) || + !mlx5_ib_can_load_pas_with_umr(dev, len) || + (flags & IB_MR_REREG_TRANS && + !mlx5_ib_pas_fits_in_mr(mr, addr, len))) { /* * UMR can't be used - MKey needs to be replaced. */ @@ -1727,9 +1736,9 @@ static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs, mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + /* This is only used from the kernel, so setting the PD is OK. */ + set_mkc_access_pd_addr_fields(mkc, 0, 0, pd); MLX5_SET(mkc, mkc, free, 1); - MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); MLX5_SET(mkc, mkc, translations_octword_size, ndescs); MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3); MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7); @@ -1973,12 +1982,11 @@ struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd, max_num_meta_sg); } -struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, - struct ib_udata *udata) +int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_dev *dev = to_mdev(ibmw->device); int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); - struct mlx5_ib_mw *mw = NULL; + struct mlx5_ib_mw *mw = to_mmw(ibmw); u32 *in = NULL; void *mkc; int ndescs; @@ -1991,21 +1999,20 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req))); if (err) - return ERR_PTR(err); + return err; if (req.comp_mask || req.reserved1 || req.reserved2) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; if (udata->inlen > sizeof(req) && !ib_is_udata_cleared(udata, sizeof(req), udata->inlen - sizeof(req))) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4); - mw = kzalloc(sizeof(*mw), GFP_KERNEL); in = kzalloc(inlen, GFP_KERNEL); - if (!mw || !in) { + if (!in) { err = -ENOMEM; goto free; } @@ -2014,11 +2021,11 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, MLX5_SET(mkc, mkc, free, 1); MLX5_SET(mkc, mkc, translations_octword_size, ndescs); - MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, pd, to_mpd(ibmw->pd)->pdn); MLX5_SET(mkc, mkc, umr_en, 1); MLX5_SET(mkc, mkc, lr, 1); MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS); - MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2))); + MLX5_SET(mkc, mkc, en_rinval, !!((ibmw->type == IB_MW_TYPE_2))); MLX5_SET(mkc, mkc, qpn, 0xffffff); err = mlx5_ib_create_mkey(dev, &mw->mmkey, in, inlen); @@ -2026,17 +2033,15 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, goto free; mw->mmkey.type = MLX5_MKEY_MW; - mw->ibmw.rkey = mw->mmkey.key; + ibmw->rkey = mw->mmkey.key; mw->ndescs = ndescs; - resp.response_length = min(offsetof(typeof(resp), response_length) + - sizeof(resp.response_length), udata->outlen); + resp.response_length = + min(offsetofend(typeof(resp), response_length), udata->outlen); if (resp.response_length) { err = ib_copy_to_udata(udata, &resp, resp.response_length); - if (err) { - mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey); - goto free; - } + if (err) + goto free_mkey; } if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { @@ -2048,21 +2053,19 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, } kfree(in); - return &mw->ibmw; + return 0; free_mkey: mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey); free: - kfree(mw); kfree(in); - return ERR_PTR(err); + return err; } int mlx5_ib_dealloc_mw(struct ib_mw *mw) { struct mlx5_ib_dev *dev = to_mdev(mw->device); struct mlx5_ib_mw *mmw = to_mmw(mw); - int err; if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key)); @@ -2073,11 +2076,7 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw) synchronize_srcu(&dev->odp_srcu); } - err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey); - if (err) - return err; - kfree(mmw); - return 0; + return mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey); } int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index cfd7efab114e..5c853ec1b0d8 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -382,7 +382,7 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) memset(caps, 0, sizeof(*caps)); if (!MLX5_CAP_GEN(dev->mdev, pg) || - !mlx5_ib_can_use_umr(dev, true, 0)) + !mlx5_ib_can_load_pas_with_umr(dev, 0)) return; caps->general_caps = IB_ODP_SUPPORT; @@ -476,12 +476,12 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, if (IS_ERR(odp)) return ERR_CAST(odp); - ret = mr = mlx5_mr_cache_alloc(imr->dev, MLX5_IMR_MTT_CACHE_ENTRY); + ret = mr = mlx5_mr_cache_alloc(imr->dev, MLX5_IMR_MTT_CACHE_ENTRY, + imr->access_flags); if (IS_ERR(mr)) goto out_umem; mr->ibmr.pd = imr->ibmr.pd; - mr->access_flags = imr->access_flags; mr->umem = &odp->umem; mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; @@ -540,14 +540,13 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, if (IS_ERR(umem_odp)) return ERR_CAST(umem_odp); - imr = mlx5_mr_cache_alloc(dev, MLX5_IMR_KSM_CACHE_ENTRY); + imr = mlx5_mr_cache_alloc(dev, MLX5_IMR_KSM_CACHE_ENTRY, access_flags); if (IS_ERR(imr)) { err = PTR_ERR(imr); goto out_umem; } imr->ibmr.pd = &pd->ibpd; - imr->access_flags = access_flags; imr->mmkey.iova = 0; imr->umem = &umem_odp->umem; imr->ibmr.lkey = imr->mmkey.key; @@ -666,15 +665,21 @@ void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr) } #define MLX5_PF_FLAGS_DOWNGRADE BIT(1) +#define MLX5_PF_FLAGS_SNAPSHOT BIT(2) +#define MLX5_PF_FLAGS_ENABLE BIT(3) static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, u64 user_va, size_t bcnt, u32 *bytes_mapped, u32 flags) { int page_shift, ret, np; bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE; - unsigned long current_seq; u64 access_mask; u64 start_idx; + bool fault = !(flags & MLX5_PF_FLAGS_SNAPSHOT); + u32 xlt_flags = MLX5_IB_UPD_XLT_ATOMIC; + + if (flags & MLX5_PF_FLAGS_ENABLE) + xlt_flags |= MLX5_IB_UPD_XLT_ENABLE; page_shift = odp->page_shift; start_idx = (user_va - ib_umem_start(odp)) >> page_shift; @@ -683,25 +688,15 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp, if (odp->umem.writable && !downgrade) access_mask |= ODP_WRITE_ALLOWED_BIT; - current_seq = mmu_interval_read_begin(&odp->notifier); - - np = ib_umem_odp_map_dma_pages(odp, user_va, bcnt, access_mask, - current_seq); + np = ib_umem_odp_map_dma_and_lock(odp, user_va, bcnt, access_mask, fault); if (np < 0) return np; - mutex_lock(&odp->umem_mutex); - if (!mmu_interval_read_retry(&odp->notifier, current_seq)) { - /* - * No need to check whether the MTTs really belong to - * this MR, since ib_umem_odp_map_dma_pages already - * checks this. - */ - ret = mlx5_ib_update_xlt(mr, start_idx, np, - page_shift, MLX5_IB_UPD_XLT_ATOMIC); - } else { - ret = -EAGAIN; - } + /* + * No need to check whether the MTTs really belong to this MR, since + * ib_umem_odp_map_dma_and_lock already checks this. + */ + ret = mlx5_ib_update_xlt(mr, start_idx, np, page_shift, xlt_flags); mutex_unlock(&odp->umem_mutex); if (ret < 0) { @@ -836,6 +831,20 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, flags); } +int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable) +{ + u32 flags = MLX5_PF_FLAGS_SNAPSHOT; + int ret; + + if (enable) + flags |= MLX5_PF_FLAGS_ENABLE; + + ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem), + mr->umem->address, mr->umem->length, NULL, + flags); + return ret >= 0 ? 0 : ret; +} + struct pf_frame { struct pf_frame *next; u32 key; @@ -1862,6 +1871,9 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH) pf_flags |= MLX5_PF_FLAGS_DOWNGRADE; + if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT) + pf_flags |= MLX5_PF_FLAGS_SNAPSHOT; + if (flags & IB_UVERBS_ADVISE_MR_FLAG_FLUSH) return mlx5_ib_prefetch_sg_list(pd, advice, pf_flags, sg_list, num_sge); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 5758dbe64045..600e056798c0 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1477,7 +1477,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_RQN; resp->tirn = rq->tirn; resp->comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN; - if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner)) { + if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) || + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2)) { resp->tir_icm_addr = MLX5_GET( create_tir_out, out, icm_address_31_0); resp->tir_icm_addr |= @@ -1739,7 +1740,8 @@ create_tir: if (mucontext->devx_uid) { params->resp.comp_mask |= MLX5_IB_CREATE_QP_RESP_MASK_TIRN; params->resp.tirn = qp->rss_qp.tirn; - if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner)) { + if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) || + MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner_v2)) { params->resp.tir_icm_addr = MLX5_GET(create_tir_out, out, icm_address_31_0); params->resp.tir_icm_addr |= @@ -2409,6 +2411,9 @@ static int create_dct(struct mlx5_ib_dev *dev, struct ib_pd *pd, u32 uidx = params->uidx; void *dctc; + if (mlx5_lag_is_active(dev->mdev) && !MLX5_CAP_GEN(dev->mdev, lag_dct)) + return -EOPNOTSUPP; + qp->dct.in = kzalloc(MLX5_ST_SZ_BYTES(create_dct_in), GFP_KERNEL); if (!qp->dct.in) return -ENOMEM; @@ -2506,18 +2511,6 @@ static int check_valid_flow(struct mlx5_ib_dev *dev, struct ib_pd *pd, return -EINVAL; } - switch (attr->qp_type) { - case IB_QPT_SMI: - case MLX5_IB_QPT_HW_GSI: - case MLX5_IB_QPT_REG_UMR: - case IB_QPT_GSI: - mlx5_ib_dbg(dev, "Kernel doesn't support QP type %d\n", - attr->qp_type); - return -EINVAL; - default: - break; - } - /* * We don't need to see this warning, it means that kernel code * missing ib_pd. Placed here to catch developer's mistakes. @@ -2780,21 +2773,23 @@ static int create_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, goto out; } - if (qp->type == MLX5_IB_QPT_DCT) { + switch (qp->type) { + case MLX5_IB_QPT_DCT: err = create_dct(dev, pd, qp, params); - goto out; - } - - if (qp->type == IB_QPT_XRC_TGT) { + break; + case IB_QPT_XRC_TGT: err = create_xrc_tgt_qp(dev, qp, params); - goto out; + break; + case IB_QPT_GSI: + err = mlx5_ib_create_gsi(pd, qp, params->attr); + break; + default: + if (params->udata) + err = create_user_qp(dev, pd, qp, params); + else + err = create_kernel_qp(dev, pd, qp, params); } - if (params->udata) - err = create_user_qp(dev, pd, qp, params); - else - err = create_kernel_qp(dev, pd, qp, params); - out: if (err) { mlx5_ib_err(dev, "Create QP type %d failed\n", qp->type); @@ -2934,9 +2929,6 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr, if (err) return ERR_PTR(err); - if (attr->qp_type == IB_QPT_GSI) - return mlx5_ib_gsi_create_qp(pd, attr); - params.udata = udata; params.uidx = MLX5_IB_DEFAULT_UIDX; params.attr = attr; @@ -3005,9 +2997,14 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attr, return &qp->ibqp; destroy_qp: - if (qp->type == MLX5_IB_QPT_DCT) { + switch (qp->type) { + case MLX5_IB_QPT_DCT: mlx5_ib_destroy_dct(qp); - } else { + break; + case IB_QPT_GSI: + mlx5_ib_destroy_gsi(qp); + break; + default: /* * These lines below are temp solution till QP allocation * will be moved to be under IB/core responsiblity. @@ -3032,7 +3029,7 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) struct mlx5_ib_qp *mqp = to_mqp(qp); if (unlikely(qp->qp_type == IB_QPT_GSI)) - return mlx5_ib_gsi_destroy_qp(qp); + return mlx5_ib_destroy_gsi(mqp); if (mqp->type == MLX5_IB_QPT_DCT) return mlx5_ib_destroy_dct(mqp); @@ -3088,20 +3085,44 @@ enum { MLX5_PATH_FLAG_COUNTER = 1 << 2, }; +static int ib_to_mlx5_rate_map(u8 rate) +{ + switch (rate) { + case IB_RATE_PORT_CURRENT: + return 0; + case IB_RATE_56_GBPS: + return 1; + case IB_RATE_25_GBPS: + return 2; + case IB_RATE_100_GBPS: + return 3; + case IB_RATE_200_GBPS: + return 4; + case IB_RATE_50_GBPS: + return 5; + default: + return rate + MLX5_STAT_RATE_OFFSET; + }; + + return 0; +} + static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) { + u32 stat_rate_support; + if (rate == IB_RATE_PORT_CURRENT) return 0; if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_600_GBPS) return -EINVAL; + stat_rate_support = MLX5_CAP_GEN(dev->mdev, stat_rate_support); while (rate != IB_RATE_PORT_CURRENT && - !(1 << (rate + MLX5_STAT_RATE_OFFSET) & - MLX5_CAP_GEN(dev->mdev, stat_rate_support))) + !(1 << ib_to_mlx5_rate_map(rate) & stat_rate_support)) --rate; - return rate ? rate + MLX5_STAT_RATE_OFFSET : rate; + return ib_to_mlx5_rate_map(rate); } static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev, @@ -3643,14 +3664,12 @@ static unsigned int get_tx_affinity_rr(struct mlx5_ib_dev *dev, MLX5_MAX_PORTS + 1; } -static bool qp_supports_affinity(struct ib_qp *qp) +static bool qp_supports_affinity(struct mlx5_ib_qp *qp) { - if ((qp->qp_type == IB_QPT_RC) || - (qp->qp_type == IB_QPT_UD) || - (qp->qp_type == IB_QPT_UC) || - (qp->qp_type == IB_QPT_RAW_PACKET) || - (qp->qp_type == IB_QPT_XRC_INI) || - (qp->qp_type == IB_QPT_XRC_TGT)) + if ((qp->type == IB_QPT_RC) || (qp->type == IB_QPT_UD) || + (qp->type == IB_QPT_UC) || (qp->type == IB_QPT_RAW_PACKET) || + (qp->type == IB_QPT_XRC_INI) || (qp->type == IB_QPT_XRC_TGT) || + (qp->type == MLX5_IB_QPT_DCI)) return true; return false; } @@ -3668,7 +3687,7 @@ static unsigned int get_tx_affinity(struct ib_qp *qp, unsigned int tx_affinity; if (!(mlx5_ib_lag_should_assign_affinity(dev) && - qp_supports_affinity(qp))) + qp_supports_affinity(mqp))) return 0; if (mqp->flags & MLX5_IB_QP_CREATE_SQPN_QP1) @@ -4161,7 +4180,11 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, MLX5_SET(dctc, dctc, rae, 1); } MLX5_SET(dctc, dctc, pkey_index, attr->pkey_index); - MLX5_SET(dctc, dctc, port, attr->port_num); + if (mlx5_lag_is_active(dev->mdev)) + MLX5_SET(dctc, dctc, port, + get_tx_affinity_rr(dev, udata)); + else + MLX5_SET(dctc, dctc, port, attr->port_num); set_id = mlx5_ib_get_counters_id(dev, attr->port_num - 1); MLX5_SET(dctc, dctc, counter_set_id, set_id); @@ -4716,12 +4739,12 @@ int mlx5_ib_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata) return mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0); } -void mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) +int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(xrcd->device); u32 xrcdn = to_mxrcd(xrcd)->xrcdn; - mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0); + return mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0); } static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type) @@ -4921,8 +4944,8 @@ static int prepare_user_rq(struct ib_pd *pd, int err; size_t required_cmd_sz; - required_cmd_sz = offsetof(typeof(ucmd), single_stride_log_num_of_bytes) - + sizeof(ucmd.single_stride_log_num_of_bytes); + required_cmd_sz = offsetofend(struct mlx5_ib_create_wq, + single_stride_log_num_of_bytes); if (udata->inlen < required_cmd_sz) { mlx5_ib_dbg(dev, "invalid inlen\n"); return -EINVAL; @@ -5006,7 +5029,7 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, if (!udata) return ERR_PTR(-ENOSYS); - min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); + min_resp_len = offsetofend(struct mlx5_ib_create_wq_resp, reserved); if (udata->outlen && udata->outlen < min_resp_len) return ERR_PTR(-EINVAL); @@ -5036,8 +5059,8 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, rwq->ibwq.wq_num = rwq->core_qp.qpn; rwq->ibwq.state = IB_WQS_RESET; if (udata->outlen) { - resp.response_length = offsetof(typeof(resp), response_length) + - sizeof(resp.response_length); + resp.response_length = offsetofend( + struct mlx5_ib_create_wq_resp, response_length); err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) goto err_copy; @@ -5056,22 +5079,27 @@ err: return ERR_PTR(err); } -void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata) +int mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(wq->device); struct mlx5_ib_rwq *rwq = to_mrwq(wq); + int ret; - mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp); + ret = mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp); + if (ret) + return ret; destroy_user_rq(dev, wq->pd, rwq, udata); kfree(rwq); + return 0; } -struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, - struct ib_rwq_ind_table_init_attr *init_attr, - struct ib_udata *udata) +int mlx5_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata) { - struct mlx5_ib_dev *dev = to_mdev(device); - struct mlx5_ib_rwq_ind_table *rwq_ind_tbl; + struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = + to_mrwq_ind_table(ib_rwq_ind_table); + struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_table->device); int sz = 1 << init_attr->log_ind_tbl_size; struct mlx5_ib_create_rwq_ind_tbl_resp resp = {}; size_t min_resp_len; @@ -5084,30 +5112,25 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, if (udata->inlen > 0 && !ib_is_udata_cleared(udata, 0, udata->inlen)) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; if (init_attr->log_ind_tbl_size > MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)) { mlx5_ib_dbg(dev, "log_ind_tbl_size = %d is bigger than supported = %d\n", init_attr->log_ind_tbl_size, MLX5_CAP_GEN(dev->mdev, log_max_rqt_size)); - return ERR_PTR(-EINVAL); + return -EINVAL; } - min_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved); + min_resp_len = + offsetofend(struct mlx5_ib_create_rwq_ind_tbl_resp, reserved); if (udata->outlen && udata->outlen < min_resp_len) - return ERR_PTR(-EINVAL); - - rwq_ind_tbl = kzalloc(sizeof(*rwq_ind_tbl), GFP_KERNEL); - if (!rwq_ind_tbl) - return ERR_PTR(-ENOMEM); + return -EINVAL; inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; in = kvzalloc(inlen, GFP_KERNEL); - if (!in) { - err = -ENOMEM; - goto err; - } + if (!in) + return -ENOMEM; rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); @@ -5122,26 +5145,24 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device, err = mlx5_core_create_rqt(dev->mdev, in, inlen, &rwq_ind_tbl->rqtn); kvfree(in); - if (err) - goto err; + return err; rwq_ind_tbl->ib_rwq_ind_tbl.ind_tbl_num = rwq_ind_tbl->rqtn; if (udata->outlen) { - resp.response_length = offsetof(typeof(resp), response_length) + - sizeof(resp.response_length); + resp.response_length = + offsetofend(struct mlx5_ib_create_rwq_ind_tbl_resp, + response_length); err = ib_copy_to_udata(udata, &resp, resp.response_length); if (err) goto err_copy; } - return &rwq_ind_tbl->ib_rwq_ind_tbl; + return 0; err_copy: mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid); -err: - kfree(rwq_ind_tbl); - return ERR_PTR(err); + return err; } int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) @@ -5149,10 +5170,7 @@ int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) struct mlx5_ib_rwq_ind_table *rwq_ind_tbl = to_mrwq_ind_table(ib_rwq_ind_tbl); struct mlx5_ib_dev *dev = to_mdev(ib_rwq_ind_tbl->device); - mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid); - - kfree(rwq_ind_tbl); - return 0; + return mlx5_cmd_destroy_rqt(dev->mdev, rwq_ind_tbl->rqtn, rwq_ind_tbl->uid); } int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, @@ -5169,7 +5187,7 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, void *rqc; void *in; - required_cmd_sz = offsetof(typeof(ucmd), reserved) + sizeof(ucmd.reserved); + required_cmd_sz = offsetofend(struct mlx5_ib_modify_wq, reserved); if (udata->inlen < required_cmd_sz) return -EINVAL; diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h index ba899df44c5b..5d4e140db99c 100644 --- a/drivers/infiniband/hw/mlx5/qp.h +++ b/drivers/infiniband/hw/mlx5/qp.h @@ -26,8 +26,8 @@ int mlx5_core_dct_query(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct, int mlx5_core_set_delay_drop(struct mlx5_ib_dev *dev, u32 timeout_usec); -void mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev, - struct mlx5_core_qp *rq); +int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev, + struct mlx5_core_qp *rq); int mlx5_core_create_sq_tracked(struct mlx5_ib_dev *dev, u32 *in, int inlen, struct mlx5_core_qp *sq); void mlx5_core_destroy_sq_tracked(struct mlx5_ib_dev *dev, diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c index 7c3968ef9cd1..c683d7000168 100644 --- a/drivers/infiniband/hw/mlx5/qpc.c +++ b/drivers/infiniband/hw/mlx5/qpc.c @@ -576,11 +576,12 @@ err_destroy_rq: return err; } -void mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev, - struct mlx5_core_qp *rq) +int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev, + struct mlx5_core_qp *rq) { destroy_resource_common(dev, rq); destroy_rq_tracked(dev, rq->qpn, rq->uid); + return 0; } static void destroy_sq_tracked(struct mlx5_ib_dev *dev, u32 sqn, u16 uid) diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 7e10cbcb6d5c..e2f720eec1e1 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -389,24 +389,21 @@ out_box: return ret; } -void mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) +int mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(srq->device); struct mlx5_ib_srq *msrq = to_msrq(srq); + int ret; - mlx5_cmd_destroy_srq(dev, &msrq->msrq); + ret = mlx5_cmd_destroy_srq(dev, &msrq->msrq); + if (ret) + return ret; - if (srq->uobject) { - mlx5_ib_db_unmap_user( - rdma_udata_to_drv_context( - udata, - struct mlx5_ib_ucontext, - ibucontext), - &msrq->db); - ib_umem_release(msrq->umem); - } else { + if (udata) + destroy_srq_user(srq->pd, msrq, udata); + else destroy_srq_kernel(dev, msrq); - } + return 0; } void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index) diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h index af197c36d757..2c3627b2509d 100644 --- a/drivers/infiniband/hw/mlx5/srq.h +++ b/drivers/infiniband/hw/mlx5/srq.h @@ -56,7 +56,7 @@ struct mlx5_srq_table { int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *in); -void mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq); +int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq); int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, struct mlx5_srq_attr *out); int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index 37aaacebd3f2..db889ec3fd48 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -590,22 +590,32 @@ err_destroy_srq_split: return err; } -void mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) +int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) { struct mlx5_srq_table *table = &dev->srq_table; struct mlx5_core_srq *tmp; int err; - tmp = xa_erase_irq(&table->array, srq->srqn); - if (!tmp || tmp != srq) - return; + /* Delete entry, but leave index occupied */ + tmp = xa_cmpxchg_irq(&table->array, srq->srqn, srq, XA_ZERO_ENTRY, 0); + if (WARN_ON(tmp != srq)) + return xa_err(tmp) ?: -EINVAL; err = destroy_srq_split(dev, srq); - if (err) - return; + if (err) { + /* + * We don't need to check returned result for an error, + * because we are storing in pre-allocated space xarray + * entry and it can't fail at this stage. + */ + xa_cmpxchg_irq(&table->array, srq->srqn, XA_ZERO_ENTRY, srq, 0); + return err; + } + xa_erase_irq(&table->array, srq->srqn); mlx5_core_res_put(&srq->common); wait_for_completion(&srq->common.free); + return 0; } int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c index 43880973a512..d6038fb6c50c 100644 --- a/drivers/infiniband/hw/mlx5/wr.c +++ b/drivers/infiniband/hw/mlx5/wr.c @@ -398,7 +398,8 @@ static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg) seg->status = MLX5_MKEY_STATUS_FREE; } -static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, +static void set_reg_mkey_segment(struct mlx5_ib_dev *dev, + struct mlx5_mkey_seg *seg, const struct ib_send_wr *wr) { const struct mlx5_umr_wr *umrwr = umr_wr(wr); @@ -414,10 +415,12 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, MLX5_SET(mkc, seg, rr, !!(umrwr->access_flags & IB_ACCESS_REMOTE_READ)); MLX5_SET(mkc, seg, lw, !!(umrwr->access_flags & IB_ACCESS_LOCAL_WRITE)); MLX5_SET(mkc, seg, lr, 1); - MLX5_SET(mkc, seg, relaxed_ordering_write, - !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); - MLX5_SET(mkc, seg, relaxed_ordering_read, - !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)) + MLX5_SET(mkc, seg, relaxed_ordering_write, + !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); + if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)) + MLX5_SET(mkc, seg, relaxed_ordering_read, + !!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING)); if (umrwr->pd) MLX5_SET(mkc, seg, pd, to_mpd(umrwr->pd)->pdn); @@ -863,13 +866,11 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC; u8 flags = 0; - if (!mlx5_ib_can_use_umr(dev, atomic, wr->access)) { - mlx5_ib_warn(to_mdev(qp->ibqp.device), - "Fast update of %s for MR is disabled\n", - (MLX5_CAP_GEN(dev->mdev, - umr_modify_entity_size_disabled)) ? - "entity size" : - "atomic access"); + /* Matches access in mlx5_set_umr_free_mkey() */ + if (!mlx5_ib_can_reconfig_with_umr(dev, 0, wr->access)) { + mlx5_ib_warn( + to_mdev(qp->ibqp.device), + "Fast update for MR access flags is not possible\n"); return -EINVAL; } @@ -1263,7 +1264,7 @@ static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - set_reg_mkey_segment(*seg, wr); + set_reg_mkey_segment(dev, *seg, wr); *seg += sizeof(struct mlx5_mkey_seg); *size += sizeof(struct mlx5_mkey_seg) / 16; handle_post_send_edge(&qp->sq, seg, *size, cur_edge); diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 7550e9d03dec..9dbbf4d16796 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -548,7 +548,7 @@ int mthca_alloc_sqp(struct mthca_dev *dev, struct ib_qp_cap *cap, int qpn, int port, - struct mthca_sqp *sqp, + struct mthca_qp *qp, struct ib_udata *udata); void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp); int mthca_create_ah(struct mthca_dev *dev, diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 9fa2f9164a47..c4d9cdc4ee97 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -373,9 +373,10 @@ static int mthca_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) return 0; } -static void mthca_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) +static int mthca_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { mthca_pd_free(to_mdev(pd->device), to_mpd(pd)); + return 0; } static int mthca_ah_create(struct ib_ah *ibah, @@ -389,9 +390,10 @@ static int mthca_ah_create(struct ib_ah *ibah, init_attr->ah_attr, ah); } -static void mthca_ah_destroy(struct ib_ah *ah, u32 flags) +static int mthca_ah_destroy(struct ib_ah *ah, u32 flags) { mthca_destroy_ah(to_mdev(ah->device), to_mah(ah)); + return 0; } static int mthca_create_srq(struct ib_srq *ibsrq, @@ -440,7 +442,7 @@ static int mthca_create_srq(struct ib_srq *ibsrq, return 0; } -static void mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) +static int mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { if (udata) { struct mthca_ucontext *context = @@ -454,6 +456,7 @@ static void mthca_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) } mthca_free_srq(to_mdev(srq->device), to_msrq(srq)); + return 0; } static struct ib_qp *mthca_create_qp(struct ib_pd *pd, @@ -532,13 +535,14 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, case IB_QPT_SMI: case IB_QPT_GSI: { - /* Don't allow userspace to create special QPs */ - if (udata) - return ERR_PTR(-EINVAL); - - qp = kzalloc(sizeof(struct mthca_sqp), GFP_KERNEL); + qp = kzalloc(sizeof(*qp), GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); + qp->sqp = kzalloc(sizeof(struct mthca_sqp), GFP_KERNEL); + if (!qp->sqp) { + kfree(qp); + return ERR_PTR(-ENOMEM); + } qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1; @@ -547,7 +551,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, to_mcq(init_attr->recv_cq), init_attr->sq_sig_type, &init_attr->cap, qp->ibqp.qp_num, init_attr->port_num, - to_msqp(qp), udata); + qp, udata); break; } default: @@ -556,6 +560,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, } if (err) { + kfree(qp->sqp); kfree(qp); return ERR_PTR(err); } @@ -588,7 +593,8 @@ static int mthca_destroy_qp(struct ib_qp *qp, struct ib_udata *udata) to_mqp(qp)->rq.db_index); } mthca_free_qp(to_mdev(qp->device), to_mqp(qp)); - kfree(qp); + kfree(to_mqp(qp)->sqp); + kfree(to_mqp(qp)); return 0; } @@ -789,7 +795,7 @@ out: return ret; } -static void mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) +static int mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { if (udata) { struct mthca_ucontext *context = @@ -808,6 +814,7 @@ static void mthca_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) to_mcq(cq)->set_ci_db_index); } mthca_free_cq(to_mdev(cq->device), to_mcq(cq)); + return 0; } static inline u32 convert_access(int acc) @@ -846,7 +853,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt, int acc, struct ib_udata *udata) { struct mthca_dev *dev = to_mdev(pd->device); - struct sg_dma_page_iter sg_iter; + struct ib_block_iter biter; struct mthca_ucontext *context = rdma_udata_to_drv_context( udata, struct mthca_ucontext, ibucontext); struct mthca_mr *mr; @@ -877,7 +884,7 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, goto err; } - n = ib_umem_num_pages(mr->umem); + n = ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE); mr->mtt = mthca_alloc_mtt(dev, n); if (IS_ERR(mr->mtt)) { @@ -895,8 +902,8 @@ static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, write_mtt_size = min(mthca_write_mtt_size(dev), (int) (PAGE_SIZE / sizeof *pages)); - for_each_sg_dma_page(mr->umem->sg_head.sgl, &sg_iter, mr->umem->nmap, 0) { - pages[i++] = sg_page_iter_dma_address(&sg_iter); + rdma_umem_for_each_dma_block(mr->umem, &biter, PAGE_SIZE) { + pages[i++] = rdma_block_iter_dma_address(&biter); /* * Be friendly to write_mtt and pass it chunks @@ -1199,7 +1206,7 @@ int mthca_register_device(struct mthca_dev *dev) mutex_init(&dev->cap_mask_mutex); rdma_set_device_sysfs_group(&dev->ib_dev, &mthca_attr_group); - ret = ib_register_device(&dev->ib_dev, "mthca%d"); + ret = ib_register_device(&dev->ib_dev, "mthca%d", &dev->pdev->dev); if (ret) return ret; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h index 84c64bff0d92..8a77483bb33c 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.h +++ b/drivers/infiniband/hw/mthca/mthca_provider.h @@ -240,6 +240,16 @@ struct mthca_wq { __be32 *db; }; +struct mthca_sqp { + int pkey_index; + u32 qkey; + u32 send_psn; + struct ib_ud_header ud_header; + int header_buf_size; + void *header_buf; + dma_addr_t header_dma; +}; + struct mthca_qp { struct ib_qp ibqp; int refcount; @@ -265,17 +275,7 @@ struct mthca_qp { wait_queue_head_t wait; struct mutex mutex; -}; - -struct mthca_sqp { - struct mthca_qp qp; - int pkey_index; - u32 qkey; - u32 send_psn; - struct ib_ud_header ud_header; - int header_buf_size; - void *header_buf; - dma_addr_t header_dma; + struct mthca_sqp *sqp; }; static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext) @@ -313,9 +313,4 @@ static inline struct mthca_qp *to_mqp(struct ib_qp *ibqp) return container_of(ibqp, struct mthca_qp, ibqp); } -static inline struct mthca_sqp *to_msqp(struct mthca_qp *qp) -{ - return container_of(qp, struct mthca_sqp, qp); -} - #endif /* MTHCA_PROVIDER_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index c6e95d0d760a..08a2a7afafd3 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -809,7 +809,7 @@ static int __mthca_modify_qp(struct ib_qp *ibqp, qp->alt_port = attr->alt_port_num; if (is_sqp(dev, qp)) - store_attrs(to_msqp(qp), attr, attr_mask); + store_attrs(qp->sqp, attr, attr_mask); /* * If we moved QP0 to RTR, bring the IB link up; if we moved @@ -1368,39 +1368,40 @@ int mthca_alloc_sqp(struct mthca_dev *dev, struct ib_qp_cap *cap, int qpn, int port, - struct mthca_sqp *sqp, + struct mthca_qp *qp, struct ib_udata *udata) { u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1; int err; - sqp->qp.transport = MLX; - err = mthca_set_qp_size(dev, cap, pd, &sqp->qp); + qp->transport = MLX; + err = mthca_set_qp_size(dev, cap, pd, qp); if (err) return err; - sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE; - sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size, - &sqp->header_dma, GFP_KERNEL); - if (!sqp->header_buf) + qp->sqp->header_buf_size = qp->sq.max * MTHCA_UD_HEADER_SIZE; + qp->sqp->header_buf = + dma_alloc_coherent(&dev->pdev->dev, qp->sqp->header_buf_size, + &qp->sqp->header_dma, GFP_KERNEL); + if (!qp->sqp->header_buf) return -ENOMEM; spin_lock_irq(&dev->qp_table.lock); if (mthca_array_get(&dev->qp_table.qp, mqpn)) err = -EBUSY; else - mthca_array_set(&dev->qp_table.qp, mqpn, sqp); + mthca_array_set(&dev->qp_table.qp, mqpn, qp->sqp); spin_unlock_irq(&dev->qp_table.lock); if (err) goto err_out; - sqp->qp.port = port; - sqp->qp.qpn = mqpn; - sqp->qp.transport = MLX; + qp->port = port; + qp->qpn = mqpn; + qp->transport = MLX; err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq, - send_policy, &sqp->qp, udata); + send_policy, qp, udata); if (err) goto err_out_free; @@ -1421,10 +1422,9 @@ int mthca_alloc_sqp(struct mthca_dev *dev, mthca_unlock_cqs(send_cq, recv_cq); - err_out: - dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size, - sqp->header_buf, sqp->header_dma); - +err_out: + dma_free_coherent(&dev->pdev->dev, qp->sqp->header_buf_size, + qp->sqp->header_buf, qp->sqp->header_dma); return err; } @@ -1487,20 +1487,19 @@ void mthca_free_qp(struct mthca_dev *dev, if (is_sqp(dev, qp)) { atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count)); - dma_free_coherent(&dev->pdev->dev, - to_msqp(qp)->header_buf_size, - to_msqp(qp)->header_buf, - to_msqp(qp)->header_dma); + dma_free_coherent(&dev->pdev->dev, qp->sqp->header_buf_size, + qp->sqp->header_buf, qp->sqp->header_dma); } else mthca_free(&dev->qp_table.alloc, qp->qpn); } /* Create UD header for an MLX send and build a data segment for it */ -static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, - int ind, const struct ib_ud_wr *wr, +static int build_mlx_header(struct mthca_dev *dev, struct mthca_qp *qp, int ind, + const struct ib_ud_wr *wr, struct mthca_mlx_seg *mlx, struct mthca_data_seg *data) { + struct mthca_sqp *sqp = qp->sqp; int header_size; int err; u16 pkey; @@ -1513,7 +1512,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, if (err) return err; mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1); - mlx->flags |= cpu_to_be32((!sqp->qp.ibqp.qp_num ? MTHCA_MLX_VL15 : 0) | + mlx->flags |= cpu_to_be32((!qp->ibqp.qp_num ? MTHCA_MLX_VL15 : 0) | (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE ? MTHCA_MLX_SLR : 0) | (sqp->ud_header.lrh.service_level << 8)); @@ -1534,29 +1533,29 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp, return -EINVAL; } - sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; + sqp->ud_header.lrh.virtual_lane = !qp->ibqp.qp_num ? 15 : 0; if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED); - if (!sqp->qp.ibqp.qp_num) - ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port, - sqp->pkey_index, &pkey); + if (!qp->ibqp.qp_num) + ib_get_cached_pkey(&dev->ib_dev, qp->port, sqp->pkey_index, + &pkey); else - ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port, - wr->pkey_index, &pkey); + ib_get_cached_pkey(&dev->ib_dev, qp->port, wr->pkey_index, + &pkey); sqp->ud_header.bth.pkey = cpu_to_be16(pkey); sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ? sqp->qkey : wr->remote_qkey); - sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num); + sqp->ud_header.deth.source_qpn = cpu_to_be32(qp->ibqp.qp_num); header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf + ind * MTHCA_UD_HEADER_SIZE); data->byte_count = cpu_to_be32(header_size); - data->lkey = cpu_to_be32(to_mpd(sqp->qp.ibqp.pd)->ntmr.ibmr.lkey); + data->lkey = cpu_to_be32(to_mpd(qp->ibqp.pd)->ntmr.ibmr.lkey); data->addr = cpu_to_be64(sqp->header_dma + ind * MTHCA_UD_HEADER_SIZE); @@ -1735,9 +1734,9 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, break; case MLX: - err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr), - wqe - sizeof (struct mthca_next_seg), - wqe); + err = build_mlx_header( + dev, qp, ind, ud_wr(wr), + wqe - sizeof(struct mthca_next_seg), wqe); if (err) { *bad_wr = wr; goto out; @@ -2065,9 +2064,9 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, break; case MLX: - err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr), - wqe - sizeof (struct mthca_next_seg), - wqe); + err = build_mlx_header( + dev, qp, ind, ud_wr(wr), + wqe - sizeof(struct mthca_next_seg), wqe); if (err) { *bad_wr = wr; goto out; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index fcfe0e82197a..5eb61c110090 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -185,7 +185,6 @@ struct ocrdma_hw_mr { u32 num_pbes; u32 pbl_size; u32 pbe_size; - u64 fbo; u64 va; }; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 6eea02b18968..699a8b719ed6 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -215,12 +215,13 @@ av_err: return status; } -void ocrdma_destroy_ah(struct ib_ah *ibah, u32 flags) +int ocrdma_destroy_ah(struct ib_ah *ibah, u32 flags) { struct ocrdma_ah *ah = get_ocrdma_ah(ibah); struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device); ocrdma_free_av(dev, ah); + return 0; } int ocrdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index 8b73b3489f3a..35cf2e2ff391 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -53,7 +53,7 @@ enum { int ocrdma_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); -void ocrdma_destroy_ah(struct ib_ah *ah, u32 flags); +int ocrdma_destroy_ah(struct ib_ah *ah, u32 flags); int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int ocrdma_process_mad(struct ib_device *dev, int process_mad_flags, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index e07bf0b2209a..c51c3f40700e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1962,6 +1962,7 @@ static int ocrdma_mbx_reg_mr(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr, int i; struct ocrdma_reg_nsmr *cmd; struct ocrdma_reg_nsmr_rsp *rsp; + u64 fbo = hwmr->va & (hwmr->pbe_size - 1); cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_REGISTER_NSMR, sizeof(*cmd)); if (!cmd) @@ -1987,8 +1988,8 @@ static int ocrdma_mbx_reg_mr(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr, OCRDMA_REG_NSMR_HPAGE_SIZE_SHIFT; cmd->totlen_low = hwmr->len; cmd->totlen_high = upper_32_bits(hwmr->len); - cmd->fbo_low = (u32) (hwmr->fbo & 0xffffffff); - cmd->fbo_high = (u32) upper_32_bits(hwmr->fbo); + cmd->fbo_low = lower_32_bits(fbo); + cmd->fbo_high = upper_32_bits(fbo); cmd->va_loaddr = (u32) hwmr->va; cmd->va_hiaddr = (u32) upper_32_bits(hwmr->va); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index d8c47d24d6d6..9b96661a7143 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -255,7 +255,9 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) if (ret) return ret; - return ib_register_device(&dev->ibdev, "ocrdma%d"); + dma_set_max_seg_size(&dev->nic_info.pdev->dev, UINT_MAX); + return ib_register_device(&dev->ibdev, "ocrdma%d", + &dev->nic_info.pdev->dev); } static int ocrdma_alloc_resources(struct ocrdma_dev *dev) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index c1751c9a0f62..7350fe16f164 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -112,7 +112,7 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, } static inline void get_link_speed_and_width(struct ocrdma_dev *dev, - u8 *ib_speed, u8 *ib_width) + u16 *ib_speed, u8 *ib_width) { int status; u8 speed; @@ -664,7 +664,7 @@ exit: return status; } -void ocrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +int ocrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct ocrdma_pd *pd = get_ocrdma_pd(ibpd); struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); @@ -682,10 +682,11 @@ void ocrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) if (is_ucontext_pd(uctx, pd)) { ocrdma_release_ucontext_pd(uctx); - return; + return 0; } } _ocrdma_dealloc_pd(dev, pd); + return 0; } static int ocrdma_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_mr *mr, @@ -810,14 +811,12 @@ static int ocrdma_build_pbl_tbl(struct ocrdma_dev *dev, struct ocrdma_hw_mr *mr) return status; } -static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr, - u32 num_pbes) +static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr) { struct ocrdma_pbe *pbe; - struct sg_dma_page_iter sg_iter; + struct ib_block_iter biter; struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table; - struct ib_umem *umem = mr->umem; - int pbe_cnt, total_num_pbes = 0; + int pbe_cnt; u64 pg_addr; if (!mr->hwmr.num_pbes) @@ -826,19 +825,14 @@ static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr, pbe = (struct ocrdma_pbe *)pbl_tbl->va; pbe_cnt = 0; - for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { + rdma_umem_for_each_dma_block (mr->umem, &biter, PAGE_SIZE) { /* store the page address in pbe */ - pg_addr = sg_page_iter_dma_address(&sg_iter); + pg_addr = rdma_block_iter_dma_address(&biter); pbe->pa_lo = cpu_to_le32(pg_addr); pbe->pa_hi = cpu_to_le32(upper_32_bits(pg_addr)); pbe_cnt += 1; - total_num_pbes += 1; pbe++; - /* if done building pbes, issue the mbx cmd. */ - if (total_num_pbes == num_pbes) - return; - /* if the given pbl is full storing the pbes, * move to next pbl. */ @@ -857,7 +851,6 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device); struct ocrdma_mr *mr; struct ocrdma_pd *pd; - u32 num_pbes; pd = get_ocrdma_pd(ibpd); @@ -872,13 +865,12 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, status = -EFAULT; goto umem_err; } - num_pbes = ib_umem_page_count(mr->umem); - status = ocrdma_get_pbl_info(dev, mr, num_pbes); + status = ocrdma_get_pbl_info( + dev, mr, ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE)); if (status) goto umem_err; mr->hwmr.pbe_size = PAGE_SIZE; - mr->hwmr.fbo = ib_umem_offset(mr->umem); mr->hwmr.va = usr_addr; mr->hwmr.len = len; mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; @@ -889,7 +881,7 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, status = ocrdma_build_pbl_tbl(dev, &mr->hwmr); if (status) goto umem_err; - build_user_pbes(dev, mr, num_pbes); + build_user_pbes(dev, mr); status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc); if (status) goto mbx_err; @@ -1056,7 +1048,7 @@ static void ocrdma_flush_cq(struct ocrdma_cq *cq) spin_unlock_irqrestore(&cq->cq_lock, flags); } -void ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct ocrdma_cq *cq = get_ocrdma_cq(ibcq); struct ocrdma_eq *eq = NULL; @@ -1081,6 +1073,7 @@ void ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) ocrdma_get_db_addr(dev, pdid), dev->nic_info.db_page_size); } + return 0; } static int ocrdma_add_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp) @@ -1857,7 +1850,7 @@ int ocrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) return status; } -void ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) +int ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct ocrdma_srq *srq; struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device); @@ -1872,6 +1865,7 @@ void ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) kfree(srq->idx_bit_fields); kfree(srq->rqe_wr_id_tbl); + return 0; } /* unprivileged verbs and their support functions. */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index df8e3b923a44..425d554e7f3f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -67,12 +67,12 @@ void ocrdma_dealloc_ucontext(struct ib_ucontext *uctx); int ocrdma_mmap(struct ib_ucontext *, struct vm_area_struct *vma); int ocrdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); -void ocrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); +int ocrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); int ocrdma_resize_cq(struct ib_cq *, int cqe, struct ib_udata *); -void ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); +int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); struct ib_qp *ocrdma_create_qp(struct ib_pd *, struct ib_qp_init_attr *attrs, @@ -92,7 +92,7 @@ int ocrdma_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *attr, int ocrdma_modify_srq(struct ib_srq *, struct ib_srq_attr *, enum ib_srq_attr_mask, struct ib_udata *); int ocrdma_query_srq(struct ib_srq *, struct ib_srq_attr *); -void ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); +int ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); int ocrdma_post_srq_recv(struct ib_srq *, const struct ib_recv_wr *, const struct ib_recv_wr **bad_recv_wr); diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index d85f992bac29..967641662b24 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -177,6 +177,8 @@ static int qedr_iw_register_device(struct qedr_dev *dev) } static const struct ib_device_ops qedr_roce_dev_ops = { + .alloc_xrcd = qedr_alloc_xrcd, + .dealloc_xrcd = qedr_dealloc_xrcd, .get_port_immutable = qedr_roce_port_immutable, .query_pkey = qedr_query_pkey, }; @@ -186,6 +188,10 @@ static void qedr_roce_register_device(struct qedr_dev *dev) dev->ibdev.node_type = RDMA_NODE_IB_CA; ib_set_device_ops(&dev->ibdev, &qedr_roce_dev_ops); + + dev->ibdev.uverbs_cmd_mask |= QEDR_UVERBS(OPEN_XRCD) | + QEDR_UVERBS(CLOSE_XRCD) | + QEDR_UVERBS(CREATE_XSRQ); } static const struct ib_device_ops qedr_dev_ops = { @@ -232,6 +238,7 @@ static const struct ib_device_ops qedr_dev_ops = { INIT_RDMA_OBJ_SIZE(ib_cq, qedr_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, qedr_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_srq, qedr_srq, ibsrq), + INIT_RDMA_OBJ_SIZE(ib_xrcd, qedr_xrcd, ibxrcd), INIT_RDMA_OBJ_SIZE(ib_ucontext, qedr_ucontext, ibucontext), }; @@ -286,7 +293,8 @@ static int qedr_register_device(struct qedr_dev *dev) if (rc) return rc; - return ib_register_device(&dev->ibdev, "qedr%d"); + dma_set_max_seg_size(&dev->pdev->dev, UINT_MAX); + return ib_register_device(&dev->ibdev, "qedr%d", &dev->pdev->dev); } /* This function allocates fast-path status block memory */ @@ -602,7 +610,7 @@ static int qedr_set_device_attr(struct qedr_dev *dev) qed_attr = dev->ops->rdma_query_device(dev->rdma_ctx); /* Part 2 - check capabilities */ - page_size = ~dev->attr.page_size_caps + 1; + page_size = ~qed_attr->page_size_caps + 1; if (page_size > PAGE_SIZE) { DP_ERR(dev, "Kernel PAGE_SIZE is %ld which is smaller than minimum page size (%d) required by qedr\n", @@ -705,6 +713,18 @@ static void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle) event.event = IB_EVENT_SRQ_ERR; event_type = EVENT_TYPE_SRQ; break; + case ROCE_ASYNC_EVENT_XRC_DOMAIN_ERR: + event.event = IB_EVENT_QP_ACCESS_ERR; + event_type = EVENT_TYPE_QP; + break; + case ROCE_ASYNC_EVENT_INVALID_XRCETH_ERR: + event.event = IB_EVENT_QP_ACCESS_ERR; + event_type = EVENT_TYPE_QP; + break; + case ROCE_ASYNC_EVENT_XRC_SRQ_CATASTROPHIC_ERR: + event.event = IB_EVENT_CQ_ERR; + event_type = EVENT_TYPE_CQ; + break; default: DP_ERR(dev, "unsupported event %d on handle=%llx\n", e_code, roce_handle64); @@ -1026,6 +1046,13 @@ static void qedr_notify(struct qedr_dev *dev, enum qede_rdma_event event) case QEDE_CHANGE_ADDR: qedr_mac_address_change(dev); break; + case QEDE_CHANGE_MTU: + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + if (dev->ndev->mtu != dev->iwarp_max_mtu) + DP_NOTICE(dev, + "Mtu was changed from %d to %d. This will not take affect for iWARP until qedr is reloaded\n", + dev->iwarp_max_mtu, dev->ndev->mtu); + break; default: pr_err("Event not supported\n"); } diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 460292179b32..9dde70373a55 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -310,6 +310,11 @@ struct qedr_pd { struct qedr_ucontext *uctx; }; +struct qedr_xrcd { + struct ib_xrcd ibxrcd; + u16 xrcd_id; +}; + struct qedr_qp_hwq_info { /* WQE Elements */ struct qed_chain pbl; @@ -361,6 +366,7 @@ struct qedr_srq { struct ib_umem *prod_umem; u16 srq_id; u32 srq_limit; + bool is_xrc; /* lock to protect srq recv post */ spinlock_t lock; }; @@ -573,6 +579,11 @@ static inline struct qedr_pd *get_qedr_pd(struct ib_pd *ibpd) return container_of(ibpd, struct qedr_pd, ibpd); } +static inline struct qedr_xrcd *get_qedr_xrcd(struct ib_xrcd *ibxrcd) +{ + return container_of(ibxrcd, struct qedr_xrcd, ibxrcd); +} + static inline struct qedr_cq *get_qedr_cq(struct ib_cq *ibcq) { return container_of(ibcq, struct qedr_cq, ibcq); @@ -598,6 +609,28 @@ static inline struct qedr_srq *get_qedr_srq(struct ib_srq *ibsrq) return container_of(ibsrq, struct qedr_srq, ibsrq); } +static inline bool qedr_qp_has_srq(struct qedr_qp *qp) +{ + return qp->srq; +} + +static inline bool qedr_qp_has_sq(struct qedr_qp *qp) +{ + if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_XRC_TGT) + return 0; + + return 1; +} + +static inline bool qedr_qp_has_rq(struct qedr_qp *qp) +{ + if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_XRC_INI || + qp->qp_type == IB_QPT_XRC_TGT || qedr_qp_has_srq(qp)) + return 0; + + return 1; +} + static inline struct qedr_user_mmap_entry * get_qedr_mmap_entry(struct rdma_user_mmap_entry *rdma_entry) { diff --git a/drivers/infiniband/hw/qedr/qedr_iw_cm.c b/drivers/infiniband/hw/qedr/qedr_iw_cm.c index 97fc7dd353b0..c7169d2c69e5 100644 --- a/drivers/infiniband/hw/qedr/qedr_iw_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_iw_cm.c @@ -736,7 +736,7 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) struct qedr_dev *dev = ep->dev; struct qedr_qp *qp; struct qed_iwarp_accept_in params; - int rc = 0; + int rc; DP_DEBUG(dev, QEDR_MSG_IWARP, "Accept on qpid=%d\n", conn_param->qpn); @@ -759,8 +759,10 @@ int qedr_iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) params.ord = conn_param->ord; if (test_and_set_bit(QEDR_IWARP_CM_WAIT_FOR_CONNECT, - &qp->iwarp_cm_flags)) + &qp->iwarp_cm_flags)) { + rc = -EINVAL; goto err; /* QP already destroyed */ + } rc = dev->ops->iwarp_accept(dev->rdma_ctx, ¶ms); if (rc) { diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index b49bef94637e..019642ff24a7 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -136,6 +136,8 @@ int qedr_query_device(struct ib_device *ibdev, IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; + if (!rdma_protocol_iwarp(&dev->ibdev, 1)) + attr->device_cap_flags |= IB_DEVICE_XRC; attr->max_send_sge = qattr->max_sge; attr->max_recv_sge = qattr->max_sge; attr->max_sge_rd = qattr->max_sge; @@ -157,13 +159,13 @@ int qedr_query_device(struct ib_device *ibdev, attr->local_ca_ack_delay = qattr->dev_ack_delay; attr->max_fast_reg_page_list_len = qattr->max_mr / 8; - attr->max_pkeys = QEDR_ROCE_PKEY_MAX; + attr->max_pkeys = qattr->max_pkey; attr->max_ah = qattr->max_ah; return 0; } -static inline void get_link_speed_and_width(int speed, u8 *ib_speed, +static inline void get_link_speed_and_width(int speed, u16 *ib_speed, u8 *ib_width) { switch (speed) { @@ -231,15 +233,16 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; } attr->max_mtu = IB_MTU_4096; - attr->active_mtu = iboe_get_mtu(dev->ndev->mtu); attr->lid = 0; attr->lmc = 0; attr->sm_lid = 0; attr->sm_sl = 0; attr->ip_gids = true; if (rdma_protocol_iwarp(&dev->ibdev, 1)) { + attr->active_mtu = iboe_get_mtu(dev->iwarp_max_mtu); attr->gid_tbl_len = 1; } else { + attr->active_mtu = iboe_get_mtu(dev->ndev->mtu); attr->gid_tbl_len = QEDR_MAX_SGID; attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN; } @@ -471,15 +474,33 @@ int qedr_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) return 0; } -void qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +int qedr_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct qedr_dev *dev = get_qedr_dev(ibpd->device); struct qedr_pd *pd = get_qedr_pd(ibpd); DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id); dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id); + return 0; } + +int qedr_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata) +{ + struct qedr_dev *dev = get_qedr_dev(ibxrcd->device); + struct qedr_xrcd *xrcd = get_qedr_xrcd(ibxrcd); + + return dev->ops->rdma_alloc_xrcd(dev->rdma_ctx, &xrcd->xrcd_id); +} + +int qedr_dealloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata) +{ + struct qedr_dev *dev = get_qedr_dev(ibxrcd->device); + u16 xrcd_id = get_qedr_xrcd(ibxrcd)->xrcd_id; + + dev->ops->rdma_dealloc_xrcd(dev->rdma_ctx, xrcd_id); + return 0; +} static void qedr_free_pbl(struct qedr_dev *dev, struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl) { @@ -600,11 +621,9 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem, struct qedr_pbl_info *pbl_info, u32 pg_shift) { int pbe_cnt, total_num_pbes = 0; - u32 fw_pg_cnt, fw_pg_per_umem_pg; struct qedr_pbl *pbl_tbl; - struct sg_dma_page_iter sg_iter; + struct ib_block_iter biter; struct regpair *pbe; - u64 pg_addr; if (!pbl_info->num_pbes) return; @@ -625,32 +644,25 @@ static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem, pbe_cnt = 0; - fw_pg_per_umem_pg = BIT(PAGE_SHIFT - pg_shift); + rdma_umem_for_each_dma_block (umem, &biter, BIT(pg_shift)) { + u64 pg_addr = rdma_block_iter_dma_address(&biter); - for_each_sg_dma_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { - pg_addr = sg_page_iter_dma_address(&sg_iter); - for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) { - pbe->lo = cpu_to_le32(pg_addr); - pbe->hi = cpu_to_le32(upper_32_bits(pg_addr)); + pbe->lo = cpu_to_le32(pg_addr); + pbe->hi = cpu_to_le32(upper_32_bits(pg_addr)); - pg_addr += BIT(pg_shift); - pbe_cnt++; - total_num_pbes++; - pbe++; + pbe_cnt++; + total_num_pbes++; + pbe++; - if (total_num_pbes == pbl_info->num_pbes) - return; + if (total_num_pbes == pbl_info->num_pbes) + return; - /* If the given pbl is full storing the pbes, - * move to next pbl. - */ - if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) { - pbl_tbl++; - pbe = (struct regpair *)pbl_tbl->va; - pbe_cnt = 0; - } - - fw_pg_cnt++; + /* If the given pbl is full storing the pbes, move to next pbl. + */ + if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) { + pbl_tbl++; + pbe = (struct regpair *)pbl_tbl->va; + pbe_cnt = 0; } } } @@ -792,9 +804,7 @@ static inline int qedr_init_user_queue(struct ib_udata *udata, return PTR_ERR(q->umem); } - fw_pages = ib_umem_page_count(q->umem) << - (PAGE_SHIFT - FW_PAGE_SHIFT); - + fw_pages = ib_umem_num_dma_blocks(q->umem, 1 << FW_PAGE_SHIFT); rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0); if (rc) goto err0; @@ -999,7 +1009,7 @@ int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, /* Generate doorbell address. */ cq->db.data.icid = cq->icid; cq->db_addr = dev->db_addr + db_offset; - cq->db.data.params = DB_AGG_CMD_SET << + cq->db.data.params = DB_AGG_CMD_MAX << RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT; /* point to the very last element, passing it we will toggle */ @@ -1051,7 +1061,7 @@ int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata) #define QEDR_DESTROY_CQ_MAX_ITERATIONS (10) #define QEDR_DESTROY_CQ_ITER_DURATION (10) -void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct qedr_dev *dev = get_qedr_dev(ibcq->device); struct qed_rdma_destroy_cq_out_params oparams; @@ -1066,7 +1076,7 @@ void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) /* GSIs CQs are handled by driver, so they don't exist in the FW */ if (cq->cq_type == QEDR_CQ_TYPE_GSI) { qedr_db_recovery_del(dev, cq->db_addr, &cq->db.data); - return; + return 0; } iparams.icid = cq->icid; @@ -1114,6 +1124,7 @@ void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) * Since the destroy CQ ramrod has also been received on the EQ we can * be certain that there's no event handler in process. */ + return 0; } static inline int get_gid_info_from_table(struct ib_qp *ibqp, @@ -1146,7 +1157,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, SET_FIELD(qp_params->modify_flags, QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1); break; - case RDMA_NETWORK_IB: + case RDMA_NETWORK_ROCE_V1: memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0], sizeof(qp_params->sgid)); memcpy(&qp_params->dgid.bytes[0], @@ -1166,6 +1177,8 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1); qp_params->roce_mode = ROCE_V2_IPV4; break; + default: + return -EINVAL; } for (i = 0; i < 4; i++) { @@ -1186,7 +1199,10 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev, struct qedr_device_attr *qattr = &dev->attr; /* QP0... attrs->qp_type == IB_QPT_GSI */ - if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) { + if (attrs->qp_type != IB_QPT_RC && + attrs->qp_type != IB_QPT_GSI && + attrs->qp_type != IB_QPT_XRC_INI && + attrs->qp_type != IB_QPT_XRC_TGT) { DP_DEBUG(dev, QEDR_MSG_QP, "create qp: unsupported qp type=0x%x requested\n", attrs->qp_type); @@ -1221,12 +1237,20 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev, return -EINVAL; } - /* Unprivileged user space cannot create special QP */ - if (udata && attrs->qp_type == IB_QPT_GSI) { - DP_ERR(dev, - "create qp: userspace can't create special QPs of type=0x%x\n", - attrs->qp_type); - return -EINVAL; + /* verify consumer QPs are not trying to use GSI QP's CQ. + * TGT QP isn't associated with RQ/SQ + */ + if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created) && + (attrs->qp_type != IB_QPT_XRC_TGT)) { + struct qedr_cq *send_cq = get_qedr_cq(attrs->send_cq); + struct qedr_cq *recv_cq = get_qedr_cq(attrs->recv_cq); + + if ((send_cq->cq_type == QEDR_CQ_TYPE_GSI) || + (recv_cq->cq_type == QEDR_CQ_TYPE_GSI)) { + DP_ERR(dev, + "create qp: consumer QP cannot use GSI CQs.\n"); + return -EINVAL; + } } return 0; @@ -1248,8 +1272,8 @@ static int qedr_copy_srq_uresp(struct qedr_dev *dev, } static void qedr_copy_rq_uresp(struct qedr_dev *dev, - struct qedr_create_qp_uresp *uresp, - struct qedr_qp *qp) + struct qedr_create_qp_uresp *uresp, + struct qedr_qp *qp) { /* iWARP requires two doorbells per RQ. */ if (rdma_protocol_iwarp(&dev->ibdev, 1)) { @@ -1291,8 +1315,12 @@ static int qedr_copy_qp_uresp(struct qedr_dev *dev, int rc; memset(uresp, 0, sizeof(*uresp)); - qedr_copy_sq_uresp(dev, uresp, qp); - qedr_copy_rq_uresp(dev, uresp, qp); + + if (qedr_qp_has_sq(qp)) + qedr_copy_sq_uresp(dev, uresp, qp); + + if (qedr_qp_has_rq(qp)) + qedr_copy_rq_uresp(dev, uresp, qp); uresp->atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE; uresp->qp_id = qp->qp_id; @@ -1316,18 +1344,25 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev, kref_init(&qp->refcnt); init_completion(&qp->iwarp_cm_comp); } + qp->pd = pd; qp->qp_type = attrs->qp_type; qp->max_inline_data = attrs->cap.max_inline_data; - qp->sq.max_sges = attrs->cap.max_send_sge; qp->state = QED_ROCE_QP_STATE_RESET; qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false; - qp->sq_cq = get_qedr_cq(attrs->send_cq); qp->dev = dev; + if (qedr_qp_has_sq(qp)) { + qp->sq.max_sges = attrs->cap.max_send_sge; + qp->sq_cq = get_qedr_cq(attrs->send_cq); + DP_DEBUG(dev, QEDR_MSG_QP, + "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n", + qp->sq.max_sges, qp->sq_cq->icid); + } - if (attrs->srq) { + if (attrs->srq) qp->srq = get_qedr_srq(attrs->srq); - } else { + + if (qedr_qp_has_rq(qp)) { qp->rq_cq = get_qedr_cq(attrs->recv_cq); qp->rq.max_sges = attrs->cap.max_recv_sge; DP_DEBUG(dev, QEDR_MSG_QP, @@ -1346,30 +1381,26 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev, static int qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp) { - int rc; + int rc = 0; - qp->sq.db = dev->db_addr + - DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); - qp->sq.db_data.data.icid = qp->icid + 1; - rc = qedr_db_recovery_add(dev, qp->sq.db, - &qp->sq.db_data, - DB_REC_WIDTH_32B, - DB_REC_KERNEL); - if (rc) - return rc; + if (qedr_qp_has_sq(qp)) { + qp->sq.db = dev->db_addr + + DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); + qp->sq.db_data.data.icid = qp->icid + 1; + rc = qedr_db_recovery_add(dev, qp->sq.db, &qp->sq.db_data, + DB_REC_WIDTH_32B, DB_REC_KERNEL); + if (rc) + return rc; + } - if (!qp->srq) { + if (qedr_qp_has_rq(qp)) { qp->rq.db = dev->db_addr + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD); qp->rq.db_data.data.icid = qp->icid; - - rc = qedr_db_recovery_add(dev, qp->rq.db, - &qp->rq.db_data, - DB_REC_WIDTH_32B, - DB_REC_KERNEL); - if (rc) - qedr_db_recovery_del(dev, qp->sq.db, - &qp->sq.db_data); + rc = qedr_db_recovery_add(dev, qp->rq.db, &qp->rq.db_data, + DB_REC_WIDTH_32B, DB_REC_KERNEL); + if (rc && qedr_qp_has_sq(qp)) + qedr_db_recovery_del(dev, qp->sq.db, &qp->sq.db_data); } return rc; @@ -1392,6 +1423,10 @@ static int qedr_check_srq_params(struct qedr_dev *dev, DP_ERR(dev, "create srq: unsupported sge=0x%x requested (max_srq_sge=0x%x)\n", attrs->attr.max_sge, qattr->max_sge); + } + + if (!udata && attrs->srq_type == IB_SRQT_XRC) { + DP_ERR(dev, "XRC SRQs are not supported in kernel-space\n"); return -EINVAL; } @@ -1516,6 +1551,7 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, return -EINVAL; srq->dev = dev; + srq->is_xrc = (init_attr->srq_type == IB_SRQT_XRC); hw_srq = &srq->hw_srq; spin_lock_init(&srq->lock); @@ -1557,6 +1593,14 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, in_params.prod_pair_addr = phy_prod_pair_addr; in_params.num_pages = page_cnt; in_params.page_size = page_size; + if (srq->is_xrc) { + struct qedr_xrcd *xrcd = get_qedr_xrcd(init_attr->ext.xrc.xrcd); + struct qedr_cq *cq = get_qedr_cq(init_attr->ext.cq); + + in_params.is_xrc = 1; + in_params.xrcd_id = xrcd->xrcd_id; + in_params.cq_cid = cq->icid; + } rc = dev->ops->rdma_create_srq(dev->rdma_ctx, &in_params, &out_params); if (rc) @@ -1591,7 +1635,7 @@ err0: return -EFAULT; } -void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) +int qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct qed_rdma_destroy_srq_in_params in_params = {}; struct qedr_dev *dev = get_qedr_dev(ibsrq->device); @@ -1599,6 +1643,7 @@ void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) xa_erase_irq(&dev->srqs, srq->srq_id); in_params.srq_id = srq->srq_id; + in_params.is_xrc = srq->is_xrc; dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params); if (ibsrq->uobject) @@ -1609,6 +1654,7 @@ void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) DP_DEBUG(dev, QEDR_MSG_SRQ, "destroy srq: destroyed srq with srq_id=0x%0x\n", srq->srq_id); + return 0; } int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, @@ -1649,6 +1695,20 @@ int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, return 0; } +static enum qed_rdma_qp_type qedr_ib_to_qed_qp_type(enum ib_qp_type ib_qp_type) +{ + switch (ib_qp_type) { + case IB_QPT_RC: + return QED_RDMA_QP_TYPE_RC; + case IB_QPT_XRC_INI: + return QED_RDMA_QP_TYPE_XRC_INI; + case IB_QPT_XRC_TGT: + return QED_RDMA_QP_TYPE_XRC_TGT; + default: + return QED_RDMA_QP_TYPE_INVAL; + } +} + static inline void qedr_init_common_qp_in_params(struct qedr_dev *dev, struct qedr_pd *pd, @@ -1663,20 +1723,27 @@ qedr_init_common_qp_in_params(struct qedr_dev *dev, params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR); params->fmr_and_reserved_lkey = fmr_and_reserved_lkey; - params->pd = pd->pd_id; - params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi; - params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid; + params->qp_type = qedr_ib_to_qed_qp_type(attrs->qp_type); params->stats_queue = 0; - params->srq_id = 0; - params->use_srq = false; - if (!qp->srq) { + if (pd) { + params->pd = pd->pd_id; + params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi; + } + + if (qedr_qp_has_sq(qp)) + params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid; + + if (qedr_qp_has_rq(qp)) params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid; - } else { + if (qedr_qp_has_srq(qp)) { params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid; params->srq_id = qp->srq->srq_id; params->use_srq = true; + } else { + params->srq_id = 0; + params->use_srq = false; } } @@ -1690,8 +1757,10 @@ static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp) "rq_len=%zd" "\n", qp, - qp->usq.buf_addr, - qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len); + qedr_qp_has_sq(qp) ? qp->usq.buf_addr : 0x0, + qedr_qp_has_sq(qp) ? qp->usq.buf_len : 0, + qedr_qp_has_rq(qp) ? qp->urq.buf_addr : 0x0, + qedr_qp_has_sq(qp) ? qp->urq.buf_len : 0); } static inline void @@ -1717,11 +1786,15 @@ static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_ucontext *ctx, struct qedr_qp *qp) { - ib_umem_release(qp->usq.umem); - qp->usq.umem = NULL; + if (qedr_qp_has_sq(qp)) { + ib_umem_release(qp->usq.umem); + qp->usq.umem = NULL; + } - ib_umem_release(qp->urq.umem); - qp->urq.umem = NULL; + if (qedr_qp_has_rq(qp)) { + ib_umem_release(qp->urq.umem); + qp->urq.umem = NULL; + } if (rdma_protocol_roce(&dev->ibdev, 1)) { qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl); @@ -1756,28 +1829,38 @@ static int qedr_create_user_qp(struct qedr_dev *dev, { struct qed_rdma_create_qp_in_params in_params; struct qed_rdma_create_qp_out_params out_params; - struct qedr_pd *pd = get_qedr_pd(ibpd); - struct qedr_create_qp_uresp uresp; - struct qedr_ucontext *ctx = pd ? pd->uctx : NULL; - struct qedr_create_qp_ureq ureq; + struct qedr_create_qp_uresp uresp = {}; + struct qedr_create_qp_ureq ureq = {}; int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1); - int rc = -EINVAL; + struct qedr_ucontext *ctx = NULL; + struct qedr_pd *pd = NULL; + int rc = 0; qp->create_type = QEDR_QP_CREATE_USER; - memset(&ureq, 0, sizeof(ureq)); - rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), udata->inlen)); - if (rc) { - DP_ERR(dev, "Problem copying data from user space\n"); - return rc; + + if (ibpd) { + pd = get_qedr_pd(ibpd); + ctx = pd->uctx; } - /* SQ - read access only (0) */ - rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr, - ureq.sq_len, true, 0, alloc_and_init); - if (rc) - return rc; + if (udata) { + rc = ib_copy_from_udata(&ureq, udata, min(sizeof(ureq), + udata->inlen)); + if (rc) { + DP_ERR(dev, "Problem copying data from user space\n"); + return rc; + } + } - if (!qp->srq) { + if (qedr_qp_has_sq(qp)) { + /* SQ - read access only (0) */ + rc = qedr_init_user_queue(udata, dev, &qp->usq, ureq.sq_addr, + ureq.sq_len, true, 0, alloc_and_init); + if (rc) + return rc; + } + + if (qedr_qp_has_rq(qp)) { /* RQ - read access only (0) */ rc = qedr_init_user_queue(udata, dev, &qp->urq, ureq.rq_addr, ureq.rq_len, true, 0, alloc_and_init); @@ -1789,9 +1872,21 @@ static int qedr_create_user_qp(struct qedr_dev *dev, qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params); in_params.qp_handle_lo = ureq.qp_handle_lo; in_params.qp_handle_hi = ureq.qp_handle_hi; - in_params.sq_num_pages = qp->usq.pbl_info.num_pbes; - in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa; - if (!qp->srq) { + + if (qp->qp_type == IB_QPT_XRC_TGT) { + struct qedr_xrcd *xrcd = get_qedr_xrcd(attrs->xrcd); + + in_params.xrcd_id = xrcd->xrcd_id; + in_params.qp_handle_lo = qp->qp_id; + in_params.use_srq = 1; + } + + if (qedr_qp_has_sq(qp)) { + in_params.sq_num_pages = qp->usq.pbl_info.num_pbes; + in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa; + } + + if (qedr_qp_has_rq(qp)) { in_params.rq_num_pages = qp->urq.pbl_info.num_pbes; in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa; } @@ -1813,39 +1908,32 @@ static int qedr_create_user_qp(struct qedr_dev *dev, qp->qp_id = out_params.qp_id; qp->icid = out_params.icid; - rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp); - if (rc) - goto err; - - /* db offset was calculated in copy_qp_uresp, now set in the user q */ - ctx = pd->uctx; - qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset; - qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset; - - if (rdma_protocol_iwarp(&dev->ibdev, 1)) { - qp->urq.db_rec_db2_addr = ctx->dpi_addr + uresp.rq_db2_offset; - - /* calculate the db_rec_db2 data since it is constant so no - * need to reflect from user - */ - qp->urq.db_rec_db2_data.data.icid = cpu_to_le16(qp->icid); - qp->urq.db_rec_db2_data.data.value = - cpu_to_le16(DQ_TCM_IWARP_POST_RQ_CF_CMD); + if (udata) { + rc = qedr_copy_qp_uresp(dev, qp, udata, &uresp); + if (rc) + goto err; } - rc = qedr_db_recovery_add(dev, qp->usq.db_addr, - &qp->usq.db_rec_data->db_data, - DB_REC_WIDTH_32B, - DB_REC_USER); - if (rc) - goto err; + /* db offset was calculated in copy_qp_uresp, now set in the user q */ + if (qedr_qp_has_sq(qp)) { + qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset; + rc = qedr_db_recovery_add(dev, qp->usq.db_addr, + &qp->usq.db_rec_data->db_data, + DB_REC_WIDTH_32B, + DB_REC_USER); + if (rc) + goto err; + } - rc = qedr_db_recovery_add(dev, qp->urq.db_addr, - &qp->urq.db_rec_data->db_data, - DB_REC_WIDTH_32B, - DB_REC_USER); - if (rc) - goto err; + if (qedr_qp_has_rq(qp)) { + qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset; + rc = qedr_db_recovery_add(dev, qp->urq.db_addr, + &qp->urq.db_rec_data->db_data, + DB_REC_WIDTH_32B, + DB_REC_USER); + if (rc) + goto err; + } if (rdma_protocol_iwarp(&dev->ibdev, 1)) { rc = qedr_db_recovery_add(dev, qp->urq.db_rec_db2_addr, @@ -1856,7 +1944,6 @@ static int qedr_create_user_qp(struct qedr_dev *dev, goto err; } qedr_qp_user_print(dev, qp); - return rc; err: rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); @@ -2112,16 +2199,47 @@ static int qedr_create_kernel_qp(struct qedr_dev *dev, return rc; } +static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp, + struct ib_udata *udata) +{ + struct qedr_ucontext *ctx = + rdma_udata_to_drv_context(udata, struct qedr_ucontext, + ibucontext); + int rc; + + if (qp->qp_type != IB_QPT_GSI) { + rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); + if (rc) + return rc; + } + + if (qp->create_type == QEDR_QP_CREATE_USER) + qedr_cleanup_user(dev, ctx, qp); + else + qedr_cleanup_kernel(dev, qp); + + return 0; +} + struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *attrs, struct ib_udata *udata) { - struct qedr_dev *dev = get_qedr_dev(ibpd->device); - struct qedr_pd *pd = get_qedr_pd(ibpd); + struct qedr_xrcd *xrcd = NULL; + struct qedr_pd *pd = NULL; + struct qedr_dev *dev; struct qedr_qp *qp; struct ib_qp *ibqp; int rc = 0; + if (attrs->qp_type == IB_QPT_XRC_TGT) { + xrcd = get_qedr_xrcd(attrs->xrcd); + dev = get_qedr_dev(xrcd->ibxrcd.device); + } else { + pd = get_qedr_pd(ibpd); + dev = get_qedr_dev(ibpd->device); + } + DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n", udata ? "user library" : "kernel", pd); @@ -2152,25 +2270,27 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, return ibqp; } - if (udata) + if (udata || xrcd) rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs); else rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs); if (rc) - goto err; + goto out_free_qp; qp->ibqp.qp_num = qp->qp_id; if (rdma_protocol_iwarp(&dev->ibdev, 1)) { rc = xa_insert(&dev->qps, qp->qp_id, qp, GFP_KERNEL); if (rc) - goto err; + goto out_free_qp_resources; } return &qp->ibqp; -err: +out_free_qp_resources: + qedr_free_qp_resources(dev, qp, udata); +out_free_qp: kfree(qp); return ERR_PTR(-EFAULT); @@ -2636,7 +2756,7 @@ int qedr_query_qp(struct ib_qp *ibqp, qp_attr->cap.max_recv_wr = qp->rq.max_wr; qp_attr->cap.max_send_sge = qp->sq.max_sges; qp_attr->cap.max_recv_sge = qp->rq.max_sges; - qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE; + qp_attr->cap.max_inline_data = dev->attr.max_inline; qp_init_attr->cap = qp_attr->cap; qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE; @@ -2671,28 +2791,6 @@ err: return rc; } -static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp, - struct ib_udata *udata) -{ - struct qedr_ucontext *ctx = - rdma_udata_to_drv_context(udata, struct qedr_ucontext, - ibucontext); - int rc; - - if (qp->qp_type != IB_QPT_GSI) { - rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); - if (rc) - return rc; - } - - if (qp->create_type == QEDR_QP_CREATE_USER) - qedr_cleanup_user(dev, ctx, qp); - else - qedr_cleanup_kernel(dev, qp); - - return 0; -} - int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct qedr_qp *qp = get_qedr_qp(ibqp); @@ -2752,6 +2850,8 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) if (rdma_protocol_iwarp(&dev->ibdev, 1)) qedr_iw_qp_rem_ref(&qp->ibqp); + else + kfree(qp); return 0; } @@ -2766,11 +2866,12 @@ int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, return 0; } -void qedr_destroy_ah(struct ib_ah *ibah, u32 flags) +int qedr_destroy_ah(struct ib_ah *ibah, u32 flags) { struct qedr_ah *ah = get_qedr_ah(ibah); rdma_destroy_ah_attr(&ah->attr); + return 0; } static void free_mr_info(struct qedr_dev *dev, struct mr_info *info) @@ -2861,7 +2962,8 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, goto err0; } - rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1); + rc = init_mr_info(dev, &mr->info, + ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE), 1); if (rc) goto err1; @@ -2888,10 +2990,8 @@ struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered; mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size); mr->hw_mr.page_size_log = PAGE_SHIFT; - mr->hw_mr.fbo = ib_umem_offset(mr->umem); mr->hw_mr.length = len; mr->hw_mr.vaddr = usr_addr; - mr->hw_mr.zbva = false; mr->hw_mr.phy_mr = false; mr->hw_mr.dma_mr = false; @@ -2984,10 +3084,8 @@ static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd, mr->hw_mr.pbl_ptr = 0; mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered; mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size); - mr->hw_mr.fbo = 0; mr->hw_mr.length = 0; mr->hw_mr.vaddr = 0; - mr->hw_mr.zbva = false; mr->hw_mr.phy_mr = true; mr->hw_mr.dma_mr = false; @@ -3765,10 +3863,10 @@ int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, * in first 4 bytes and need to update WQE producer in * next 4 bytes. */ - srq->hw_srq.virt_prod_pair_addr->sge_prod = hw_srq->sge_prod; + srq->hw_srq.virt_prod_pair_addr->sge_prod = cpu_to_le32(hw_srq->sge_prod); /* Make sure sge producer is updated first */ dma_wmb(); - srq->hw_srq.virt_prod_pair_addr->wqe_prod = hw_srq->wqe_prod; + srq->hw_srq.virt_prod_pair_addr->wqe_prod = cpu_to_le32(hw_srq->wqe_prod); wr = wr->next; } diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 39dd6286ba39..2672c32bc2f7 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -47,12 +47,13 @@ void qedr_dealloc_ucontext(struct ib_ucontext *uctx); int qedr_mmap(struct ib_ucontext *ucontext, struct vm_area_struct *vma); void qedr_mmap_free(struct rdma_user_mmap_entry *rdma_entry); int qedr_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); -void qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); - +int qedr_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); +int qedr_alloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata); +int qedr_dealloc_xrcd(struct ib_xrcd *ibxrcd, struct ib_udata *udata); int qedr_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); int qedr_resize_cq(struct ib_cq *, int cqe, struct ib_udata *); -void qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); +int qedr_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); struct ib_qp *qedr_create_qp(struct ib_pd *, struct ib_qp_init_attr *attrs, struct ib_udata *); @@ -67,12 +68,12 @@ int qedr_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *attr, int qedr_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int qedr_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); -void qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); +int qedr_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_recv_wr); int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); -void qedr_destroy_ah(struct ib_ah *ibah, u32 flags); +int qedr_destroy_ah(struct ib_ah *ibah, u32 flags); int qedr_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata); struct ib_mr *qedr_get_dma_mr(struct ib_pd *, int acc); diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 432d6d0fd7f4..ee211423058a 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -619,11 +619,11 @@ struct qib_pportdata { /* LID mask control */ u8 lmc; u8 link_width_supported; - u8 link_speed_supported; + u16 link_speed_supported; u8 link_width_enabled; - u8 link_speed_enabled; + u16 link_speed_enabled; u8 link_width_active; - u8 link_speed_active; + u16 link_speed_active; u8 vls_supported; u8 vls_operational; /* Rx Polarity inversion (compensate for ~tx on partner) */ diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index a10eab89aee4..189a0ce6056a 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -1733,9 +1733,9 @@ done: return; } -static void qib_error_tasklet(unsigned long data) +static void qib_error_tasklet(struct tasklet_struct *t) { - struct qib_devdata *dd = (struct qib_devdata *)data; + struct qib_devdata *dd = from_tasklet(dd, t, error_tasklet); handle_7322_errors(dd); qib_write_kreg(dd, kr_errmask, dd->cspec->errormask); @@ -3537,8 +3537,7 @@ try_intx: for (i = 0; i < ARRAY_SIZE(redirect); i++) qib_write_kreg(dd, kr_intredirect + i, redirect[i]); dd->cspec->main_int_mask = mask; - tasklet_init(&dd->error_tasklet, qib_error_tasklet, - (unsigned long)dd); + tasklet_setup(&dd->error_tasklet, qib_error_tasklet); } /** diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index e7789e724f56..f83e331977f8 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2293,76 +2293,50 @@ static int process_cc(struct ib_device *ibdev, int mad_flags, struct ib_mad *out_mad) { struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad; - int ret; - *out_mad = *in_mad; if (ccp->class_version != 2) { ccp->status |= IB_SMP_UNSUP_VERSION; - ret = reply((struct ib_smp *)ccp); - goto bail; + return reply((struct ib_smp *)ccp); } switch (ccp->method) { case IB_MGMT_METHOD_GET: switch (ccp->attr_id) { case IB_CC_ATTR_CLASSPORTINFO: - ret = cc_get_classportinfo(ccp, ibdev); - goto bail; - + return cc_get_classportinfo(ccp, ibdev); case IB_CC_ATTR_CONGESTION_INFO: - ret = cc_get_congestion_info(ccp, ibdev, port); - goto bail; - + return cc_get_congestion_info(ccp, ibdev, port); case IB_CC_ATTR_CA_CONGESTION_SETTING: - ret = cc_get_congestion_setting(ccp, ibdev, port); - goto bail; - + return cc_get_congestion_setting(ccp, ibdev, port); case IB_CC_ATTR_CONGESTION_CONTROL_TABLE: - ret = cc_get_congestion_control_table(ccp, ibdev, port); - goto bail; - - fallthrough; + return cc_get_congestion_control_table(ccp, ibdev, port); default: ccp->status |= IB_SMP_UNSUP_METH_ATTR; - ret = reply((struct ib_smp *) ccp); - goto bail; + return reply((struct ib_smp *) ccp); } - case IB_MGMT_METHOD_SET: switch (ccp->attr_id) { case IB_CC_ATTR_CA_CONGESTION_SETTING: - ret = cc_set_congestion_setting(ccp, ibdev, port); - goto bail; - + return cc_set_congestion_setting(ccp, ibdev, port); case IB_CC_ATTR_CONGESTION_CONTROL_TABLE: - ret = cc_set_congestion_control_table(ccp, ibdev, port); - goto bail; - - fallthrough; + return cc_set_congestion_control_table(ccp, ibdev, port); default: ccp->status |= IB_SMP_UNSUP_METH_ATTR; - ret = reply((struct ib_smp *) ccp); - goto bail; + return reply((struct ib_smp *) ccp); } - case IB_MGMT_METHOD_GET_RESP: /* * The ib_mad module will call us to process responses * before checking for other consumers. * Just tell the caller to process it normally. */ - ret = IB_MAD_RESULT_SUCCESS; - goto bail; - - case IB_MGMT_METHOD_TRAP: - default: - ccp->status |= IB_SMP_UNSUP_METHOD; - ret = reply((struct ib_smp *) ccp); + return IB_MAD_RESULT_SUCCESS; } -bail: - return ret; + /* method is unsupported */ + ccp->status |= IB_SMP_UNSUP_METHOD; + return reply((struct ib_smp *) ccp); } /** diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index 8f8d61736656..5e86cbf7d70e 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -62,7 +62,7 @@ static void sdma_get(struct qib_sdma_state *); static void sdma_put(struct qib_sdma_state *); static void sdma_set_state(struct qib_pportdata *, enum qib_sdma_states); static void sdma_start_sw_clean_up(struct qib_pportdata *); -static void sdma_sw_clean_up_task(unsigned long); +static void sdma_sw_clean_up_task(struct tasklet_struct *); static void unmap_desc(struct qib_pportdata *, unsigned); static void sdma_get(struct qib_sdma_state *ss) @@ -119,9 +119,10 @@ static void clear_sdma_activelist(struct qib_pportdata *ppd) } } -static void sdma_sw_clean_up_task(unsigned long opaque) +static void sdma_sw_clean_up_task(struct tasklet_struct *t) { - struct qib_pportdata *ppd = (struct qib_pportdata *) opaque; + struct qib_pportdata *ppd = from_tasklet(ppd, t, + sdma_sw_clean_up_task); unsigned long flags; spin_lock_irqsave(&ppd->sdma_lock, flags); @@ -436,8 +437,7 @@ int qib_setup_sdma(struct qib_pportdata *ppd) INIT_LIST_HEAD(&ppd->sdma_activelist); - tasklet_init(&ppd->sdma_sw_clean_up_task, sdma_sw_clean_up_task, - (unsigned long)ppd); + tasklet_setup(&ppd->sdma_sw_clean_up_task, sdma_sw_clean_up_task); ret = dd->f_init_sdma_regs(ppd); if (ret) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 662e7fc7f628..aa2e65fc5cd6 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -315,7 +315,6 @@ static int usnic_port_immutable(struct ib_device *ibdev, u8 port_num, if (err) return err; - immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; return 0; @@ -355,7 +354,6 @@ static const struct ib_device_ops usnic_dev_ops = { .modify_qp = usnic_ib_modify_qp, .query_device = usnic_ib_query_device, .query_gid = usnic_ib_query_gid, - .query_pkey = usnic_ib_query_pkey, .query_port = usnic_ib_query_port, .query_qp = usnic_ib_query_qp, .reg_user_mr = usnic_ib_reg_mr, @@ -427,7 +425,8 @@ static void *usnic_ib_device_add(struct pci_dev *dev) if (ret) goto err_fwd_dealloc; - if (ib_register_device(&us_ibdev->ib_dev, "usnic_%d")) + dma_set_max_seg_size(&dev->dev, SZ_2G); + if (ib_register_device(&us_ibdev->ib_dev, "usnic_%d", &dev->dev)) goto err_fwd_dealloc; usnic_fwd_set_mtu(us_ibdev->ufdev, us_ibdev->netdev->mtu); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index b8a77ce11590..9e961f8ffa10 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -367,7 +367,6 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port, props->port_cap_flags = 0; props->gid_tbl_len = 1; - props->pkey_tbl_len = 1; props->bad_pkey_cntr = 0; props->qkey_viol_cntr = 0; props->max_mtu = IB_MTU_4096; @@ -437,16 +436,6 @@ int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index, return 0; } -int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, - u16 *pkey) -{ - if (index > 0) - return -EINVAL; - - *pkey = 0xffff; - return 0; -} - int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct usnic_ib_pd *pd = to_upd(ibpd); @@ -460,9 +449,10 @@ int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) return 0; } -void usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) +int usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { usnic_uiom_dealloc_pd((to_upd(pd))->umem_pd); + return 0; } struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, @@ -596,9 +586,9 @@ int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, return 0; } -void usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) +int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { - return; + return 0; } struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length, diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index 2aedf78c13cf..11fe1ba6bbc9 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -48,10 +48,8 @@ int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, struct ib_qp_init_attr *qp_init_attr); int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index, union ib_gid *gid); -int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, - u16 *pkey); int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); -void usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); +int usnic_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata); struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); @@ -60,7 +58,7 @@ int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); int usnic_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); -void usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); +int usnic_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index 4f6cc0de7ef9..319546a39a0d 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -142,7 +142,7 @@ int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, goto err_cq; } - npages = ib_umem_page_count(cq->umem); + npages = ib_umem_num_dma_blocks(cq->umem, PAGE_SIZE); } else { /* One extra page for shared ring state */ npages = 1 + (entries * sizeof(struct pvrdma_cqe) + @@ -235,7 +235,7 @@ static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq) * @cq: the completion queue to destroy. * @udata: user data or null for kernel object */ -void pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) +int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) { struct pvrdma_cq *vcq = to_vcq(cq); union pvrdma_cmd_req req; @@ -261,6 +261,7 @@ void pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) pvrdma_free_cq(dev, vcq); atomic_dec(&dev->num_cqs); + return 0; } static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i) @@ -375,7 +376,7 @@ retry: * pvrdma_poll_cq - poll for work completion queue entries * @ibcq: completion queue * @num_entries: the maximum number of entries - * @entry: pointer to work completion array + * @wc: pointer to work completion array * * @return: number of polled completion entries */ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 780fd2dfc07e..fa2a3fa0c3e4 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -270,7 +270,7 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) spin_lock_init(&dev->srq_tbl_lock); rdma_set_device_sysfs_group(&dev->ib_dev, &pvrdma_attr_group); - ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d"); + ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d", &dev->pdev->dev); if (ret) goto err_srq_free; @@ -854,7 +854,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, goto err_free_resource; } } - + dma_set_max_seg_size(&pdev->dev, UINT_MAX); pci_set_master(pdev); /* Map register space */ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c index 7944c58ded0e..ba43ad07898c 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c @@ -182,17 +182,16 @@ int pvrdma_page_dir_insert_dma(struct pvrdma_page_dir *pdir, u64 idx, int pvrdma_page_dir_insert_umem(struct pvrdma_page_dir *pdir, struct ib_umem *umem, u64 offset) { + struct ib_block_iter biter; u64 i = offset; int ret = 0; - struct sg_dma_page_iter sg_iter; if (offset >= pdir->npages) return -EINVAL; - for_each_sg_dma_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { - dma_addr_t addr = sg_page_iter_dma_address(&sg_iter); - - ret = pvrdma_page_dir_insert_dma(pdir, i, addr); + rdma_umem_for_each_dma_block (umem, &biter, PAGE_SIZE) { + ret = pvrdma_page_dir_insert_dma( + pdir, i, rdma_block_iter_dma_address(&biter)); if (ret) goto exit; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c index 77a010e68208..e80848bfb3bd 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c @@ -133,7 +133,7 @@ struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return ERR_CAST(umem); } - npages = ib_umem_num_pages(umem); + npages = ib_umem_num_dma_blocks(umem, PAGE_SIZE); if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) { dev_warn(&dev->pdev->dev, "overflow %d pages in mem region\n", npages); @@ -270,6 +270,7 @@ freemr: /** * pvrdma_dereg_mr - deregister a memory region * @ibmr: memory region + * @udata: pointer to user data * * @return: 0 on success. */ diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index 9a8f2a9507be..428256c55065 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -232,8 +232,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, switch (init_attr->qp_type) { case IB_QPT_GSI: if (init_attr->port_num == 0 || - init_attr->port_num > pd->device->phys_port_cnt || - udata) { + init_attr->port_num > pd->device->phys_port_cnt) { dev_warn(&dev->pdev->dev, "invalid queuepair attrs\n"); ret = -EINVAL; goto err_qp; @@ -298,9 +297,11 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, goto err_qp; } - qp->npages_send = ib_umem_page_count(qp->sumem); + qp->npages_send = + ib_umem_num_dma_blocks(qp->sumem, PAGE_SIZE); if (!is_srq) - qp->npages_recv = ib_umem_page_count(qp->rumem); + qp->npages_recv = ib_umem_num_dma_blocks( + qp->rumem, PAGE_SIZE); else qp->npages_recv = 0; qp->npages = qp->npages_send + qp->npages_recv; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index d330decfb80a..082208f9aa90 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -90,7 +90,7 @@ int pvrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) /** * pvrdma_create_srq - create shared receive queue - * @pd: protection domain + * @ibsrq: the IB shared receive queue * @init_attr: shared receive queue attributes * @udata: user data * @@ -152,7 +152,7 @@ int pvrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr, goto err_srq; } - srq->npages = ib_umem_page_count(srq->umem); + srq->npages = ib_umem_num_dma_blocks(srq->umem, PAGE_SIZE); if (srq->npages < 0 || srq->npages > PVRDMA_PAGE_DIR_MAX_PAGES) { dev_warn(&dev->pdev->dev, @@ -240,7 +240,7 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq) * * @return: 0 for success. */ -void pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) +int pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) { struct pvrdma_srq *vsrq = to_vsrq(srq); union pvrdma_cmd_req req; @@ -259,6 +259,7 @@ void pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata) ret); pvrdma_free_srq(dev, vsrq); + return 0; } /** diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index ccbded2d26ce..fc412cbfd042 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -479,9 +479,9 @@ err: * @pd: the protection domain to be released * @udata: user data or null for kernel object * - * @return: 0 on success, otherwise errno. + * @return: Always 0 */ -void pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) +int pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { struct pvrdma_dev *dev = to_vdev(pd->device); union pvrdma_cmd_req req = {}; @@ -498,14 +498,14 @@ void pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) ret); atomic_dec(&dev->num_pds); + return 0; } /** * pvrdma_create_ah - create an address handle - * @pd: the protection domain - * @ah_attr: the attributes of the AH - * @udata: user data blob - * @flags: create address handle flags (see enum rdma_create_ah_flags) + * @ibah: the IB address handle + * @init_attr: the attributes of the AH + * @udata: pointer to user data * * @return: 0 on success, otherwise errno. */ @@ -548,9 +548,10 @@ int pvrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, * @flags: destroy address handle flags (see enum rdma_destroy_ah_flags) * */ -void pvrdma_destroy_ah(struct ib_ah *ah, u32 flags) +int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags) { struct pvrdma_dev *dev = to_vdev(ah->device); atomic_dec(&dev->num_ahs); + return 0; } diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index 699b20849a7e..f0e5ffba2d51 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -176,7 +176,7 @@ struct pvrdma_port_attr { u8 subnet_timeout; u8 init_type_reply; u8 active_width; - u8 active_speed; + u16 active_speed; u8 phys_state; u8 reserved[2]; }; @@ -399,7 +399,7 @@ int pvrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); int pvrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata); void pvrdma_dealloc_ucontext(struct ib_ucontext *context); int pvrdma_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); -void pvrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); +int pvrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); struct ib_mr *pvrdma_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, @@ -411,19 +411,19 @@ int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); -void pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); +int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata); int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); int pvrdma_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); -void pvrdma_destroy_ah(struct ib_ah *ah, u32 flags); +int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags); int pvrdma_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *init_attr, struct ib_udata *udata); int pvrdma_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int pvrdma_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr); -void pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); +int pvrdma_destroy_srq(struct ib_srq *srq, struct ib_udata *udata); struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index 75a04b1497c4..b938c4ffa99a 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -132,7 +132,7 @@ int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, * * Return: 0 on success */ -void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags) +int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags) { struct rvt_dev_info *dev = ib_to_rvt(ibah->device); struct rvt_ah *ah = ibah_to_rvtah(ibah); @@ -143,6 +143,7 @@ void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags) spin_unlock_irqrestore(&dev->n_ahs_lock, flags); rdma_destroy_ah_attr(&ah->attr); + return 0; } /** diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h index 40b7123fec76..5a85edd06491 100644 --- a/drivers/infiniband/sw/rdmavt/ah.h +++ b/drivers/infiniband/sw/rdmavt/ah.h @@ -52,7 +52,7 @@ int rvt_create_ah(struct ib_ah *ah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata); -void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags); +int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags); int rvt_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); int rvt_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 04d2e72017fe..19248be14093 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -315,7 +315,7 @@ bail_wc: * * Called by ib_destroy_cq() in the generic verbs code. */ -void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct rvt_cq *cq = ibcq_to_rvtcq(ibcq); struct rvt_dev_info *rdi = cq->rdi; @@ -328,6 +328,7 @@ void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) kref_put(&cq->ip->ref, rvt_release_mmap_info); else vfree(cq->kqueue); + return 0; } /** diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h index 5e26a2eb19a4..feb01e7ee004 100644 --- a/drivers/infiniband/sw/rdmavt/cq.h +++ b/drivers/infiniband/sw/rdmavt/cq.h @@ -53,7 +53,7 @@ int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); -void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); +int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags); int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c index a403718f0b5e..01b7abf91520 100644 --- a/drivers/infiniband/sw/rdmavt/pd.c +++ b/drivers/infiniband/sw/rdmavt/pd.c @@ -95,11 +95,12 @@ bail: * * Return: always 0 */ -void rvt_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +int rvt_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct rvt_dev_info *dev = ib_to_rvt(ibpd->device); spin_lock(&dev->n_pds_lock); dev->n_pds_allocated--; spin_unlock(&dev->n_pds_lock); + return 0; } diff --git a/drivers/infiniband/sw/rdmavt/pd.h b/drivers/infiniband/sw/rdmavt/pd.h index 71ba76d72b1d..06a6a38beedc 100644 --- a/drivers/infiniband/sw/rdmavt/pd.h +++ b/drivers/infiniband/sw/rdmavt/pd.h @@ -51,6 +51,6 @@ #include int rvt_alloc_pd(struct ib_pd *pd, struct ib_udata *udata); -void rvt_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); +int rvt_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); #endif /* DEF_RDMAVTPD_H */ diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c index f547c115af03..64d98bf238ab 100644 --- a/drivers/infiniband/sw/rdmavt/srq.c +++ b/drivers/infiniband/sw/rdmavt/srq.c @@ -332,7 +332,7 @@ int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) * @ibsrq: srq object to destroy * */ -void rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) +int rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device); @@ -343,4 +343,5 @@ void rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) if (srq->ip) kref_put(&srq->ip->ref, rvt_release_mmap_info); kvfree(srq->rq.kwq); + return 0; } diff --git a/drivers/infiniband/sw/rdmavt/srq.h b/drivers/infiniband/sw/rdmavt/srq.h index 6427d7d62a9a..d5a1a053b1b9 100644 --- a/drivers/infiniband/sw/rdmavt/srq.h +++ b/drivers/infiniband/sw/rdmavt/srq.h @@ -56,6 +56,6 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); -void rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); +int rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); #endif /* DEF_RVTSRQ_H */ diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index f904bb34477a..52218684ad4a 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -95,9 +95,7 @@ struct rvt_dev_info *rvt_alloc_device(size_t size, int nports) if (!rdi) return rdi; - rdi->ports = kcalloc(nports, - sizeof(struct rvt_ibport **), - GFP_KERNEL); + rdi->ports = kcalloc(nports, sizeof(*rdi->ports), GFP_KERNEL); if (!rdi->ports) ib_dealloc_device(&rdi->ibdev); @@ -581,7 +579,9 @@ int rvt_register_device(struct rvt_dev_info *rdi) spin_lock_init(&rdi->n_cqs_lock); /* DMA Operations */ - rdi->ibdev.dev.dma_ops = rdi->ibdev.dev.dma_ops ? : &dma_virt_ops; + rdi->ibdev.dev.dma_parms = rdi->ibdev.dev.parent->dma_parms; + dma_set_coherent_mask(&rdi->ibdev.dev, + rdi->ibdev.dev.parent->coherent_dma_mask); /* Protection Domain */ spin_lock_init(&rdi->n_pds_lock); @@ -629,7 +629,7 @@ int rvt_register_device(struct rvt_dev_info *rdi) rdi->ibdev.num_comp_vectors = 1; /* We are now good to announce we exist */ - ret = ib_register_device(&rdi->ibdev, dev_name(&rdi->ibdev.dev)); + ret = ib_register_device(&rdi->ibdev, dev_name(&rdi->ibdev.dev), NULL); if (ret) { rvt_pr_err(rdi, "Failed to register driver with ib core.\n"); goto bail_wss; diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 77f2c7cd1216..95f0de0c8b49 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include @@ -279,6 +252,12 @@ static int rxe_newlink(const char *ibdev_name, struct net_device *ndev) struct rxe_dev *exists; int err = 0; + if (is_vlan_dev(ndev)) { + pr_err("rxe creation allowed on top of a real device only\n"); + err = -EPERM; + goto err; + } + exists = rxe_get_dev_from_net(ndev); if (exists) { ib_device_put(&exists->ib_dev); @@ -305,13 +284,6 @@ static int __init rxe_module_init(void) { int err; - /* initialize slab caches for managed objects */ - err = rxe_cache_init(); - if (err) { - pr_err("unable to init object pools\n"); - return err; - } - err = rxe_net_init(); if (err) return err; @@ -327,7 +299,6 @@ static void __exit rxe_module_exit(void) rdma_link_unregister(&rxe_link_ops); ib_unregister_driver(RDMA_DRIVER_RXE); rxe_net_exit(); - rxe_cache_exit(); rxe_initialized = false; pr_info("unloaded\n"); diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h index cae1b0a24c85..623fd17df02d 100644 --- a/drivers/infiniband/sw/rxe/rxe.h +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -1,34 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef RXE_H diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c index 81ee756c19b8..38021e2c8688 100644 --- a/drivers/infiniband/sw/rxe/rxe_av.c +++ b/drivers/infiniband/sw/rxe/rxe_av.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include "rxe.h" diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 7b4df0028388..0a1e6393250b 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include @@ -690,9 +663,8 @@ int rxe_completer(void *arg) */ /* there is nothing to retry in this case */ - if (!wqe || (wqe->state == wqe_state_posted)) { + if (!wqe || (wqe->state == wqe_state_posted)) goto exit; - } /* if we've started a retry, don't start another * retry sequence, unless this is a timeout. diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index ad3090131126..43394c3f29d4 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include #include "rxe.h" @@ -66,9 +39,9 @@ err1: return -EINVAL; } -static void rxe_send_complete(unsigned long data) +static void rxe_send_complete(struct tasklet_struct *t) { - struct rxe_cq *cq = (struct rxe_cq *)data; + struct rxe_cq *cq = from_tasklet(cq, t, comp_task); unsigned long flags; spin_lock_irqsave(&cq->cq_lock, flags); @@ -107,7 +80,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, cq->is_dying = false; - tasklet_init(&cq->comp_task, rxe_send_complete, (unsigned long)cq); + tasklet_setup(&cq->comp_task, rxe_send_complete); spin_lock_init(&cq->cq_lock); cq->ibcq.cqe = cqe; diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h index ce003666b800..3b483b75dfe3 100644 --- a/drivers/infiniband/sw/rxe/rxe_hdr.h +++ b/drivers/infiniband/sw/rxe/rxe_hdr.h @@ -1,34 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef RXE_HDR_H diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/infiniband/sw/rxe/rxe_hw_counters.c index 636edb5f4cf4..ac9154f0593d 100644 --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.c +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c @@ -1,33 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include "rxe.h" diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.h b/drivers/infiniband/sw/rxe/rxe_hw_counters.h index 72c0d63c79e0..49ee6f96656d 100644 --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.h +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.h @@ -1,33 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef RXE_HW_COUNTERS_H diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c index 39e0be31aab1..66b2aad54bb7 100644 --- a/drivers/infiniband/sw/rxe/rxe_icrc.c +++ b/drivers/infiniband/sw/rxe/rxe_icrc.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include "rxe.h" diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 39dc3bfa5d5d..0d758760b9ae 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -1,34 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef RXE_LOC_H diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c index 522a7942c56c..c02315aed8d1 100644 --- a/drivers/infiniband/sw/rxe/rxe_mcast.c +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include "rxe.h" diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c index 7887f623f62c..035f226af133 100644 --- a/drivers/infiniband/sw/rxe/rxe_mmap.c +++ b/drivers/infiniband/sw/rxe/rxe_mmap.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index ce24144de16a..d2ce852447c1 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include "rxe.h" @@ -79,13 +52,8 @@ static void rxe_mem_init(int access, struct rxe_mem *mem) u32 lkey = mem->pelem.index << 8 | rxe_get_key(); u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0; - if (mem->pelem.pool->type == RXE_TYPE_MR) { - mem->ibmr.lkey = lkey; - mem->ibmr.rkey = rkey; - } - - mem->lkey = lkey; - mem->rkey = rkey; + mem->ibmr.lkey = lkey; + mem->ibmr.rkey = rkey; mem->state = RXE_MEM_STATE_INVALID; mem->type = RXE_MEM_TYPE_NONE; mem->map_shift = ilog2(RXE_BUF_PER_MAP); @@ -149,7 +117,7 @@ void rxe_mem_init_dma(struct rxe_pd *pd, { rxe_mem_init(access, mem); - mem->pd = pd; + mem->ibmr.pd = &pd->ibpd; mem->access = access; mem->state = RXE_MEM_STATE_VALID; mem->type = RXE_MEM_TYPE_DMA; @@ -218,7 +186,7 @@ int rxe_mem_init_user(struct rxe_pd *pd, u64 start, } } - mem->pd = pd; + mem->ibmr.pd = &pd->ibpd; mem->umem = umem; mem->access = access; mem->length = length; @@ -248,7 +216,7 @@ int rxe_mem_init_fast(struct rxe_pd *pd, if (err) goto err1; - mem->pd = pd; + mem->ibmr.pd = &pd->ibpd; mem->max_buf = max_pages; mem->state = RXE_MEM_STATE_FREE; mem->type = RXE_MEM_TYPE_MR; @@ -368,7 +336,7 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length, memcpy(dest, src, length); if (crcp) - *crcp = rxe_crc32(to_rdev(mem->pd->ibpd.device), + *crcp = rxe_crc32(to_rdev(mem->ibmr.device), *crcp, dest, length); return 0; @@ -402,7 +370,7 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length, memcpy(dest, src, bytes); if (crcp) - crc = rxe_crc32(to_rdev(mem->pd->ibpd.device), + crc = rxe_crc32(to_rdev(mem->ibmr.device), crc, dest, bytes); length -= bytes; @@ -575,9 +543,9 @@ struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key, if (!mem) return NULL; - if (unlikely((type == lookup_local && mem->lkey != key) || - (type == lookup_remote && mem->rkey != key) || - mem->pd != pd || + if (unlikely((type == lookup_local && mr_lkey(mem) != key) || + (type == lookup_remote && mr_rkey(mem) != key) || + mr_pd(mem) != pd || (access && !(access & mem->access)) || mem->state != RXE_MEM_STATE_VALID)) { rxe_drop_ref(mem); diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 0c3808611f95..575e1a4ec821 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include @@ -120,7 +93,7 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev, ndst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(recv_sockets.sk6->sk), recv_sockets.sk6->sk, &fl6, NULL); - if (unlikely(IS_ERR(ndst))) { + if (IS_ERR(ndst)) { pr_err_ratelimited("no route to %pI6\n", daddr); return NULL; } @@ -160,14 +133,14 @@ static struct dst_entry *rxe_find_route(struct net_device *ndev, if (dst) dst_release(dst); - if (av->network_type == RDMA_NETWORK_IPV4) { + if (av->network_type == RXE_NETWORK_TYPE_IPV4) { struct in_addr *saddr; struct in_addr *daddr; saddr = &av->sgid_addr._sockaddr_in.sin_addr; daddr = &av->dgid_addr._sockaddr_in.sin_addr; dst = rxe_find_route4(ndev, saddr, daddr); - } else if (av->network_type == RDMA_NETWORK_IPV6) { + } else if (av->network_type == RXE_NETWORK_TYPE_IPV6) { struct in6_addr *saddr6; struct in6_addr *daddr6; @@ -469,7 +442,7 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, if (IS_ERR(attr)) return NULL; - if (av->network_type == RDMA_NETWORK_IPV4) + if (av->network_type == RXE_NETWORK_TYPE_IPV6) hdr_len = ETH_HLEN + sizeof(struct udphdr) + sizeof(struct iphdr); else @@ -496,7 +469,7 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, skb->dev = ndev; rcu_read_unlock(); - if (av->network_type == RDMA_NETWORK_IPV4) + if (av->network_type == RXE_NETWORK_TYPE_IPV4) skb->protocol = htons(ETH_P_IP); else skb->protocol = htons(ETH_P_IPV6); diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h index 2ca71d3d245c..45d80d00f86b 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.h +++ b/drivers/infiniband/sw/rxe/rxe_net.h @@ -1,34 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef RXE_NET_H diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c index 4cf11063e0b5..0cb4b01fd910 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.c +++ b/drivers/infiniband/sw/rxe/rxe_opcode.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h index 307604e9c78d..1041ac9a9233 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.h +++ b/drivers/infiniband/sw/rxe/rxe_opcode.h @@ -1,34 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef RXE_OPCODE_H diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index 2f381aeafcb5..25ab50d9b7c2 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -1,34 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef RXE_PARAM_H diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index fbcbac52290b..b374eb53e2fe 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include "rxe.h" @@ -110,62 +83,6 @@ static inline const char *pool_name(struct rxe_pool *pool) return rxe_type_info[pool->type].name; } -static inline struct kmem_cache *pool_cache(struct rxe_pool *pool) -{ - return rxe_type_info[pool->type].cache; -} - -static void rxe_cache_clean(size_t cnt) -{ - int i; - struct rxe_type_info *type; - - for (i = 0; i < cnt; i++) { - type = &rxe_type_info[i]; - if (!(type->flags & RXE_POOL_NO_ALLOC)) { - kmem_cache_destroy(type->cache); - type->cache = NULL; - } - } -} - -int rxe_cache_init(void) -{ - int err; - int i; - size_t size; - struct rxe_type_info *type; - - for (i = 0; i < RXE_NUM_TYPES; i++) { - type = &rxe_type_info[i]; - size = ALIGN(type->size, RXE_POOL_ALIGN); - if (!(type->flags & RXE_POOL_NO_ALLOC)) { - type->cache = - kmem_cache_create(type->name, size, - RXE_POOL_ALIGN, - RXE_POOL_CACHE_FLAGS, NULL); - if (!type->cache) { - pr_err("Unable to init kmem cache for %s\n", - type->name); - err = -ENOMEM; - goto err1; - } - } - } - - return 0; - -err1: - rxe_cache_clean(i); - - return err; -} - -void rxe_cache_exit(void) -{ - rxe_cache_clean(RXE_NUM_TYPES); -} - static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min) { int err = 0; @@ -406,7 +323,7 @@ void *rxe_alloc(struct rxe_pool *pool) if (atomic_inc_return(&pool->num_elem) > pool->max_elem) goto out_cnt; - elem = kmem_cache_zalloc(pool_cache(pool), + elem = kzalloc(rxe_type_info[pool->type].size, (pool->flags & RXE_POOL_ATOMIC) ? GFP_ATOMIC : GFP_KERNEL); if (!elem) @@ -468,7 +385,7 @@ void rxe_elem_release(struct kref *kref) pool->cleanup(elem); if (!(pool->flags & RXE_POOL_NO_ALLOC)) - kmem_cache_free(pool_cache(pool), elem); + kfree(elem); atomic_dec(&pool->num_elem); ib_device_put(&pool->rxe->ib_dev); rxe_pool_put(pool); diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h index 2f2cff1cbe43..432745ffc8d4 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.h +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -1,34 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef RXE_POOL_H @@ -69,7 +42,6 @@ struct rxe_type_info { u32 min_index; size_t key_offset; size_t key_size; - struct kmem_cache *cache; }; extern struct rxe_type_info rxe_type_info[]; @@ -113,12 +85,6 @@ struct rxe_pool { size_t key_size; }; -/* initialize slab caches for managed objects */ -int rxe_cache_init(void); - -/* cleanup slab caches for managed objects */ -void rxe_cache_exit(void); - /* initialize a pool of objects with given limit on * number of elements. gets parameters from rxe_type_info * pool elements will be allocated out of a slab cache diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 6c11c3aeeca6..656a5b4be847 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include @@ -628,9 +601,8 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, if (mask & IB_QP_QKEY) qp->attr.qkey = attr->qkey; - if (mask & IB_QP_AV) { + if (mask & IB_QP_AV) rxe_init_av(&attr->ah_attr, &qp->pri_av); - } if (mask & IB_QP_ALT_PATH) { rxe_init_av(&attr->alt_ah_attr, &qp->alt_av); diff --git a/drivers/infiniband/sw/rxe/rxe_queue.c b/drivers/infiniband/sw/rxe/rxe_queue.c index 245040c3a35d..fa69241b1187 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.c +++ b/drivers/infiniband/sw/rxe/rxe_queue.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must retailuce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h index 8ef17d617022..7d434a6837a7 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.h +++ b/drivers/infiniband/sw/rxe/rxe_queue.h @@ -1,34 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef RXE_QUEUE_H diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 7e123d3c4d09..c9984a28eecc 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include @@ -260,6 +233,8 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) struct rxe_mc_elem *mce; struct rxe_qp *qp; union ib_gid dgid; + struct sk_buff *per_qp_skb; + struct rxe_pkt_info *per_qp_pkt; int err; if (skb->protocol == htons(ETH_P_IP)) @@ -288,26 +263,44 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) if (err) continue; - /* if *not* the last qp in the list - * increase the users of the skb then post to the next qp + /* for all but the last qp create a new clone of the + * skb and pass to the qp. */ if (mce->qp_list.next != &mcg->qp_list) - skb_get(skb); + per_qp_skb = skb_clone(skb, GFP_ATOMIC); + else + per_qp_skb = skb; - pkt->qp = qp; + if (unlikely(!per_qp_skb)) + continue; + + per_qp_pkt = SKB_TO_PKT(per_qp_skb); + per_qp_pkt->qp = qp; rxe_add_ref(qp); - rxe_rcv_pkt(pkt, skb); + rxe_rcv_pkt(per_qp_pkt, per_qp_skb); } spin_unlock_bh(&mcg->mcg_lock); rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */ + return; + err1: kfree_skb(skb); } -static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb) +/** + * rxe_chk_dgid - validate destination IP address + * @rxe: rxe device that received packet + * @skb: the received packet buffer + * + * Accept any loopback packets + * Extract IP address from packet and + * Accept if multicast packet + * Accept if matches an SGID table entry + */ +static int rxe_chk_dgid(struct rxe_dev *rxe, struct sk_buff *skb) { struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); const struct ib_gid_attr *gid_attr; @@ -325,6 +318,9 @@ static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb) pdgid = (union ib_gid *)&ipv6_hdr(skb)->daddr; } + if (rdma_is_multicast_addr((struct in6_addr *)pdgid)) + return 0; + gid_attr = rdma_find_gid_by_port(&rxe->ib_dev, pdgid, IB_GID_TYPE_ROCE_UDP_ENCAP, 1, skb->dev); @@ -349,8 +345,8 @@ void rxe_rcv(struct sk_buff *skb) if (unlikely(skb->len < pkt->offset + RXE_BTH_BYTES)) goto drop; - if (rxe_match_dgid(rxe, skb) < 0) { - pr_warn_ratelimited("failed matching dgid\n"); + if (rxe_chk_dgid(rxe, skb) < 0) { + pr_warn_ratelimited("failed checking dgid\n"); goto drop; } diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 34df2b55e650..af3923bf0a36 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include @@ -644,8 +617,8 @@ next_wqe: rmr->state = RXE_MEM_STATE_VALID; rmr->access = wqe->wr.wr.reg.access; - rmr->lkey = wqe->wr.wr.reg.key; - rmr->rkey = wqe->wr.wr.reg.key; + rmr->ibmr.lkey = wqe->wr.wr.reg.key; + rmr->ibmr.rkey = wqe->wr.wr.reg.key; rmr->iova = wqe->wr.wr.reg.mr->iova; wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index c4a8195bf670..c7e3b6a4af38 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c index d8459431534e..41b0d1e11baf 100644 --- a/drivers/infiniband/sw/rxe/rxe_srq.c +++ b/drivers/infiniband/sw/rxe/rxe_srq.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c index 2af31d421bfc..666202ddff48 100644 --- a/drivers/infiniband/sw/rxe/rxe_sysfs.c +++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include "rxe.h" @@ -78,6 +51,12 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp) return -EINVAL; } + if (is_vlan_dev(ndev)) { + pr_err("rxe creation allowed on top of a real device only\n"); + err = -EPERM; + goto err; + } + exists = rxe_get_dev_from_net(ndev); if (exists) { ib_device_put(&exists->ib_dev); diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index ecdac3f8fcc9..6951fdcb31bf 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include @@ -55,12 +28,12 @@ int __rxe_do_task(struct rxe_task *task) * a second caller finds the task already running * but looks just after the last call to func */ -void rxe_do_task(unsigned long data) +void rxe_do_task(struct tasklet_struct *t) { int cont; int ret; unsigned long flags; - struct rxe_task *task = (struct rxe_task *)data; + struct rxe_task *task = from_tasklet(task, t, tasklet); spin_lock_irqsave(&task->state_lock, flags); switch (task->state) { @@ -123,7 +96,7 @@ int rxe_init_task(void *obj, struct rxe_task *task, snprintf(task->name, sizeof(task->name), "%s", name); task->destroyed = false; - tasklet_init(&task->tasklet, rxe_do_task, (unsigned long)task); + tasklet_setup(&task->tasklet, rxe_do_task); task->state = TASK_STATE_START; spin_lock_init(&task->state_lock); @@ -159,7 +132,7 @@ void rxe_run_task(struct rxe_task *task, int sched) if (sched) tasklet_schedule(&task->tasklet); else - rxe_do_task((unsigned long)task); + rxe_do_task(&task->tasklet); } void rxe_disable_task(struct rxe_task *task) diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h index 08ff42d451c6..11d183fd3338 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.h +++ b/drivers/infiniband/sw/rxe/rxe_task.h @@ -1,34 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef RXE_TASK_H @@ -60,7 +33,7 @@ struct rxe_task { /* * init rxe_task structure * arg => parameter to pass to fcn - * fcn => function to call until it returns != 0 + * func => function to call until it returns != 0 */ int rxe_init_task(void *obj, struct rxe_task *task, void *arg, int (*func)(void *), char *name); @@ -80,7 +53,7 @@ int __rxe_do_task(struct rxe_task *task); * work to do someone must reschedule the task before * leaving */ -void rxe_do_task(unsigned long data); +void rxe_do_task(struct tasklet_struct *t); /* run a task, else schedule it to run as a tasklet, The decision * to run or schedule tasklet is based on the parameter sched. diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 8522e9a3e914..1fc022362fbe 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1,34 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #include @@ -175,11 +148,12 @@ static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) return rxe_add_to_pool(&rxe->pd_pool, &pd->pelem); } -static void rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +static int rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct rxe_pd *pd = to_rpd(ibpd); rxe_drop_ref(pd); + return 0; } static int rxe_create_ah(struct ib_ah *ibah, @@ -227,11 +201,12 @@ static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) return 0; } -static void rxe_destroy_ah(struct ib_ah *ibah, u32 flags) +static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags) { struct rxe_ah *ah = to_rah(ibah); rxe_drop_ref(ah); + return 0; } static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) @@ -365,7 +340,7 @@ static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) return 0; } -static void rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) +static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct rxe_srq *srq = to_rsrq(ibsrq); @@ -374,6 +349,7 @@ static void rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) rxe_drop_ref(srq->pd); rxe_drop_ref(srq); + return 0; } static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, @@ -803,13 +779,14 @@ static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, return rxe_add_to_pool(&rxe->cq_pool, &cq->pelem); } -static void rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct rxe_cq *cq = to_rcq(ibcq); rxe_cq_disable(cq); rxe_drop_ref(cq); + return 0; } static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) @@ -944,7 +921,7 @@ static int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) struct rxe_mem *mr = to_rmr(ibmr); mr->state = RXE_MEM_STATE_ZOMBIE; - rxe_drop_ref(mr->pd); + rxe_drop_ref(mr_pd(mr)); rxe_drop_index(mr); rxe_drop_ref(mr); return 0; @@ -1151,12 +1128,9 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) dev->local_dma_lkey = 0; addrconf_addr_eui48((unsigned char *)&dev->node_guid, rxe->ndev->dev_addr); - dev->dev.dma_ops = &dma_virt_ops; dev->dev.dma_parms = &rxe->dma_parms; - rxe->dma_parms = (struct device_dma_parameters) - { .max_segment_size = SZ_2G }; - dma_coerce_mask_and_coherent(&dev->dev, - dma_get_required_mask(&dev->dev)); + dma_set_max_seg_size(&dev->dev, UINT_MAX); + dma_set_coherent_mask(&dev->dev, dma_get_required_mask(&dev->dev)); dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) @@ -1205,7 +1179,7 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) rxe->tfm = tfm; rdma_set_device_sysfs_group(dev, &rxe_attr_group); - err = ib_register_device(dev, ibdev_name); + err = ib_register_device(dev, ibdev_name, NULL); if (err) pr_warn("%s failed with error %d\n", __func__, err); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index c664c7f36ab5..3414b341b709 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -1,34 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. */ #ifndef RXE_VERBS_H @@ -322,12 +295,8 @@ struct rxe_mem { struct ib_mw ibmw; }; - struct rxe_pd *pd; struct ib_umem *umem; - u32 lkey; - u32 rkey; - enum rxe_mem_state state; enum rxe_mem_type type; u64 va; @@ -465,6 +434,21 @@ static inline struct rxe_mem *to_rmw(struct ib_mw *mw) return mw ? container_of(mw, struct rxe_mem, ibmw) : NULL; } +static inline struct rxe_pd *mr_pd(struct rxe_mem *mr) +{ + return to_rpd(mr->ibmr.pd); +} + +static inline u32 mr_lkey(struct rxe_mem *mr) +{ + return mr->ibmr.lkey; +} + +static inline u32 mr_rkey(struct rxe_mem *mr) +{ + return mr->ibmr.rkey; +} + int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name); void rxe_mc_cleanup(struct rxe_pool_entry *arg); diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index d862bec84376..ca8bc7296867 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -69,7 +69,7 @@ static int siw_device_register(struct siw_device *sdev, const char *name) sdev->vendor_part_id = dev_id++; - rv = ib_register_device(base_dev, name); + rv = ib_register_device(base_dev, name, NULL); if (rv) { pr_warn("siw: device registration error %d\n", rv); return rv; @@ -382,10 +382,10 @@ static struct siw_device *siw_device_create(struct net_device *netdev) */ base_dev->phys_port_cnt = 1; base_dev->dev.parent = parent; - base_dev->dev.dma_ops = &dma_virt_ops; base_dev->dev.dma_parms = &sdev->dma_parms; - sdev->dma_parms = (struct device_dma_parameters) - { .max_segment_size = SZ_2G }; + dma_set_max_seg_size(&base_dev->dev, UINT_MAX); + dma_set_coherent_mask(&base_dev->dev, + dma_get_required_mask(&base_dev->dev)); base_dev->num_comp_vectors = num_possible_cpus(); xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1); diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index adafa1b8bebe..7cf3242ffb41 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -234,12 +234,13 @@ int siw_alloc_pd(struct ib_pd *pd, struct ib_udata *udata) return 0; } -void siw_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) +int siw_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) { struct siw_device *sdev = to_siw_dev(pd->device); siw_dbg_pd(pd, "free PD\n"); atomic_dec(&sdev->num_pd); + return 0; } void siw_qp_get_ref(struct ib_qp *base_qp) @@ -1055,7 +1056,7 @@ int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr, return rv > 0 ? 0 : rv; } -void siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata) +int siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata) { struct siw_cq *cq = to_siw_cq(base_cq); struct siw_device *sdev = to_siw_dev(base_cq->device); @@ -1073,6 +1074,7 @@ void siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata) atomic_dec(&sdev->num_cq); vfree(cq->queue); + return 0; } /* @@ -1690,7 +1692,7 @@ int siw_query_srq(struct ib_srq *base_srq, struct ib_srq_attr *attrs) * QP anymore - the code trusts the RDMA core environment to keep track * of QP references. */ -void siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata) +int siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata) { struct siw_srq *srq = to_siw_srq(base_srq); struct siw_device *sdev = to_siw_dev(base_srq->device); @@ -1702,6 +1704,7 @@ void siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata) rdma_user_mmap_entry_remove(srq->srq_entry); vfree(srq->recvq); atomic_dec(&sdev->num_srq); + return 0; } /* diff --git a/drivers/infiniband/sw/siw/siw_verbs.h b/drivers/infiniband/sw/siw/siw_verbs.h index d9572275a6b6..637454529357 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.h +++ b/drivers/infiniband/sw/siw/siw_verbs.h @@ -49,7 +49,7 @@ int siw_query_port(struct ib_device *base_dev, u8 port, int siw_query_gid(struct ib_device *base_dev, u8 port, int idx, union ib_gid *gid); int siw_alloc_pd(struct ib_pd *base_pd, struct ib_udata *udata); -void siw_dealloc_pd(struct ib_pd *base_pd, struct ib_udata *udata); +int siw_dealloc_pd(struct ib_pd *base_pd, struct ib_udata *udata); struct ib_qp *siw_create_qp(struct ib_pd *base_pd, struct ib_qp_init_attr *attr, struct ib_udata *udata); @@ -62,7 +62,7 @@ int siw_post_send(struct ib_qp *base_qp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr); int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); -void siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata); +int siw_destroy_cq(struct ib_cq *base_cq, struct ib_udata *udata); int siw_poll_cq(struct ib_cq *base_cq, int num_entries, struct ib_wc *wc); int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags); struct ib_mr *siw_reg_user_mr(struct ib_pd *base_pd, u64 start, u64 len, @@ -78,7 +78,7 @@ int siw_create_srq(struct ib_srq *base_srq, struct ib_srq_init_attr *attr, int siw_modify_srq(struct ib_srq *base_srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask mask, struct ib_udata *udata); int siw_query_srq(struct ib_srq *base_srq, struct ib_srq_attr *attr); -void siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata); +int siw_destroy_srq(struct ib_srq *base_srq, struct ib_udata *udata); int siw_post_srq_recv(struct ib_srq *base_srq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); int siw_mmap(struct ib_ucontext *ctx, struct vm_area_struct *vma); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 7c41fb040f7c..8f0b598a46ec 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1647,17 +1647,13 @@ int ipoib_cm_dev_init(struct net_device *dev) void ipoib_cm_dev_cleanup(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - int ret; if (!priv->cm.srq) return; ipoib_dbg(priv, "Cleanup ipoib connected mode.\n"); - ret = ib_destroy_srq(priv->cm.srq); - if (ret) - ipoib_warn(priv, "ib_destroy_srq failed: %d\n", ret); - + ib_destroy_srq(priv->cm.srq); priv->cm.srq = NULL; if (!priv->cm.srq_ring) return; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index 64c19f6fa931..12ba7a0fe0b5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -124,35 +124,14 @@ static int ipoib_mcg_seq_show(struct seq_file *file, void *iter_ptr) return 0; } -static const struct seq_operations ipoib_mcg_seq_ops = { +static const struct seq_operations ipoib_mcg_sops = { .start = ipoib_mcg_seq_start, .next = ipoib_mcg_seq_next, .stop = ipoib_mcg_seq_stop, .show = ipoib_mcg_seq_show, }; -static int ipoib_mcg_open(struct inode *inode, struct file *file) -{ - struct seq_file *seq; - int ret; - - ret = seq_open(file, &ipoib_mcg_seq_ops); - if (ret) - return ret; - - seq = file->private_data; - seq->private = inode->i_private; - - return 0; -} - -static const struct file_operations ipoib_mcg_fops = { - .owner = THIS_MODULE, - .open = ipoib_mcg_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release -}; +DEFINE_SEQ_ATTRIBUTE(ipoib_mcg); static void *ipoib_path_seq_start(struct seq_file *file, loff_t *pos) { @@ -229,35 +208,14 @@ static int ipoib_path_seq_show(struct seq_file *file, void *iter_ptr) return 0; } -static const struct seq_operations ipoib_path_seq_ops = { +static const struct seq_operations ipoib_path_sops = { .start = ipoib_path_seq_start, .next = ipoib_path_seq_next, .stop = ipoib_path_seq_stop, .show = ipoib_path_seq_show, }; -static int ipoib_path_open(struct inode *inode, struct file *file) -{ - struct seq_file *seq; - int ret; - - ret = seq_open(file, &ipoib_path_seq_ops); - if (ret) - return ret; - - seq = file->private_data; - seq->private = inode->i_private; - - return 0; -} - -static const struct file_operations ipoib_path_fops = { - .owner = THIS_MODULE, - .open = ipoib_path_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release -}; +DEFINE_SEQ_ATTRIBUTE(ipoib_path); void ipoib_create_debug_files(struct net_device *dev) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index f772fe8c5b66..abfab89423f4 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2480,6 +2480,8 @@ static struct net_device *ipoib_add_port(const char *format, /* call event handler to ensure pkey in sync */ queue_work(ipoib_workqueue, &priv->flush_heavy); + ndev->rtnl_link_ops = ipoib_get_link_ops(); + result = register_netdev(ndev); if (result) { pr_warn("%s: couldn't register ipoib port %d; error %d\n", diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c index 38c984d16996..d5a90a66b45c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c @@ -144,6 +144,16 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev, return 0; } +static void ipoib_del_child_link(struct net_device *dev, struct list_head *head) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + if (!priv->parent) + return; + + unregister_netdevice_queue(dev, head); +} + static size_t ipoib_get_size(const struct net_device *dev) { return nla_total_size(2) + /* IFLA_IPOIB_PKEY */ @@ -158,6 +168,7 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = { .priv_size = sizeof(struct ipoib_dev_priv), .setup = ipoib_setup_common, .newlink = ipoib_new_child_link, + .dellink = ipoib_del_child_link, .changelink = ipoib_changelink, .get_size = ipoib_get_size, .fill_info = ipoib_fill_info, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 30865605e098..4c50a87ed7cc 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -195,6 +195,8 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) } priv = ipoib_priv(ndev); + ndev->rtnl_link_ops = ipoib_get_link_ops(); + result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD); if (result && ndev->reg_state == NETREG_UNINITIALIZED) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 695f701dc43d..436e17f1d0e5 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1141,12 +1141,7 @@ isert_handle_iscsi_dataout(struct isert_conn *isert_conn, * multiple data-outs on the same command can arrive - * so post the buffer before hand */ - rc = isert_post_recv(isert_conn, rx_desc); - if (rc) { - isert_err("ib_post_recv failed with %d\n", rc); - return rc; - } - return 0; + return isert_post_recv(isert_conn, rx_desc); } static int @@ -1723,10 +1718,8 @@ isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd) int ret; ret = isert_post_recv(isert_conn, isert_cmd->rx_desc); - if (ret) { - isert_err("ib_post_recv failed with %d\n", ret); + if (ret) return ret; - } ret = ib_post_send(isert_conn->qp, &isert_cmd->tx_desc.send_wr, NULL); if (ret) { @@ -2098,10 +2091,8 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd) &isert_cmd->tx_desc.send_wr); rc = isert_post_recv(isert_conn, isert_cmd->rx_desc); - if (rc) { - isert_err("ib_post_recv failed with %d\n", rc); + if (rc) return rc; - } chain_wr = &isert_cmd->tx_desc.send_wr; } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c index 298b747d0330..ac4c49cbf153 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c @@ -312,7 +312,7 @@ static struct attribute *rtrs_clt_stats_attrs[] = { NULL }; -static struct attribute_group rtrs_clt_stats_attr_group = { +static const struct attribute_group rtrs_clt_stats_attr_group = { .attrs = rtrs_clt_stats_attrs, }; @@ -388,7 +388,7 @@ static struct attribute *rtrs_clt_sess_attrs[] = { NULL, }; -static struct attribute_group rtrs_clt_sess_attr_group = { +static const struct attribute_group rtrs_clt_sess_attr_group = { .attrs = rtrs_clt_sess_attrs, }; @@ -460,7 +460,7 @@ static struct attribute *rtrs_clt_attrs[] = { NULL, }; -static struct attribute_group rtrs_clt_attr_group = { +static const struct attribute_group rtrs_clt_attr_group = { .attrs = rtrs_clt_attrs, }; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h index 0a93c87ef92b..b8e43dc4d95a 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h @@ -115,7 +115,6 @@ struct rtrs_sess { /* rtrs information unit */ struct rtrs_iu { - struct list_head list; struct ib_cqe cqe; dma_addr_t dma_addr; void *buf; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index cf6a2be61695..07fbb063555d 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -135,7 +135,7 @@ static struct attribute *rtrs_srv_sess_attrs[] = { NULL, }; -static struct attribute_group rtrs_srv_sess_attr_group = { +static const struct attribute_group rtrs_srv_sess_attr_group = { .attrs = rtrs_srv_sess_attrs, }; @@ -148,7 +148,7 @@ static struct attribute *rtrs_srv_stats_attrs[] = { NULL, }; -static struct attribute_group rtrs_srv_stats_attr_group = { +static const struct attribute_group rtrs_srv_stats_attr_group = { .attrs = rtrs_srv_stats_attrs, }; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 28f6414dfa3d..d6f93601712e 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -16,6 +16,7 @@ #include "rtrs-srv.h" #include "rtrs-log.h" #include +#include MODULE_DESCRIPTION("RDMA Transport Server"); MODULE_LICENSE("GPL"); @@ -31,6 +32,7 @@ MODULE_LICENSE("GPL"); static struct rtrs_rdma_dev_pd dev_pd; static mempool_t *chunk_pool; struct class *rtrs_dev_class; +static struct rtrs_srv_ib_ctx ib_ctx; static int __read_mostly max_chunk_size = DEFAULT_MAX_CHUNK_SIZE; static int __read_mostly sess_queue_depth = DEFAULT_SESS_QUEUE_DEPTH; @@ -2042,6 +2044,70 @@ static void free_srv_ctx(struct rtrs_srv_ctx *ctx) kfree(ctx); } +static int rtrs_srv_add_one(struct ib_device *device) +{ + struct rtrs_srv_ctx *ctx; + int ret = 0; + + mutex_lock(&ib_ctx.ib_dev_mutex); + if (ib_ctx.ib_dev_count) + goto out; + + /* + * Since our CM IDs are NOT bound to any ib device we will create them + * only once + */ + ctx = ib_ctx.srv_ctx; + ret = rtrs_srv_rdma_init(ctx, ib_ctx.port); + if (ret) { + /* + * We errored out here. + * According to the ib code, if we encounter an error here then the + * error code is ignored, and no more calls to our ops are made. + */ + pr_err("Failed to initialize RDMA connection"); + goto err_out; + } + +out: + /* + * Keep a track on the number of ib devices added + */ + ib_ctx.ib_dev_count++; + +err_out: + mutex_unlock(&ib_ctx.ib_dev_mutex); + return ret; +} + +static void rtrs_srv_remove_one(struct ib_device *device, void *client_data) +{ + struct rtrs_srv_ctx *ctx; + + mutex_lock(&ib_ctx.ib_dev_mutex); + ib_ctx.ib_dev_count--; + + if (ib_ctx.ib_dev_count) + goto out; + + /* + * Since our CM IDs are NOT bound to any ib device we will remove them + * only once, when the last device is removed + */ + ctx = ib_ctx.srv_ctx; + rdma_destroy_id(ctx->cm_id_ip); + rdma_destroy_id(ctx->cm_id_ib); + +out: + mutex_unlock(&ib_ctx.ib_dev_mutex); +} + +static struct ib_client rtrs_srv_client = { + .name = "rtrs_server", + .add = rtrs_srv_add_one, + .remove = rtrs_srv_remove_one +}; + /** * rtrs_srv_open() - open RTRS server context * @ops: callback functions @@ -2060,7 +2126,11 @@ struct rtrs_srv_ctx *rtrs_srv_open(struct rtrs_srv_ops *ops, u16 port) if (!ctx) return ERR_PTR(-ENOMEM); - err = rtrs_srv_rdma_init(ctx, port); + mutex_init(&ib_ctx.ib_dev_mutex); + ib_ctx.srv_ctx = ctx; + ib_ctx.port = port; + + err = ib_register_client(&rtrs_srv_client); if (err) { free_srv_ctx(ctx); return ERR_PTR(err); @@ -2099,8 +2169,8 @@ static void close_ctx(struct rtrs_srv_ctx *ctx) */ void rtrs_srv_close(struct rtrs_srv_ctx *ctx) { - rdma_destroy_id(ctx->cm_id_ip); - rdma_destroy_id(ctx->cm_id_ib); + ib_unregister_client(&rtrs_srv_client); + mutex_destroy(&ib_ctx.ib_dev_mutex); close_ctx(ctx); free_srv_ctx(ctx); } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.h b/drivers/infiniband/ulp/rtrs/rtrs-srv.h index dc95b0932f0d..08b0b8a6eebe 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.h @@ -118,6 +118,13 @@ struct rtrs_srv_ctx { struct list_head srv_list; }; +struct rtrs_srv_ib_ctx { + struct rtrs_srv_ctx *srv_ctx; + u16 port; + struct mutex ib_dev_mutex; + int ib_dev_count; +}; + extern struct class *rtrs_dev_class; void close_sess(struct rtrs_srv_sess *sess); diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig index f96287c4b789..0f4c2d823de8 100644 --- a/drivers/mtd/devices/Kconfig +++ b/drivers/mtd/devices/Kconfig @@ -91,7 +91,7 @@ config MTD_MCHP23K256 config MTD_SPEAR_SMI tristate "SPEAR MTD NOR Support through SMI controller" - depends on PLAT_SPEAR + depends on PLAT_SPEAR || COMPILE_TEST default y help This enable SNOR support on SPEAR platforms using SMI controller diff --git a/drivers/mtd/devices/lart.c b/drivers/mtd/devices/lart.c index 56f50d27b7fd..aecd441e4183 100644 --- a/drivers/mtd/devices/lart.c +++ b/drivers/mtd/devices/lart.c @@ -436,7 +436,10 @@ static int flash_read (struct mtd_info *mtd,loff_t from,size_t len,size_t *retle { int gap = BUSWIDTH - (from & (BUSWIDTH - 1)); - while (len && gap--) *buf++ = read8 (from++), len--; + while (len && gap--) { + *buf++ = read8 (from++); + len--; + } } /* now we read dwords until we reach a non-dword boundary */ @@ -518,7 +521,10 @@ static int flash_write (struct mtd_info *mtd,loff_t to,size_t len,size_t *retlen i = n = 0; while (gap--) tmp[i++] = 0xFF; - while (len && i < BUSWIDTH) tmp[i++] = buf[n++], len--; + while (len && i < BUSWIDTH) { + tmp[i++] = buf[n++]; + len--; + } while (i < BUSWIDTH) tmp[i++] = 0xFF; if (!write_dword (aligned,*((__u32 *) tmp))) return (-EIO); diff --git a/drivers/mtd/devices/spear_smi.c b/drivers/mtd/devices/spear_smi.c index 79dcca16481d..2e00862389dd 100644 --- a/drivers/mtd/devices/spear_smi.c +++ b/drivers/mtd/devices/spear_smi.c @@ -793,7 +793,7 @@ static int spear_smi_probe_config_dt(struct platform_device *pdev, struct device_node *np) { struct spear_smi_plat_data *pdata = dev_get_platdata(&pdev->dev); - struct device_node *pp = NULL; + struct device_node *pp; const __be32 *addr; u32 val; int len; @@ -812,7 +812,7 @@ static int spear_smi_probe_config_dt(struct platform_device *pdev, return -ENOMEM; /* Fill structs for each subnode (flash device) */ - while ((pp = of_get_next_child(np, pp))) { + for_each_child_of_node(np, pp) { pdata->np[i] = pp; /* Read base-addr and size from DT */ diff --git a/drivers/mtd/hyperbus/Kconfig b/drivers/mtd/hyperbus/Kconfig index a4d8968d133d..46c7e407e378 100644 --- a/drivers/mtd/hyperbus/Kconfig +++ b/drivers/mtd/hyperbus/Kconfig @@ -22,4 +22,11 @@ config HBMC_AM654 This is the driver for HyperBus controller on TI's AM65x and other SoCs +config RPCIF_HYPERBUS + tristate "Renesas RPC-IF HyperBus driver" + depends on RENESAS_RPCIF + depends on MTD_CFI_BE_BYTE_SWAP + help + This option includes Renesas RPC-IF HyperBus support. + endif # MTD_HYPERBUS diff --git a/drivers/mtd/hyperbus/Makefile b/drivers/mtd/hyperbus/Makefile index 8a936e066f48..5fc2b5124542 100644 --- a/drivers/mtd/hyperbus/Makefile +++ b/drivers/mtd/hyperbus/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_MTD_HYPERBUS) += hyperbus-core.o obj-$(CONFIG_HBMC_AM654) += hbmc-am654.o +obj-$(CONFIG_RPCIF_HYPERBUS) += rpc-if.o diff --git a/drivers/mtd/hyperbus/hbmc-am654.c b/drivers/mtd/hyperbus/hbmc-am654.c index e0e33f6bf513..a3439b791eeb 100644 --- a/drivers/mtd/hyperbus/hbmc-am654.c +++ b/drivers/mtd/hyperbus/hbmc-am654.c @@ -3,6 +3,10 @@ // Copyright (C) 2019 Texas Instruments Incorporated - https://www.ti.com/ // Author: Vignesh Raghavendra +#include +#include +#include +#include #include #include #include @@ -13,11 +17,18 @@ #include #include #include -#include +#include #include #define AM654_HBMC_CALIB_COUNT 25 +struct am654_hbmc_device_priv { + struct completion rx_dma_complete; + phys_addr_t device_base; + struct hyperbus_ctlr *ctlr; + struct dma_chan *rx_chan; +}; + struct am654_hbmc_priv { struct hyperbus_ctlr ctlr; struct hyperbus_device hbdev; @@ -52,13 +63,103 @@ static int am654_hbmc_calibrate(struct hyperbus_device *hbdev) return ret; } +static void am654_hbmc_dma_callback(void *param) +{ + struct am654_hbmc_device_priv *priv = param; + + complete(&priv->rx_dma_complete); +} + +static int am654_hbmc_dma_read(struct am654_hbmc_device_priv *priv, void *to, + unsigned long from, ssize_t len) + +{ + enum dma_ctrl_flags flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT; + struct dma_chan *rx_chan = priv->rx_chan; + struct dma_async_tx_descriptor *tx; + dma_addr_t dma_dst, dma_src; + dma_cookie_t cookie; + int ret; + + if (!priv->rx_chan || !virt_addr_valid(to) || object_is_on_stack(to)) + return -EINVAL; + + dma_dst = dma_map_single(rx_chan->device->dev, to, len, DMA_FROM_DEVICE); + if (dma_mapping_error(rx_chan->device->dev, dma_dst)) { + dev_dbg(priv->ctlr->dev, "DMA mapping failed\n"); + return -EIO; + } + + dma_src = priv->device_base + from; + tx = dmaengine_prep_dma_memcpy(rx_chan, dma_dst, dma_src, len, flags); + if (!tx) { + dev_err(priv->ctlr->dev, "device_prep_dma_memcpy error\n"); + ret = -EIO; + goto unmap_dma; + } + + reinit_completion(&priv->rx_dma_complete); + tx->callback = am654_hbmc_dma_callback; + tx->callback_param = priv; + cookie = dmaengine_submit(tx); + + ret = dma_submit_error(cookie); + if (ret) { + dev_err(priv->ctlr->dev, "dma_submit_error %d\n", cookie); + goto unmap_dma; + } + + dma_async_issue_pending(rx_chan); + if (!wait_for_completion_timeout(&priv->rx_dma_complete, msecs_to_jiffies(len + 1000))) { + dmaengine_terminate_sync(rx_chan); + dev_err(priv->ctlr->dev, "DMA wait_for_completion_timeout\n"); + ret = -ETIMEDOUT; + } + +unmap_dma: + dma_unmap_single(rx_chan->device->dev, dma_dst, len, DMA_FROM_DEVICE); + return ret; +} + +static void am654_hbmc_read(struct hyperbus_device *hbdev, void *to, + unsigned long from, ssize_t len) +{ + struct am654_hbmc_device_priv *priv = hbdev->priv; + + if (len < SZ_1K || am654_hbmc_dma_read(priv, to, from, len)) + memcpy_fromio(to, hbdev->map.virt + from, len); +} + static const struct hyperbus_ops am654_hbmc_ops = { .calibrate = am654_hbmc_calibrate, + .copy_from = am654_hbmc_read, }; +static int am654_hbmc_request_mmap_dma(struct am654_hbmc_device_priv *priv) +{ + struct dma_chan *rx_chan; + dma_cap_mask_t mask; + + dma_cap_zero(mask); + dma_cap_set(DMA_MEMCPY, mask); + + rx_chan = dma_request_chan_by_mask(&mask); + if (IS_ERR(rx_chan)) { + if (PTR_ERR(rx_chan) == -EPROBE_DEFER) + return -EPROBE_DEFER; + dev_dbg(priv->ctlr->dev, "No DMA channel available\n"); + return 0; + } + priv->rx_chan = rx_chan; + init_completion(&priv->rx_dma_complete); + + return 0; +} + static int am654_hbmc_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; + struct am654_hbmc_device_priv *dev_priv; struct device *dev = &pdev->dev; struct am654_hbmc_priv *priv; struct resource res; @@ -70,7 +171,8 @@ static int am654_hbmc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, priv); - ret = of_address_to_resource(np, 0, &res); + priv->hbdev.np = of_get_next_child(np, NULL); + ret = of_address_to_resource(priv->hbdev.np, 0, &res); if (ret) return ret; @@ -88,13 +190,6 @@ static int am654_hbmc_probe(struct platform_device *pdev) priv->mux_ctrl = control; } - pm_runtime_enable(dev); - ret = pm_runtime_get_sync(dev); - if (ret < 0) { - pm_runtime_put_noidle(dev); - goto disable_pm; - } - priv->hbdev.map.size = resource_size(&res); priv->hbdev.map.virt = devm_ioremap_resource(dev, &res); if (IS_ERR(priv->hbdev.map.virt)) @@ -103,17 +198,32 @@ static int am654_hbmc_probe(struct platform_device *pdev) priv->ctlr.dev = dev; priv->ctlr.ops = &am654_hbmc_ops; priv->hbdev.ctlr = &priv->ctlr; - priv->hbdev.np = of_get_next_child(dev->of_node, NULL); + + dev_priv = devm_kzalloc(dev, sizeof(*dev_priv), GFP_KERNEL); + if (!dev_priv) { + ret = -ENOMEM; + goto disable_mux; + } + + priv->hbdev.priv = dev_priv; + dev_priv->device_base = res.start; + dev_priv->ctlr = &priv->ctlr; + + ret = am654_hbmc_request_mmap_dma(dev_priv); + if (ret) + goto disable_mux; + ret = hyperbus_register_device(&priv->hbdev); if (ret) { dev_err(dev, "failed to register controller\n"); - pm_runtime_put_sync(&pdev->dev); - goto disable_pm; + goto release_dma; } return 0; -disable_pm: - pm_runtime_disable(dev); +release_dma: + if (dev_priv->rx_chan) + dma_release_channel(dev_priv->rx_chan); +disable_mux: if (priv->mux_ctrl) mux_control_deselect(priv->mux_ctrl); return ret; @@ -122,13 +232,15 @@ disable_pm: static int am654_hbmc_remove(struct platform_device *pdev) { struct am654_hbmc_priv *priv = platform_get_drvdata(pdev); + struct am654_hbmc_device_priv *dev_priv = priv->hbdev.priv; int ret; ret = hyperbus_unregister_device(&priv->hbdev); if (priv->mux_ctrl) mux_control_deselect(priv->mux_ctrl); - pm_runtime_put_sync(&pdev->dev); - pm_runtime_disable(&pdev->dev); + + if (dev_priv->rx_chan) + dma_release_channel(dev_priv->rx_chan); return ret; } diff --git a/drivers/mtd/hyperbus/rpc-if.c b/drivers/mtd/hyperbus/rpc-if.c new file mode 100644 index 000000000000..ecb050ba95cd --- /dev/null +++ b/drivers/mtd/hyperbus/rpc-if.c @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Linux driver for RPC-IF HyperFlash + * + * Copyright (C) 2019-2020 Cogent Embedded, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +struct rpcif_hyperbus { + struct rpcif rpc; + struct hyperbus_ctlr ctlr; + struct hyperbus_device hbdev; +}; + +static const struct rpcif_op rpcif_op_tmpl = { + .cmd = { + .buswidth = 8, + .ddr = true, + }, + .ocmd = { + .buswidth = 8, + .ddr = true, + }, + .addr = { + .nbytes = 1, + .buswidth = 8, + .ddr = true, + }, + .data = { + .buswidth = 8, + .ddr = true, + }, +}; + +static void rpcif_hb_prepare_read(struct rpcif *rpc, void *to, + unsigned long from, ssize_t len) +{ + struct rpcif_op op = rpcif_op_tmpl; + + op.cmd.opcode = HYPERBUS_RW_READ | HYPERBUS_AS_MEM; + op.addr.val = from >> 1; + op.dummy.buswidth = 1; + op.dummy.ncycles = 15; + op.data.dir = RPCIF_DATA_IN; + op.data.nbytes = len; + op.data.buf.in = to; + + rpcif_prepare(rpc, &op, NULL, NULL); +} + +static void rpcif_hb_prepare_write(struct rpcif *rpc, unsigned long to, + void *from, ssize_t len) +{ + struct rpcif_op op = rpcif_op_tmpl; + + op.cmd.opcode = HYPERBUS_RW_WRITE | HYPERBUS_AS_MEM; + op.addr.val = to >> 1; + op.data.dir = RPCIF_DATA_OUT; + op.data.nbytes = len; + op.data.buf.out = from; + + rpcif_prepare(rpc, &op, NULL, NULL); +} + +static u16 rpcif_hb_read16(struct hyperbus_device *hbdev, unsigned long addr) +{ + struct rpcif_hyperbus *hyperbus = + container_of(hbdev, struct rpcif_hyperbus, hbdev); + map_word data; + + rpcif_hb_prepare_read(&hyperbus->rpc, &data, addr, 2); + + rpcif_manual_xfer(&hyperbus->rpc); + + return data.x[0]; +} + +static void rpcif_hb_write16(struct hyperbus_device *hbdev, unsigned long addr, + u16 data) +{ + struct rpcif_hyperbus *hyperbus = + container_of(hbdev, struct rpcif_hyperbus, hbdev); + + rpcif_hb_prepare_write(&hyperbus->rpc, addr, &data, 2); + + rpcif_manual_xfer(&hyperbus->rpc); +} + +static void rpcif_hb_copy_from(struct hyperbus_device *hbdev, void *to, + unsigned long from, ssize_t len) +{ + struct rpcif_hyperbus *hyperbus = + container_of(hbdev, struct rpcif_hyperbus, hbdev); + + rpcif_hb_prepare_read(&hyperbus->rpc, to, from, len); + + rpcif_dirmap_read(&hyperbus->rpc, from, len, to); +} + +static const struct hyperbus_ops rpcif_hb_ops = { + .read16 = rpcif_hb_read16, + .write16 = rpcif_hb_write16, + .copy_from = rpcif_hb_copy_from, +}; + +static int rpcif_hb_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct rpcif_hyperbus *hyperbus; + int error; + + hyperbus = devm_kzalloc(dev, sizeof(*hyperbus), GFP_KERNEL); + if (!hyperbus) + return -ENOMEM; + + rpcif_sw_init(&hyperbus->rpc, pdev->dev.parent); + + platform_set_drvdata(pdev, hyperbus); + + rpcif_enable_rpm(&hyperbus->rpc); + + rpcif_hw_init(&hyperbus->rpc, true); + + hyperbus->hbdev.map.size = hyperbus->rpc.size; + hyperbus->hbdev.map.virt = hyperbus->rpc.dirmap; + + hyperbus->ctlr.dev = dev; + hyperbus->ctlr.ops = &rpcif_hb_ops; + hyperbus->hbdev.ctlr = &hyperbus->ctlr; + hyperbus->hbdev.np = of_get_next_child(pdev->dev.parent->of_node, NULL); + error = hyperbus_register_device(&hyperbus->hbdev); + if (error) + rpcif_disable_rpm(&hyperbus->rpc); + + return error; +} + +static int rpcif_hb_remove(struct platform_device *pdev) +{ + struct rpcif_hyperbus *hyperbus = platform_get_drvdata(pdev); + int error = hyperbus_unregister_device(&hyperbus->hbdev); + struct rpcif *rpc = dev_get_drvdata(pdev->dev.parent); + + rpcif_disable_rpm(rpc); + return error; +} + +static struct platform_driver rpcif_platform_driver = { + .probe = rpcif_hb_probe, + .remove = rpcif_hb_remove, + .driver = { + .name = "rpc-if-hyperflash", + }, +}; + +module_platform_driver(rpcif_platform_driver); + +MODULE_DESCRIPTION("Renesas RPC-IF HyperFlash driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/mtd/lpddr/lpddr2_nvm.c b/drivers/mtd/lpddr/lpddr2_nvm.c index 0f1547f09d08..72f5c7b30079 100644 --- a/drivers/mtd/lpddr/lpddr2_nvm.c +++ b/drivers/mtd/lpddr/lpddr2_nvm.c @@ -393,6 +393,17 @@ static int lpddr2_nvm_lock(struct mtd_info *mtd, loff_t start_add, return lpddr2_nvm_do_block_op(mtd, start_add, len, LPDDR2_NVM_LOCK); } +static const struct mtd_info lpddr2_nvm_mtd_info = { + .type = MTD_RAM, + .writesize = 1, + .flags = (MTD_CAP_NVRAM | MTD_POWERUP_LOCK), + ._read = lpddr2_nvm_read, + ._write = lpddr2_nvm_write, + ._erase = lpddr2_nvm_erase, + ._unlock = lpddr2_nvm_unlock, + ._lock = lpddr2_nvm_lock, +}; + /* * lpddr2_nvm driver probe method */ @@ -433,6 +444,7 @@ static int lpddr2_nvm_probe(struct platform_device *pdev) .pfow_base = OW_BASE_ADDRESS, .fldrv_priv = pcm_data, }; + if (IS_ERR(map->virt)) return PTR_ERR(map->virt); @@ -444,22 +456,13 @@ static int lpddr2_nvm_probe(struct platform_device *pdev) return PTR_ERR(pcm_data->ctl_regs); /* Populate mtd_info data structure */ - *mtd = (struct mtd_info) { - .dev = { .parent = &pdev->dev }, - .name = pdev->dev.init_name, - .type = MTD_RAM, - .priv = map, - .size = resource_size(add_range), - .erasesize = ERASE_BLOCKSIZE * pcm_data->bus_width, - .writesize = 1, - .writebufsize = WRITE_BUFFSIZE * pcm_data->bus_width, - .flags = (MTD_CAP_NVRAM | MTD_POWERUP_LOCK), - ._read = lpddr2_nvm_read, - ._write = lpddr2_nvm_write, - ._erase = lpddr2_nvm_erase, - ._unlock = lpddr2_nvm_unlock, - ._lock = lpddr2_nvm_lock, - }; + *mtd = lpddr2_nvm_mtd_info; + mtd->dev.parent = &pdev->dev; + mtd->name = pdev->dev.init_name; + mtd->priv = map; + mtd->size = resource_size(add_range); + mtd->erasesize = ERASE_BLOCKSIZE * pcm_data->bus_width; + mtd->writebufsize = WRITE_BUFFSIZE * pcm_data->bus_width; /* Verify the presence of the device looking for PFOW string */ if (!lpddr2_nvm_pfow_present(map)) { diff --git a/drivers/mtd/lpddr/lpddr_cmds.c b/drivers/mtd/lpddr/lpddr_cmds.c index fb1cbc9a2870..ee063baed136 100644 --- a/drivers/mtd/lpddr/lpddr_cmds.c +++ b/drivers/mtd/lpddr/lpddr_cmds.c @@ -94,6 +94,34 @@ struct mtd_info *lpddr_cmdset(struct map_info *map) } EXPORT_SYMBOL(lpddr_cmdset); +static void print_drs_error(unsigned int dsr) +{ + int prog_status = (dsr & DSR_RPS) >> 8; + + if (!(dsr & DSR_AVAILABLE)) + pr_notice("DSR.15: (0) Device not Available\n"); + if ((prog_status & 0x03) == 0x03) + pr_notice("DSR.9,8: (11) Attempt to program invalid half with 41h command\n"); + else if (prog_status & 0x02) + pr_notice("DSR.9,8: (10) Object Mode Program attempt in region with Control Mode data\n"); + else if (prog_status & 0x01) + pr_notice("DSR.9,8: (01) Program attempt in region with Object Mode data\n"); + if (!(dsr & DSR_READY_STATUS)) + pr_notice("DSR.7: (0) Device is Busy\n"); + if (dsr & DSR_ESS) + pr_notice("DSR.6: (1) Erase Suspended\n"); + if (dsr & DSR_ERASE_STATUS) + pr_notice("DSR.5: (1) Erase/Blank check error\n"); + if (dsr & DSR_PROGRAM_STATUS) + pr_notice("DSR.4: (1) Program Error\n"); + if (dsr & DSR_VPPS) + pr_notice("DSR.3: (1) Vpp low detect, operation aborted\n"); + if (dsr & DSR_PSS) + pr_notice("DSR.2: (1) Program suspended\n"); + if (dsr & DSR_DPS) + pr_notice("DSR.1: (1) Aborted Erase/Program attempt on locked block\n"); +} + static int wait_for_ready(struct map_info *map, struct flchip *chip, unsigned int chip_op_time) { diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig index fd37553f1b07..6650acbc961e 100644 --- a/drivers/mtd/maps/Kconfig +++ b/drivers/mtd/maps/Kconfig @@ -75,6 +75,17 @@ config MTD_PHYSMAP_OF physically into the CPU's memory. The mapping description here is taken from OF device tree. +config MTD_PHYSMAP_BT1_ROM + bool "Baikal-T1 Boot ROMs OF-based physical memory map handling" + depends on MTD_PHYSMAP_OF + depends on MIPS_BAIKAL_T1 || COMPILE_TEST + select MTD_COMPLEX_MAPPINGS + select MULTIPLEXER + select MUX_MMIO + help + This provides some extra DT physmap parsing for the Baikal-T1 + platforms, some detection and setting up ROMs-specific accessors. + config MTD_PHYSMAP_VERSATILE bool "ARM Versatile OF-based physical memory map handling" depends on MTD_PHYSMAP_OF diff --git a/drivers/mtd/maps/Makefile b/drivers/mtd/maps/Makefile index c0da86a5d26f..79f018cf412f 100644 --- a/drivers/mtd/maps/Makefile +++ b/drivers/mtd/maps/Makefile @@ -18,6 +18,7 @@ obj-$(CONFIG_MTD_CK804XROM) += ck804xrom.o obj-$(CONFIG_MTD_TSUNAMI) += tsunami_flash.o obj-$(CONFIG_MTD_PXA2XX) += pxa2xx-flash.o physmap-objs-y += physmap-core.o +physmap-objs-$(CONFIG_MTD_PHYSMAP_BT1_ROM) += physmap-bt1-rom.o physmap-objs-$(CONFIG_MTD_PHYSMAP_VERSATILE) += physmap-versatile.o physmap-objs-$(CONFIG_MTD_PHYSMAP_GEMINI) += physmap-gemini.o physmap-objs-$(CONFIG_MTD_PHYSMAP_IXP4XX) += physmap-ixp4xx.o diff --git a/drivers/mtd/maps/physmap-bt1-rom.c b/drivers/mtd/maps/physmap-bt1-rom.c new file mode 100644 index 000000000000..27cfe1c63a2e --- /dev/null +++ b/drivers/mtd/maps/physmap-bt1-rom.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 BAIKAL ELECTRONICS, JSC + * + * Authors: + * Serge Semin + * + * Baikal-T1 Physically Mapped Internal ROM driver + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "physmap-bt1-rom.h" + +/* + * Baikal-T1 SoC ROMs are only accessible by the dword-aligned instructions. + * We have to take this into account when implementing the data read-methods. + * Note there is no need in bothering with endianness, since both Baikal-T1 + * CPU and MMIO are LE. + */ +static map_word __xipram bt1_rom_map_read(struct map_info *map, + unsigned long ofs) +{ + void __iomem *src = map->virt + ofs; + unsigned long shift; + map_word ret; + u32 data; + + /* Read data within offset dword. */ + shift = (unsigned long)src & 0x3; + data = readl_relaxed(src - shift); + if (!shift) { + ret.x[0] = data; + return ret; + } + ret.x[0] = data >> (shift * BITS_PER_BYTE); + + /* Read data from the next dword. */ + shift = 4 - shift; + if (ofs + shift >= map->size) + return ret; + + data = readl_relaxed(src + shift); + ret.x[0] |= data << (shift * BITS_PER_BYTE); + + return ret; +} + +static void __xipram bt1_rom_map_copy_from(struct map_info *map, + void *to, unsigned long from, + ssize_t len) +{ + void __iomem *src = map->virt + from; + ssize_t shift, chunk; + u32 data; + + if (len <= 0 || from >= map->size) + return; + + /* Make sure we don't go over the map limit. */ + len = min_t(ssize_t, map->size - from, len); + + /* + * Since requested data size can be pretty big we have to implement + * the copy procedure as optimal as possible. That's why it's split + * up into the next three stages: unaligned head, aligned body, + * unaligned tail. + */ + shift = (ssize_t)src & 0x3; + if (shift) { + chunk = min_t(ssize_t, 4 - shift, len); + data = readl_relaxed(src - shift); + memcpy(to, &data + shift, chunk); + src += chunk; + to += chunk; + len -= chunk; + } + + while (len >= 4) { + data = readl_relaxed(src); + memcpy(to, &data, 4); + src += 4; + to += 4; + len -= 4; + } + + if (len) { + data = readl_relaxed(src); + memcpy(to, &data, len); + } +} + +int of_flash_probe_bt1_rom(struct platform_device *pdev, + struct device_node *np, + struct map_info *map) +{ + struct device *dev = &pdev->dev; + + /* It's supposed to be read-only MTD. */ + if (!of_device_is_compatible(np, "mtd-rom")) { + dev_info(dev, "No mtd-rom compatible string\n"); + return 0; + } + + /* Multiplatform guard. */ + if (!of_device_is_compatible(np, "baikal,bt1-int-rom")) + return 0; + + /* Sanity check the device parameters retrieved from DTB. */ + if (map->bankwidth != 4) + dev_warn(dev, "Bank width is supposed to be 32 bits wide\n"); + + map->read = bt1_rom_map_read; + map->copy_from = bt1_rom_map_copy_from; + + return 0; +} diff --git a/drivers/mtd/maps/physmap-bt1-rom.h b/drivers/mtd/maps/physmap-bt1-rom.h new file mode 100644 index 000000000000..6782899598a4 --- /dev/null +++ b/drivers/mtd/maps/physmap-bt1-rom.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#include +#include + +#ifdef CONFIG_MTD_PHYSMAP_BT1_ROM +int of_flash_probe_bt1_rom(struct platform_device *pdev, + struct device_node *np, + struct map_info *map); +#else +static inline +int of_flash_probe_bt1_rom(struct platform_device *pdev, + struct device_node *np, + struct map_info *map) +{ + return 0; +} +#endif diff --git a/drivers/mtd/maps/physmap-core.c b/drivers/mtd/maps/physmap-core.c index 8f7f966fa9a7..001ed5deb622 100644 --- a/drivers/mtd/maps/physmap-core.c +++ b/drivers/mtd/maps/physmap-core.c @@ -41,6 +41,7 @@ #include #include +#include "physmap-bt1-rom.h" #include "physmap-gemini.h" #include "physmap-ixp4xx.h" #include "physmap-versatile.h" @@ -371,6 +372,10 @@ static int physmap_flash_of_init(struct platform_device *dev) info->maps[i].bankwidth = bankwidth; info->maps[i].device_node = dp; + err = of_flash_probe_bt1_rom(dev, dp, &info->maps[i]); + if (err) + return err; + err = of_flash_probe_gemini(dev, dp, &info->maps[i]); if (err) return err; @@ -515,7 +520,8 @@ static int physmap_flash_probe(struct platform_device *dev) dev_notice(&dev->dev, "physmap platform flash device: %pR\n", res); - info->maps[i].name = dev_name(&dev->dev); + if (!info->maps[i].name) + info->maps[i].name = dev_name(&dev->dev); if (!info->maps[i].phys) info->maps[i].phys = res->start; diff --git a/drivers/mtd/maps/vmu-flash.c b/drivers/mtd/maps/vmu-flash.c index 177bf134e189..a7ec947a3ebb 100644 --- a/drivers/mtd/maps/vmu-flash.c +++ b/drivers/mtd/maps/vmu-flash.c @@ -40,7 +40,7 @@ struct memcard { u32 blocklen; u32 writecnt; u32 readcnt; - u32 removeable; + u32 removable; int partition; int read; unsigned char *blockread; @@ -619,7 +619,7 @@ static int vmu_connect(struct maple_device *mdev) card->blocklen = ((basic_flash_data >> 16 & 0xFF) + 1) << 5; card->writecnt = basic_flash_data >> 12 & 0xF; card->readcnt = basic_flash_data >> 8 & 0xF; - card->removeable = basic_flash_data >> 7 & 1; + card->removable = basic_flash_data >> 7 & 1; card->partition = 0; @@ -772,7 +772,6 @@ static void vmu_file_error(struct maple_device *mdev, void *recvbuf) static int probe_maple_vmu(struct device *dev) { - int error; struct maple_device *mdev = to_maple_dev(dev); struct maple_driver *mdrv = to_maple_driver(dev->driver); @@ -780,11 +779,7 @@ static int probe_maple_vmu(struct device *dev) mdev->fileerr_handler = vmu_file_error; mdev->driver = mdrv; - error = vmu_connect(mdev); - if (error) - return error; - - return 0; + return vmu_connect(mdev); } static int remove_maple_vmu(struct device *dev) diff --git a/drivers/mtd/mtdconcat.c b/drivers/mtd/mtdconcat.c index 1d6c9e7e7b7d..6e4d0017c0bd 100644 --- a/drivers/mtd/mtdconcat.c +++ b/drivers/mtd/mtdconcat.c @@ -102,6 +102,47 @@ concat_read(struct mtd_info *mtd, loff_t from, size_t len, return -EINVAL; } +static int +concat_panic_write(struct mtd_info *mtd, loff_t to, size_t len, + size_t * retlen, const u_char * buf) +{ + struct mtd_concat *concat = CONCAT(mtd); + int err = -EINVAL; + int i; + for (i = 0; i < concat->num_subdev; i++) { + struct mtd_info *subdev = concat->subdev[i]; + size_t size, retsize; + + if (to >= subdev->size) { + to -= subdev->size; + continue; + } + if (to + len > subdev->size) + size = subdev->size - to; + else + size = len; + + err = mtd_panic_write(subdev, to, size, &retsize, buf); + if (err == -EOPNOTSUPP) { + printk(KERN_ERR "mtdconcat: Cannot write from panic without panic_write\n"); + return err; + } + if (err) + break; + + *retlen += retsize; + len -= size; + if (len == 0) + break; + + err = -EINVAL; + buf += size; + to = 0; + } + return err; +} + + static int concat_write(struct mtd_info *mtd, loff_t to, size_t len, size_t * retlen, const u_char * buf) @@ -648,6 +689,8 @@ struct mtd_info *mtd_concat_create(struct mtd_info *subdev[], /* subdevices to c concat->mtd._block_isbad = concat_block_isbad; if (subdev[0]->_block_markbad) concat->mtd._block_markbad = concat_block_markbad; + if (subdev[0]->_panic_write) + concat->mtd._panic_write = concat_panic_write; concat->mtd.ecc_stats.badblocks = subdev[0]->ecc_stats.badblocks; diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index b5e5d3140f57..e9e163ae9d86 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -335,7 +335,7 @@ static const struct device_type mtd_devtype = { .release = mtd_release, }; -static int mtd_partid_show(struct seq_file *s, void *p) +static int mtd_partid_debug_show(struct seq_file *s, void *p) { struct mtd_info *mtd = s->private; @@ -344,19 +344,9 @@ static int mtd_partid_show(struct seq_file *s, void *p) return 0; } -static int mtd_partid_debugfs_open(struct inode *inode, struct file *file) -{ - return single_open(file, mtd_partid_show, inode->i_private); -} +DEFINE_SHOW_ATTRIBUTE(mtd_partid_debug); -static const struct file_operations mtd_partid_debug_fops = { - .open = mtd_partid_debugfs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int mtd_partname_show(struct seq_file *s, void *p) +static int mtd_partname_debug_show(struct seq_file *s, void *p) { struct mtd_info *mtd = s->private; @@ -365,17 +355,7 @@ static int mtd_partname_show(struct seq_file *s, void *p) return 0; } -static int mtd_partname_debugfs_open(struct inode *inode, struct file *file) -{ - return single_open(file, mtd_partname_show, inode->i_private); -} - -static const struct file_operations mtd_partname_debug_fops = { - .open = mtd_partname_debugfs_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(mtd_partname_debug); static struct dentry *dfs_dir_mtd; diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c index 4ced68be7ed7..774970bfcf85 100644 --- a/drivers/mtd/mtdoops.c +++ b/drivers/mtd/mtdoops.c @@ -279,12 +279,13 @@ static void mtdoops_do_dump(struct kmsg_dumper *dumper, kmsg_dump_get_buffer(dumper, true, cxt->oops_buf + MTDOOPS_HEADER_SIZE, record_size - MTDOOPS_HEADER_SIZE, NULL); - /* Panics must be written immediately */ - if (reason != KMSG_DUMP_OOPS) + if (reason != KMSG_DUMP_OOPS) { + /* Panics must be written immediately */ mtdoops_write(cxt, 1); - - /* For other cases, schedule work to write it "nicely" */ - schedule_work(&cxt->work_write); + } else { + /* For other cases, schedule work to write it "nicely" */ + schedule_work(&cxt->work_write); + } } static void mtdoops_notify_add(struct mtd_info *mtd) diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index c1a45b071165..4a9aed4f0104 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -9,4 +9,12 @@ source "drivers/mtd/nand/onenand/Kconfig" source "drivers/mtd/nand/raw/Kconfig" source "drivers/mtd/nand/spi/Kconfig" +menu "ECC engine support" + +config MTD_NAND_ECC + bool + depends on MTD_NAND_CORE + +endmenu + endmenu diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile index 7ecd80c0a66e..981372953b56 100644 --- a/drivers/mtd/nand/Makefile +++ b/drivers/mtd/nand/Makefile @@ -6,3 +6,5 @@ obj-$(CONFIG_MTD_NAND_CORE) += nandcore.o obj-y += onenand/ obj-y += raw/ obj-y += spi/ + +nandcore-$(CONFIG_MTD_NAND_ECC) += ecc.o diff --git a/drivers/mtd/nand/ecc.c b/drivers/mtd/nand/ecc.c new file mode 100644 index 000000000000..4a56e6c0da67 --- /dev/null +++ b/drivers/mtd/nand/ecc.c @@ -0,0 +1,484 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Generic Error-Correcting Code (ECC) engine + * + * Copyright (C) 2019 Macronix + * Author: + * Miquèl RAYNAL + * + * + * This file describes the abstraction of any NAND ECC engine. It has been + * designed to fit most cases, including parallel NANDs and SPI-NANDs. + * + * There are three main situations where instantiating this ECC engine makes + * sense: + * - external: The ECC engine is outside the NAND pipeline, typically this + * is a software ECC engine, or an hardware engine that is + * outside the NAND controller pipeline. + * - pipelined: The ECC engine is inside the NAND pipeline, ie. on the + * controller's side. This is the case of most of the raw NAND + * controllers. In the pipeline case, the ECC bytes are + * generated/data corrected on the fly when a page is + * written/read. + * - ondie: The ECC engine is inside the NAND pipeline, on the chip's side. + * Some NAND chips can correct themselves the data. + * + * Besides the initial setup and final cleanups, the interfaces are rather + * simple: + * - prepare: Prepare an I/O request. Enable/disable the ECC engine based on + * the I/O request type. In case of software correction or external + * engine, this step may involve to derive the ECC bytes and place + * them in the OOB area before a write. + * - finish: Finish an I/O request. Correct the data in case of a read + * request and report the number of corrected bits/uncorrectable + * errors. Most likely empty for write operations, unless you have + * hardware specific stuff to do, like shutting down the engine to + * save power. + * + * The I/O request should be enclosed in a prepare()/finish() pair of calls + * and will behave differently depending on the requested I/O type: + * - raw: Correction disabled + * - ecc: Correction enabled + * + * The request direction is impacting the logic as well: + * - read: Load data from the NAND chip + * - write: Store data in the NAND chip + * + * Mixing all this combinations together gives the following behavior. + * Those are just examples, drivers are free to add custom steps in their + * prepare/finish hook. + * + * [external ECC engine] + * - external + prepare + raw + read: do nothing + * - external + finish + raw + read: do nothing + * - external + prepare + raw + write: do nothing + * - external + finish + raw + write: do nothing + * - external + prepare + ecc + read: do nothing + * - external + finish + ecc + read: calculate expected ECC bytes, extract + * ECC bytes from OOB buffer, correct + * and report any bitflip/error + * - external + prepare + ecc + write: calculate ECC bytes and store them at + * the right place in the OOB buffer based + * on the OOB layout + * - external + finish + ecc + write: do nothing + * + * [pipelined ECC engine] + * - pipelined + prepare + raw + read: disable the controller's ECC engine if + * activated + * - pipelined + finish + raw + read: do nothing + * - pipelined + prepare + raw + write: disable the controller's ECC engine if + * activated + * - pipelined + finish + raw + write: do nothing + * - pipelined + prepare + ecc + read: enable the controller's ECC engine if + * deactivated + * - pipelined + finish + ecc + read: check the status, report any + * error/bitflip + * - pipelined + prepare + ecc + write: enable the controller's ECC engine if + * deactivated + * - pipelined + finish + ecc + write: do nothing + * + * [ondie ECC engine] + * - ondie + prepare + raw + read: send commands to disable the on-chip ECC + * engine if activated + * - ondie + finish + raw + read: do nothing + * - ondie + prepare + raw + write: send commands to disable the on-chip ECC + * engine if activated + * - ondie + finish + raw + write: do nothing + * - ondie + prepare + ecc + read: send commands to enable the on-chip ECC + * engine if deactivated + * - ondie + finish + ecc + read: send commands to check the status, report + * any error/bitflip + * - ondie + prepare + ecc + write: send commands to enable the on-chip ECC + * engine if deactivated + * - ondie + finish + ecc + write: do nothing + */ + +#include +#include + +/** + * nand_ecc_init_ctx - Init the ECC engine context + * @nand: the NAND device + * + * On success, the caller is responsible of calling @nand_ecc_cleanup_ctx(). + */ +int nand_ecc_init_ctx(struct nand_device *nand) +{ + if (!nand->ecc.engine->ops->init_ctx) + return 0; + + return nand->ecc.engine->ops->init_ctx(nand); +} +EXPORT_SYMBOL(nand_ecc_init_ctx); + +/** + * nand_ecc_cleanup_ctx - Cleanup the ECC engine context + * @nand: the NAND device + */ +void nand_ecc_cleanup_ctx(struct nand_device *nand) +{ + if (nand->ecc.engine->ops->cleanup_ctx) + nand->ecc.engine->ops->cleanup_ctx(nand); +} +EXPORT_SYMBOL(nand_ecc_cleanup_ctx); + +/** + * nand_ecc_prepare_io_req - Prepare an I/O request + * @nand: the NAND device + * @req: the I/O request + */ +int nand_ecc_prepare_io_req(struct nand_device *nand, + struct nand_page_io_req *req) +{ + if (!nand->ecc.engine->ops->prepare_io_req) + return 0; + + return nand->ecc.engine->ops->prepare_io_req(nand, req); +} +EXPORT_SYMBOL(nand_ecc_prepare_io_req); + +/** + * nand_ecc_finish_io_req - Finish an I/O request + * @nand: the NAND device + * @req: the I/O request + */ +int nand_ecc_finish_io_req(struct nand_device *nand, + struct nand_page_io_req *req) +{ + if (!nand->ecc.engine->ops->finish_io_req) + return 0; + + return nand->ecc.engine->ops->finish_io_req(nand, req); +} +EXPORT_SYMBOL(nand_ecc_finish_io_req); + +/* Define default OOB placement schemes for large and small page devices */ +static int nand_ooblayout_ecc_sp(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobregion) +{ + struct nand_device *nand = mtd_to_nanddev(mtd); + unsigned int total_ecc_bytes = nand->ecc.ctx.total; + + if (section > 1) + return -ERANGE; + + if (!section) { + oobregion->offset = 0; + if (mtd->oobsize == 16) + oobregion->length = 4; + else + oobregion->length = 3; + } else { + if (mtd->oobsize == 8) + return -ERANGE; + + oobregion->offset = 6; + oobregion->length = total_ecc_bytes - 4; + } + + return 0; +} + +static int nand_ooblayout_free_sp(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobregion) +{ + if (section > 1) + return -ERANGE; + + if (mtd->oobsize == 16) { + if (section) + return -ERANGE; + + oobregion->length = 8; + oobregion->offset = 8; + } else { + oobregion->length = 2; + if (!section) + oobregion->offset = 3; + else + oobregion->offset = 6; + } + + return 0; +} + +static const struct mtd_ooblayout_ops nand_ooblayout_sp_ops = { + .ecc = nand_ooblayout_ecc_sp, + .free = nand_ooblayout_free_sp, +}; + +const struct mtd_ooblayout_ops *nand_get_small_page_ooblayout(void) +{ + return &nand_ooblayout_sp_ops; +} +EXPORT_SYMBOL_GPL(nand_get_small_page_ooblayout); + +static int nand_ooblayout_ecc_lp(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobregion) +{ + struct nand_device *nand = mtd_to_nanddev(mtd); + unsigned int total_ecc_bytes = nand->ecc.ctx.total; + + if (section || !total_ecc_bytes) + return -ERANGE; + + oobregion->length = total_ecc_bytes; + oobregion->offset = mtd->oobsize - oobregion->length; + + return 0; +} + +static int nand_ooblayout_free_lp(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobregion) +{ + struct nand_device *nand = mtd_to_nanddev(mtd); + unsigned int total_ecc_bytes = nand->ecc.ctx.total; + + if (section) + return -ERANGE; + + oobregion->length = mtd->oobsize - total_ecc_bytes - 2; + oobregion->offset = 2; + + return 0; +} + +static const struct mtd_ooblayout_ops nand_ooblayout_lp_ops = { + .ecc = nand_ooblayout_ecc_lp, + .free = nand_ooblayout_free_lp, +}; + +const struct mtd_ooblayout_ops *nand_get_large_page_ooblayout(void) +{ + return &nand_ooblayout_lp_ops; +} +EXPORT_SYMBOL_GPL(nand_get_large_page_ooblayout); + +/* + * Support the old "large page" layout used for 1-bit Hamming ECC where ECC + * are placed at a fixed offset. + */ +static int nand_ooblayout_ecc_lp_hamming(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobregion) +{ + struct nand_device *nand = mtd_to_nanddev(mtd); + unsigned int total_ecc_bytes = nand->ecc.ctx.total; + + if (section) + return -ERANGE; + + switch (mtd->oobsize) { + case 64: + oobregion->offset = 40; + break; + case 128: + oobregion->offset = 80; + break; + default: + return -EINVAL; + } + + oobregion->length = total_ecc_bytes; + if (oobregion->offset + oobregion->length > mtd->oobsize) + return -ERANGE; + + return 0; +} + +static int nand_ooblayout_free_lp_hamming(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobregion) +{ + struct nand_device *nand = mtd_to_nanddev(mtd); + unsigned int total_ecc_bytes = nand->ecc.ctx.total; + int ecc_offset = 0; + + if (section < 0 || section > 1) + return -ERANGE; + + switch (mtd->oobsize) { + case 64: + ecc_offset = 40; + break; + case 128: + ecc_offset = 80; + break; + default: + return -EINVAL; + } + + if (section == 0) { + oobregion->offset = 2; + oobregion->length = ecc_offset - 2; + } else { + oobregion->offset = ecc_offset + total_ecc_bytes; + oobregion->length = mtd->oobsize - oobregion->offset; + } + + return 0; +} + +static const struct mtd_ooblayout_ops nand_ooblayout_lp_hamming_ops = { + .ecc = nand_ooblayout_ecc_lp_hamming, + .free = nand_ooblayout_free_lp_hamming, +}; + +const struct mtd_ooblayout_ops *nand_get_large_page_hamming_ooblayout(void) +{ + return &nand_ooblayout_lp_hamming_ops; +} +EXPORT_SYMBOL_GPL(nand_get_large_page_hamming_ooblayout); + +static enum nand_ecc_engine_type +of_get_nand_ecc_engine_type(struct device_node *np) +{ + struct device_node *eng_np; + + if (of_property_read_bool(np, "nand-no-ecc-engine")) + return NAND_ECC_ENGINE_TYPE_NONE; + + if (of_property_read_bool(np, "nand-use-soft-ecc-engine")) + return NAND_ECC_ENGINE_TYPE_SOFT; + + eng_np = of_parse_phandle(np, "nand-ecc-engine", 0); + of_node_put(eng_np); + + if (eng_np) { + if (eng_np == np) + return NAND_ECC_ENGINE_TYPE_ON_DIE; + else + return NAND_ECC_ENGINE_TYPE_ON_HOST; + } + + return NAND_ECC_ENGINE_TYPE_INVALID; +} + +static const char * const nand_ecc_placement[] = { + [NAND_ECC_PLACEMENT_OOB] = "oob", + [NAND_ECC_PLACEMENT_INTERLEAVED] = "interleaved", +}; + +static enum nand_ecc_placement of_get_nand_ecc_placement(struct device_node *np) +{ + enum nand_ecc_placement placement; + const char *pm; + int err; + + err = of_property_read_string(np, "nand-ecc-placement", &pm); + if (!err) { + for (placement = NAND_ECC_PLACEMENT_OOB; + placement < ARRAY_SIZE(nand_ecc_placement); placement++) { + if (!strcasecmp(pm, nand_ecc_placement[placement])) + return placement; + } + } + + return NAND_ECC_PLACEMENT_UNKNOWN; +} + +static const char * const nand_ecc_algos[] = { + [NAND_ECC_ALGO_HAMMING] = "hamming", + [NAND_ECC_ALGO_BCH] = "bch", + [NAND_ECC_ALGO_RS] = "rs", +}; + +static enum nand_ecc_algo of_get_nand_ecc_algo(struct device_node *np) +{ + enum nand_ecc_algo ecc_algo; + const char *pm; + int err; + + err = of_property_read_string(np, "nand-ecc-algo", &pm); + if (!err) { + for (ecc_algo = NAND_ECC_ALGO_HAMMING; + ecc_algo < ARRAY_SIZE(nand_ecc_algos); + ecc_algo++) { + if (!strcasecmp(pm, nand_ecc_algos[ecc_algo])) + return ecc_algo; + } + } + + return NAND_ECC_ALGO_UNKNOWN; +} + +static int of_get_nand_ecc_step_size(struct device_node *np) +{ + int ret; + u32 val; + + ret = of_property_read_u32(np, "nand-ecc-step-size", &val); + return ret ? ret : val; +} + +static int of_get_nand_ecc_strength(struct device_node *np) +{ + int ret; + u32 val; + + ret = of_property_read_u32(np, "nand-ecc-strength", &val); + return ret ? ret : val; +} + +void of_get_nand_ecc_user_config(struct nand_device *nand) +{ + struct device_node *dn = nanddev_get_of_node(nand); + int strength, size; + + nand->ecc.user_conf.engine_type = of_get_nand_ecc_engine_type(dn); + nand->ecc.user_conf.algo = of_get_nand_ecc_algo(dn); + nand->ecc.user_conf.placement = of_get_nand_ecc_placement(dn); + + strength = of_get_nand_ecc_strength(dn); + if (strength >= 0) + nand->ecc.user_conf.strength = strength; + + size = of_get_nand_ecc_step_size(dn); + if (size >= 0) + nand->ecc.user_conf.step_size = size; + + if (of_property_read_bool(dn, "nand-ecc-maximize")) + nand->ecc.user_conf.flags |= NAND_ECC_MAXIMIZE_STRENGTH; +} +EXPORT_SYMBOL(of_get_nand_ecc_user_config); + +/** + * nand_ecc_is_strong_enough - Check if the chip configuration meets the + * datasheet requirements. + * + * @nand: Device to check + * + * If our configuration corrects A bits per B bytes and the minimum + * required correction level is X bits per Y bytes, then we must ensure + * both of the following are true: + * + * (1) A / B >= X / Y + * (2) A >= X + * + * Requirement (1) ensures we can correct for the required bitflip density. + * Requirement (2) ensures we can correct even when all bitflips are clumped + * in the same sector. + */ +bool nand_ecc_is_strong_enough(struct nand_device *nand) +{ + const struct nand_ecc_props *reqs = nanddev_get_ecc_requirements(nand); + const struct nand_ecc_props *conf = nanddev_get_ecc_conf(nand); + struct mtd_info *mtd = nanddev_to_mtd(nand); + int corr, ds_corr; + + if (conf->step_size == 0 || reqs->step_size == 0) + /* Not enough information */ + return true; + + /* + * We get the number of corrected bits per page to compare + * the correction density. + */ + corr = (mtd->writesize * conf->strength) / conf->step_size; + ds_corr = (mtd->writesize * reqs->strength) / reqs->step_size; + + return corr >= ds_corr && conf->strength >= reqs->strength; +} +EXPORT_SYMBOL(nand_ecc_is_strong_enough); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Miquel Raynal "); +MODULE_DESCRIPTION("Generic ECC engine"); diff --git a/drivers/mtd/nand/onenand/onenand_base.c b/drivers/mtd/nand/onenand/onenand_base.c index ec18ade33262..188b8061e1f7 100644 --- a/drivers/mtd/nand/onenand/onenand_base.c +++ b/drivers/mtd/nand/onenand/onenand_base.c @@ -1052,16 +1052,11 @@ static int onenand_transfer_auto_oob(struct mtd_info *mtd, uint8_t *buf, int col int thislen) { struct onenand_chip *this = mtd->priv; - int ret; this->read_bufferram(mtd, ONENAND_SPARERAM, this->oob_buf, 0, mtd->oobsize); - ret = mtd_ooblayout_get_databytes(mtd, buf, this->oob_buf, - column, thislen); - if (ret) - return ret; - - return 0; + return mtd_ooblayout_get_databytes(mtd, buf, this->oob_buf, + column, thislen); } /** diff --git a/drivers/mtd/nand/onenand/onenand_omap2.c b/drivers/mtd/nand/onenand/onenand_omap2.c index aa9368bf7a0c..d8c0bd002c2b 100644 --- a/drivers/mtd/nand/onenand/onenand_omap2.c +++ b/drivers/mtd/nand/onenand/onenand_omap2.c @@ -494,11 +494,8 @@ static int omap2_onenand_probe(struct platform_device *pdev) c->int_gpiod = devm_gpiod_get_optional(dev, "int", GPIOD_IN); if (IS_ERR(c->int_gpiod)) { - r = PTR_ERR(c->int_gpiod); /* Just try again if this happens */ - if (r != -EPROBE_DEFER) - dev_err(dev, "error getting gpio: %d\n", r); - return r; + return dev_err_probe(dev, PTR_ERR(c->int_gpiod), "error getting gpio\n"); } if (c->int_gpiod) { diff --git a/drivers/mtd/nand/raw/Kconfig b/drivers/mtd/nand/raw/Kconfig index 1203775023ad..6c46f25b57e2 100644 --- a/drivers/mtd/nand/raw/Kconfig +++ b/drivers/mtd/nand/raw/Kconfig @@ -13,6 +13,7 @@ config MTD_NAND_ECC_SW_HAMMING_SMC menuconfig MTD_RAW_NAND tristate "Raw/Parallel NAND Device Support" select MTD_NAND_CORE + select MTD_NAND_ECC select MTD_NAND_ECC_SW_HAMMING help This enables support for accessing all type of raw/parallel diff --git a/drivers/mtd/nand/raw/ams-delta.c b/drivers/mtd/nand/raw/ams-delta.c index fdba155416d2..d3c5cc513c8f 100644 --- a/drivers/mtd/nand/raw/ams-delta.c +++ b/drivers/mtd/nand/raw/ams-delta.c @@ -260,8 +260,8 @@ static int gpio_nand_probe(struct platform_device *pdev) return err; } - this->ecc.mode = NAND_ECC_SOFT; - this->ecc.algo = NAND_ECC_HAMMING; + this->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + this->ecc.algo = NAND_ECC_ALGO_HAMMING; platform_set_drvdata(pdev, priv); @@ -400,12 +400,14 @@ static int gpio_nand_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_OF static const struct of_device_id gpio_nand_of_id_table[] = { { /* sentinel */ }, }; MODULE_DEVICE_TABLE(of, gpio_nand_of_id_table); +#endif static const struct platform_device_id gpio_nand_plat_id_table[] = { { diff --git a/drivers/mtd/nand/raw/arasan-nand-controller.c b/drivers/mtd/nand/raw/arasan-nand-controller.c index 12c643e97c85..fbb4ea751be8 100644 --- a/drivers/mtd/nand/raw/arasan-nand-controller.c +++ b/drivers/mtd/nand/raw/arasan-nand-controller.c @@ -980,10 +980,10 @@ static int anfc_init_hw_ecc_controller(struct arasan_nfc *nfc, return -EINVAL; } - mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops); + mtd_set_ooblayout(mtd, nand_get_large_page_ooblayout()); ecc->steps = mtd->writesize / ecc->size; - ecc->algo = NAND_ECC_BCH; + ecc->algo = NAND_ECC_ALGO_BCH; anand->ecc_bits = bch_gf_mag * ecc->strength; ecc->bytes = DIV_ROUND_UP(anand->ecc_bits, 8); anand->ecc_total = DIV_ROUND_UP(anand->ecc_bits * ecc->steps, 8); @@ -1056,17 +1056,17 @@ static int anfc_attach_chip(struct nand_chip *chip) chip->ecc.read_page_raw = nand_monolithic_read_page_raw; chip->ecc.write_page_raw = nand_monolithic_write_page_raw; - switch (chip->ecc.mode) { - case NAND_ECC_NONE: - case NAND_ECC_SOFT: - case NAND_ECC_ON_DIE: + switch (chip->ecc.engine_type) { + case NAND_ECC_ENGINE_TYPE_NONE: + case NAND_ECC_ENGINE_TYPE_SOFT: + case NAND_ECC_ENGINE_TYPE_ON_DIE: break; - case NAND_ECC_HW: + case NAND_ECC_ENGINE_TYPE_ON_HOST: ret = anfc_init_hw_ecc_controller(nfc, chip); break; default: dev_err(nfc->dev, "Unsupported ECC mode: %d\n", - chip->ecc.mode); + chip->ecc.engine_type); return -EINVAL; } diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c index c9818f548d07..e6ceec8f50dc 100644 --- a/drivers/mtd/nand/raw/atmel/nand-controller.c +++ b/drivers/mtd/nand/raw/atmel/nand-controller.c @@ -202,6 +202,8 @@ struct atmel_nand_controller_ops { int (*ecc_init)(struct nand_chip *chip); int (*setup_interface)(struct atmel_nand *nand, int csline, const struct nand_interface_config *conf); + int (*exec_op)(struct atmel_nand *nand, + const struct nand_operation *op, bool check_only); }; struct atmel_nand_controller_caps { @@ -259,6 +261,7 @@ struct atmel_hsmc_nand_controller { struct regmap *io; struct atmel_nfc_op op; struct completion complete; + u32 cfg; int irq; /* Only used when instantiating from legacy DT bindings. */ @@ -414,138 +417,6 @@ err: return -EIO; } -static u8 atmel_nand_read_byte(struct nand_chip *chip) -{ - struct atmel_nand *nand = to_atmel_nand(chip); - - return ioread8(nand->activecs->io.virt); -} - -static void atmel_nand_write_byte(struct nand_chip *chip, u8 byte) -{ - struct atmel_nand *nand = to_atmel_nand(chip); - - if (chip->options & NAND_BUSWIDTH_16) - iowrite16(byte | (byte << 8), nand->activecs->io.virt); - else - iowrite8(byte, nand->activecs->io.virt); -} - -static void atmel_nand_read_buf(struct nand_chip *chip, u8 *buf, int len) -{ - struct atmel_nand *nand = to_atmel_nand(chip); - struct atmel_nand_controller *nc; - - nc = to_nand_controller(chip->controller); - - /* - * If the controller supports DMA, the buffer address is DMA-able and - * len is long enough to make DMA transfers profitable, let's trigger - * a DMA transfer. If it fails, fallback to PIO mode. - */ - if (nc->dmac && virt_addr_valid(buf) && - len >= MIN_DMA_LEN && - !atmel_nand_dma_transfer(nc, buf, nand->activecs->io.dma, len, - DMA_FROM_DEVICE)) - return; - - if (chip->options & NAND_BUSWIDTH_16) - ioread16_rep(nand->activecs->io.virt, buf, len / 2); - else - ioread8_rep(nand->activecs->io.virt, buf, len); -} - -static void atmel_nand_write_buf(struct nand_chip *chip, const u8 *buf, int len) -{ - struct atmel_nand *nand = to_atmel_nand(chip); - struct atmel_nand_controller *nc; - - nc = to_nand_controller(chip->controller); - - /* - * If the controller supports DMA, the buffer address is DMA-able and - * len is long enough to make DMA transfers profitable, let's trigger - * a DMA transfer. If it fails, fallback to PIO mode. - */ - if (nc->dmac && virt_addr_valid(buf) && - len >= MIN_DMA_LEN && - !atmel_nand_dma_transfer(nc, (void *)buf, nand->activecs->io.dma, - len, DMA_TO_DEVICE)) - return; - - if (chip->options & NAND_BUSWIDTH_16) - iowrite16_rep(nand->activecs->io.virt, buf, len / 2); - else - iowrite8_rep(nand->activecs->io.virt, buf, len); -} - -static int atmel_nand_dev_ready(struct nand_chip *chip) -{ - struct atmel_nand *nand = to_atmel_nand(chip); - - return gpiod_get_value(nand->activecs->rb.gpio); -} - -static void atmel_nand_select_chip(struct nand_chip *chip, int cs) -{ - struct atmel_nand *nand = to_atmel_nand(chip); - - if (cs < 0 || cs >= nand->numcs) { - nand->activecs = NULL; - chip->legacy.dev_ready = NULL; - return; - } - - nand->activecs = &nand->cs[cs]; - - if (nand->activecs->rb.type == ATMEL_NAND_GPIO_RB) - chip->legacy.dev_ready = atmel_nand_dev_ready; -} - -static int atmel_hsmc_nand_dev_ready(struct nand_chip *chip) -{ - struct atmel_nand *nand = to_atmel_nand(chip); - struct atmel_hsmc_nand_controller *nc; - u32 status; - - nc = to_hsmc_nand_controller(chip->controller); - - regmap_read(nc->base.smc, ATMEL_HSMC_NFC_SR, &status); - - return status & ATMEL_HSMC_NFC_SR_RBEDGE(nand->activecs->rb.id); -} - -static void atmel_hsmc_nand_select_chip(struct nand_chip *chip, int cs) -{ - struct mtd_info *mtd = nand_to_mtd(chip); - struct atmel_nand *nand = to_atmel_nand(chip); - struct atmel_hsmc_nand_controller *nc; - - nc = to_hsmc_nand_controller(chip->controller); - - atmel_nand_select_chip(chip, cs); - - if (!nand->activecs) { - regmap_write(nc->base.smc, ATMEL_HSMC_NFC_CTRL, - ATMEL_HSMC_NFC_CTRL_DIS); - return; - } - - if (nand->activecs->rb.type == ATMEL_NAND_NATIVE_RB) - chip->legacy.dev_ready = atmel_hsmc_nand_dev_ready; - - regmap_update_bits(nc->base.smc, ATMEL_HSMC_NFC_CFG, - ATMEL_HSMC_NFC_CFG_PAGESIZE_MASK | - ATMEL_HSMC_NFC_CFG_SPARESIZE_MASK | - ATMEL_HSMC_NFC_CFG_RSPARE | - ATMEL_HSMC_NFC_CFG_WSPARE, - ATMEL_HSMC_NFC_CFG_PAGESIZE(mtd->writesize) | - ATMEL_HSMC_NFC_CFG_SPARESIZE(mtd->oobsize) | - ATMEL_HSMC_NFC_CFG_RSPARE); - regmap_write(nc->base.smc, ATMEL_HSMC_NFC_CTRL, - ATMEL_HSMC_NFC_CTRL_EN); -} - static int atmel_nfc_exec_op(struct atmel_hsmc_nand_controller *nc, bool poll) { u8 *addrs = nc->op.addrs; @@ -596,51 +467,250 @@ static int atmel_nfc_exec_op(struct atmel_hsmc_nand_controller *nc, bool poll) return ret; } -static void atmel_hsmc_nand_cmd_ctrl(struct nand_chip *chip, int dat, - unsigned int ctrl) +static void atmel_nand_data_in(struct atmel_nand *nand, void *buf, + unsigned int len, bool force_8bit) +{ + struct atmel_nand_controller *nc; + + nc = to_nand_controller(nand->base.controller); + + /* + * If the controller supports DMA, the buffer address is DMA-able and + * len is long enough to make DMA transfers profitable, let's trigger + * a DMA transfer. If it fails, fallback to PIO mode. + */ + if (nc->dmac && virt_addr_valid(buf) && + len >= MIN_DMA_LEN && !force_8bit && + !atmel_nand_dma_transfer(nc, buf, nand->activecs->io.dma, len, + DMA_FROM_DEVICE)) + return; + + if ((nand->base.options & NAND_BUSWIDTH_16) && !force_8bit) + ioread16_rep(nand->activecs->io.virt, buf, len / 2); + else + ioread8_rep(nand->activecs->io.virt, buf, len); +} + +static void atmel_nand_data_out(struct atmel_nand *nand, const void *buf, + unsigned int len, bool force_8bit) +{ + struct atmel_nand_controller *nc; + + nc = to_nand_controller(nand->base.controller); + + /* + * If the controller supports DMA, the buffer address is DMA-able and + * len is long enough to make DMA transfers profitable, let's trigger + * a DMA transfer. If it fails, fallback to PIO mode. + */ + if (nc->dmac && virt_addr_valid(buf) && + len >= MIN_DMA_LEN && !force_8bit && + !atmel_nand_dma_transfer(nc, (void *)buf, nand->activecs->io.dma, + len, DMA_TO_DEVICE)) + return; + + if ((nand->base.options & NAND_BUSWIDTH_16) && !force_8bit) + iowrite16_rep(nand->activecs->io.virt, buf, len / 2); + else + iowrite8_rep(nand->activecs->io.virt, buf, len); +} + +static int atmel_nand_waitrdy(struct atmel_nand *nand, unsigned int timeout_ms) +{ + if (nand->activecs->rb.type == ATMEL_NAND_NO_RB) + return nand_soft_waitrdy(&nand->base, timeout_ms); + + return nand_gpio_waitrdy(&nand->base, nand->activecs->rb.gpio, + timeout_ms); +} + +static int atmel_hsmc_nand_waitrdy(struct atmel_nand *nand, + unsigned int timeout_ms) +{ + struct atmel_hsmc_nand_controller *nc; + u32 status, mask; + + if (nand->activecs->rb.type != ATMEL_NAND_NATIVE_RB) + return atmel_nand_waitrdy(nand, timeout_ms); + + nc = to_hsmc_nand_controller(nand->base.controller); + mask = ATMEL_HSMC_NFC_SR_RBEDGE(nand->activecs->rb.id); + return regmap_read_poll_timeout_atomic(nc->base.smc, ATMEL_HSMC_NFC_SR, + status, status & mask, + 10, timeout_ms * 1000); +} + +static void atmel_nand_select_target(struct atmel_nand *nand, + unsigned int cs) +{ + nand->activecs = &nand->cs[cs]; +} + +static void atmel_hsmc_nand_select_target(struct atmel_nand *nand, + unsigned int cs) +{ + struct mtd_info *mtd = nand_to_mtd(&nand->base); + struct atmel_hsmc_nand_controller *nc; + u32 cfg = ATMEL_HSMC_NFC_CFG_PAGESIZE(mtd->writesize) | + ATMEL_HSMC_NFC_CFG_SPARESIZE(mtd->oobsize) | + ATMEL_HSMC_NFC_CFG_RSPARE; + + nand->activecs = &nand->cs[cs]; + nc = to_hsmc_nand_controller(nand->base.controller); + if (nc->cfg == cfg) + return; + + regmap_update_bits(nc->base.smc, ATMEL_HSMC_NFC_CFG, + ATMEL_HSMC_NFC_CFG_PAGESIZE_MASK | + ATMEL_HSMC_NFC_CFG_SPARESIZE_MASK | + ATMEL_HSMC_NFC_CFG_RSPARE | + ATMEL_HSMC_NFC_CFG_WSPARE, + cfg); + nc->cfg = cfg; +} + +static int atmel_smc_nand_exec_instr(struct atmel_nand *nand, + const struct nand_op_instr *instr) +{ + struct atmel_nand_controller *nc; + unsigned int i; + + nc = to_nand_controller(nand->base.controller); + switch (instr->type) { + case NAND_OP_CMD_INSTR: + writeb(instr->ctx.cmd.opcode, + nand->activecs->io.virt + nc->caps->cle_offs); + return 0; + case NAND_OP_ADDR_INSTR: + for (i = 0; i < instr->ctx.addr.naddrs; i++) + writeb(instr->ctx.addr.addrs[i], + nand->activecs->io.virt + nc->caps->ale_offs); + return 0; + case NAND_OP_DATA_IN_INSTR: + atmel_nand_data_in(nand, instr->ctx.data.buf.in, + instr->ctx.data.len, + instr->ctx.data.force_8bit); + return 0; + case NAND_OP_DATA_OUT_INSTR: + atmel_nand_data_out(nand, instr->ctx.data.buf.out, + instr->ctx.data.len, + instr->ctx.data.force_8bit); + return 0; + case NAND_OP_WAITRDY_INSTR: + return atmel_nand_waitrdy(nand, + instr->ctx.waitrdy.timeout_ms); + default: + break; + } + + return -EINVAL; +} + +static int atmel_smc_nand_exec_op(struct atmel_nand *nand, + const struct nand_operation *op, + bool check_only) +{ + unsigned int i; + int ret = 0; + + if (check_only) + return 0; + + atmel_nand_select_target(nand, op->cs); + gpiod_set_value(nand->activecs->csgpio, 0); + for (i = 0; i < op->ninstrs; i++) { + ret = atmel_smc_nand_exec_instr(nand, &op->instrs[i]); + if (ret) + break; + } + gpiod_set_value(nand->activecs->csgpio, 1); + + return ret; +} + +static int atmel_hsmc_exec_cmd_addr(struct nand_chip *chip, + const struct nand_subop *subop) { struct atmel_nand *nand = to_atmel_nand(chip); struct atmel_hsmc_nand_controller *nc; + unsigned int i, j; nc = to_hsmc_nand_controller(chip->controller); - if (ctrl & NAND_ALE) { - if (nc->op.naddrs == ATMEL_NFC_MAX_ADDR_CYCLES) - return; + nc->op.cs = nand->activecs->id; + for (i = 0; i < subop->ninstrs; i++) { + const struct nand_op_instr *instr = &subop->instrs[i]; - nc->op.addrs[nc->op.naddrs++] = dat; - } else if (ctrl & NAND_CLE) { - if (nc->op.ncmds > 1) - return; + if (instr->type == NAND_OP_CMD_INSTR) { + nc->op.cmds[nc->op.ncmds++] = instr->ctx.cmd.opcode; + continue; + } - nc->op.cmds[nc->op.ncmds++] = dat; + for (j = nand_subop_get_addr_start_off(subop, i); + j < nand_subop_get_num_addr_cyc(subop, i); j++) { + nc->op.addrs[nc->op.naddrs] = instr->ctx.addr.addrs[j]; + nc->op.naddrs++; + } } - if (dat == NAND_CMD_NONE) { - nc->op.cs = nand->activecs->id; - atmel_nfc_exec_op(nc, true); - } + return atmel_nfc_exec_op(nc, true); } -static void atmel_nand_cmd_ctrl(struct nand_chip *chip, int cmd, - unsigned int ctrl) +static int atmel_hsmc_exec_rw(struct nand_chip *chip, + const struct nand_subop *subop) { + const struct nand_op_instr *instr = subop->instrs; struct atmel_nand *nand = to_atmel_nand(chip); - struct atmel_nand_controller *nc; - nc = to_nand_controller(chip->controller); + if (instr->type == NAND_OP_DATA_IN_INSTR) + atmel_nand_data_in(nand, instr->ctx.data.buf.in, + instr->ctx.data.len, + instr->ctx.data.force_8bit); + else + atmel_nand_data_out(nand, instr->ctx.data.buf.out, + instr->ctx.data.len, + instr->ctx.data.force_8bit); - if ((ctrl & NAND_CTRL_CHANGE) && nand->activecs->csgpio) { - if (ctrl & NAND_NCE) - gpiod_set_value(nand->activecs->csgpio, 0); - else - gpiod_set_value(nand->activecs->csgpio, 1); - } + return 0; +} - if (ctrl & NAND_ALE) - writeb(cmd, nand->activecs->io.virt + nc->caps->ale_offs); - else if (ctrl & NAND_CLE) - writeb(cmd, nand->activecs->io.virt + nc->caps->cle_offs); +static int atmel_hsmc_exec_waitrdy(struct nand_chip *chip, + const struct nand_subop *subop) +{ + const struct nand_op_instr *instr = subop->instrs; + struct atmel_nand *nand = to_atmel_nand(chip); + + return atmel_hsmc_nand_waitrdy(nand, instr->ctx.waitrdy.timeout_ms); +} + +static const struct nand_op_parser atmel_hsmc_op_parser = NAND_OP_PARSER( + NAND_OP_PARSER_PATTERN(atmel_hsmc_exec_cmd_addr, + NAND_OP_PARSER_PAT_CMD_ELEM(true), + NAND_OP_PARSER_PAT_ADDR_ELEM(true, 5), + NAND_OP_PARSER_PAT_CMD_ELEM(true)), + NAND_OP_PARSER_PATTERN(atmel_hsmc_exec_rw, + NAND_OP_PARSER_PAT_DATA_IN_ELEM(false, 0)), + NAND_OP_PARSER_PATTERN(atmel_hsmc_exec_rw, + NAND_OP_PARSER_PAT_DATA_OUT_ELEM(false, 0)), + NAND_OP_PARSER_PATTERN(atmel_hsmc_exec_waitrdy, + NAND_OP_PARSER_PAT_WAITRDY_ELEM(false)), +); + +static int atmel_hsmc_nand_exec_op(struct atmel_nand *nand, + const struct nand_operation *op, + bool check_only) +{ + int ret; + + if (check_only) + return nand_op_parser_exec_op(&nand->base, + &atmel_hsmc_op_parser, op, true); + + atmel_hsmc_nand_select_target(nand, op->cs); + ret = nand_op_parser_exec_op(&nand->base, &atmel_hsmc_op_parser, op, + false); + + return ret; } static void atmel_nfc_copy_to_sram(struct nand_chip *chip, const u8 *buf, @@ -838,7 +908,7 @@ static int atmel_nand_pmecc_write_pg(struct nand_chip *chip, const u8 *buf, if (ret) return ret; - atmel_nand_write_buf(chip, buf, mtd->writesize); + nand_write_data_op(chip, buf, mtd->writesize, false); ret = atmel_nand_pmecc_generate_eccbytes(chip, raw); if (ret) { @@ -848,7 +918,7 @@ static int atmel_nand_pmecc_write_pg(struct nand_chip *chip, const u8 *buf, atmel_nand_pmecc_disable(chip, raw); - atmel_nand_write_buf(chip, chip->oob_poi, mtd->oobsize); + nand_write_data_op(chip, chip->oob_poi, mtd->oobsize, false); return nand_prog_page_end_op(chip); } @@ -878,11 +948,17 @@ static int atmel_nand_pmecc_read_pg(struct nand_chip *chip, u8 *buf, if (ret) return ret; - atmel_nand_read_buf(chip, buf, mtd->writesize); - atmel_nand_read_buf(chip, chip->oob_poi, mtd->oobsize); + ret = nand_read_data_op(chip, buf, mtd->writesize, false, false); + if (ret) + goto out_disable; + + ret = nand_read_data_op(chip, chip->oob_poi, mtd->oobsize, false, false); + if (ret) + goto out_disable; ret = atmel_nand_pmecc_correct_data(chip, buf, raw); +out_disable: atmel_nand_pmecc_disable(chip, raw); return ret; @@ -907,8 +983,9 @@ static int atmel_hsmc_nand_pmecc_write_pg(struct nand_chip *chip, struct mtd_info *mtd = nand_to_mtd(chip); struct atmel_nand *nand = to_atmel_nand(chip); struct atmel_hsmc_nand_controller *nc; - int ret, status; + int ret; + atmel_hsmc_nand_select_target(nand, chip->cur_cs); nc = to_hsmc_nand_controller(chip->controller); atmel_nfc_copy_to_sram(chip, buf, false); @@ -939,21 +1016,9 @@ static int atmel_hsmc_nand_pmecc_write_pg(struct nand_chip *chip, if (ret) return ret; - atmel_nand_write_buf(chip, chip->oob_poi, mtd->oobsize); + nand_write_data_op(chip, chip->oob_poi, mtd->oobsize, false); - nc->op.cmds[0] = NAND_CMD_PAGEPROG; - nc->op.ncmds = 1; - nc->op.cs = nand->activecs->id; - ret = atmel_nfc_exec_op(nc, false); - if (ret) - dev_err(nc->base.dev, "Failed to program NAND page (err = %d)\n", - ret); - - status = chip->legacy.waitfunc(chip); - if (status & NAND_STATUS_FAIL) - return -EIO; - - return ret; + return nand_prog_page_end_op(chip); } static int atmel_hsmc_nand_pmecc_write_page(struct nand_chip *chip, @@ -981,6 +1046,7 @@ static int atmel_hsmc_nand_pmecc_read_pg(struct nand_chip *chip, u8 *buf, struct atmel_hsmc_nand_controller *nc; int ret; + atmel_hsmc_nand_select_target(nand, chip->cur_cs); nc = to_hsmc_nand_controller(chip->controller); /* @@ -988,12 +1054,9 @@ static int atmel_hsmc_nand_pmecc_read_pg(struct nand_chip *chip, u8 *buf, * connected to a native SoC R/B pin. If that's not the case, fallback * to the non-optimized one. */ - if (nand->activecs->rb.type != ATMEL_NAND_NATIVE_RB) { - nand_read_page_op(chip, page, 0, NULL, 0); - + if (nand->activecs->rb.type != ATMEL_NAND_NATIVE_RB) return atmel_nand_pmecc_read_pg(chip, buf, oob_required, page, raw); - } nc->op.cmds[nc->op.ncmds++] = NAND_CMD_READ0; @@ -1043,7 +1106,10 @@ static int atmel_hsmc_nand_pmecc_read_page_raw(struct nand_chip *chip, static int atmel_nand_pmecc_init(struct nand_chip *chip) { + const struct nand_ecc_props *requirements = + nanddev_get_ecc_requirements(&chip->base); struct mtd_info *mtd = nand_to_mtd(chip); + struct nand_device *nanddev = mtd_to_nanddev(mtd); struct atmel_nand *nand = to_atmel_nand(chip); struct atmel_nand_controller *nc; struct atmel_pmecc_user_req req; @@ -1068,19 +1134,19 @@ static int atmel_nand_pmecc_init(struct nand_chip *chip) chip->ecc.size = val; } - if (chip->ecc.options & NAND_ECC_MAXIMIZE) + if (nanddev->ecc.user_conf.flags & NAND_ECC_MAXIMIZE_STRENGTH) req.ecc.strength = ATMEL_PMECC_MAXIMIZE_ECC_STRENGTH; else if (chip->ecc.strength) req.ecc.strength = chip->ecc.strength; - else if (chip->base.eccreq.strength) - req.ecc.strength = chip->base.eccreq.strength; + else if (requirements->strength) + req.ecc.strength = requirements->strength; else req.ecc.strength = ATMEL_PMECC_MAXIMIZE_ECC_STRENGTH; if (chip->ecc.size) req.ecc.sectorsize = chip->ecc.size; - else if (chip->base.eccreq.step_size) - req.ecc.sectorsize = chip->base.eccreq.step_size; + else if (requirements->step_size) + req.ecc.sectorsize = requirements->step_size; else req.ecc.sectorsize = ATMEL_PMECC_SECTOR_SIZE_AUTO; @@ -1099,14 +1165,14 @@ static int atmel_nand_pmecc_init(struct nand_chip *chip) if (IS_ERR(nand->pmecc)) return PTR_ERR(nand->pmecc); - chip->ecc.algo = NAND_ECC_BCH; + chip->ecc.algo = NAND_ECC_ALGO_BCH; chip->ecc.size = req.ecc.sectorsize; chip->ecc.bytes = req.ecc.bytes / req.ecc.nsectors; chip->ecc.strength = req.ecc.strength; chip->options |= NAND_NO_SUBPAGE_WRITE; - mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops); + mtd_set_ooblayout(mtd, nand_get_large_page_ooblayout()); return 0; } @@ -1118,15 +1184,15 @@ static int atmel_nand_ecc_init(struct nand_chip *chip) nc = to_nand_controller(chip->controller); - switch (chip->ecc.mode) { - case NAND_ECC_NONE: - case NAND_ECC_SOFT: + switch (chip->ecc.engine_type) { + case NAND_ECC_ENGINE_TYPE_NONE: + case NAND_ECC_ENGINE_TYPE_SOFT: /* * Nothing to do, the core will initialize everything for us. */ break; - case NAND_ECC_HW: + case NAND_ECC_ENGINE_TYPE_ON_HOST: ret = atmel_nand_pmecc_init(chip); if (ret) return ret; @@ -1140,7 +1206,7 @@ static int atmel_nand_ecc_init(struct nand_chip *chip) default: /* Other modes are not supported. */ dev_err(nc->dev, "Unsupported ECC mode: %d\n", - chip->ecc.mode); + chip->ecc.engine_type); return -ENOTSUPP; } @@ -1155,7 +1221,7 @@ static int atmel_hsmc_nand_ecc_init(struct nand_chip *chip) if (ret) return ret; - if (chip->ecc.mode != NAND_ECC_HW) + if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST) return 0; /* Adjust the ECC operations for the HSMC IP. */ @@ -1467,6 +1533,18 @@ static int atmel_nand_setup_interface(struct nand_chip *chip, int csline, return nc->caps->ops->setup_interface(nand, csline, conf); } +static int atmel_nand_exec_op(struct nand_chip *chip, + const struct nand_operation *op, + bool check_only) +{ + struct atmel_nand *nand = to_atmel_nand(chip); + struct atmel_nand_controller *nc; + + nc = to_nand_controller(nand->base.controller); + + return nc->caps->ops->exec_op(nand, op, check_only); +} + static void atmel_nand_init(struct atmel_nand_controller *nc, struct atmel_nand *nand) { @@ -1476,19 +1554,9 @@ static void atmel_nand_init(struct atmel_nand_controller *nc, mtd->dev.parent = nc->dev; nand->base.controller = &nc->base; - chip->legacy.cmd_ctrl = atmel_nand_cmd_ctrl; - chip->legacy.read_byte = atmel_nand_read_byte; - chip->legacy.write_byte = atmel_nand_write_byte; - chip->legacy.read_buf = atmel_nand_read_buf; - chip->legacy.write_buf = atmel_nand_write_buf; - chip->legacy.select_chip = atmel_nand_select_chip; - if (!nc->mck || !nc->caps->ops->setup_interface) chip->options |= NAND_KEEP_TIMINGS; - /* Some NANDs require a longer delay than the default one (20us). */ - chip->legacy.chip_delay = 40; - /* * Use a bounce buffer when the buffer passed by the MTD user is not * suitable for DMA. @@ -1498,7 +1566,7 @@ static void atmel_nand_init(struct atmel_nand_controller *nc, /* Default to HW ECC if pmecc is available. */ if (nc->pmecc) - chip->ecc.mode = NAND_ECC_HW; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; } static void atmel_smc_nand_init(struct atmel_nand_controller *nc, @@ -1527,18 +1595,6 @@ static void atmel_smc_nand_init(struct atmel_nand_controller *nc, smc_nc->ebi_csa->nfd0_on_d16); } -static void atmel_hsmc_nand_init(struct atmel_nand_controller *nc, - struct atmel_nand *nand) -{ - struct nand_chip *chip = &nand->base; - - atmel_nand_init(nc, nand); - - /* Overload some methods for the HSMC controller. */ - chip->legacy.cmd_ctrl = atmel_hsmc_nand_cmd_ctrl; - chip->legacy.select_chip = atmel_hsmc_nand_select_chip; -} - static int atmel_nand_controller_remove_nand(struct atmel_nand *nand) { struct nand_chip *chip = &nand->base; @@ -1957,6 +2013,7 @@ static int atmel_nand_attach_chip(struct nand_chip *chip) static const struct nand_controller_ops atmel_nand_controller_ops = { .attach_chip = atmel_nand_attach_chip, .setup_interface = atmel_nand_setup_interface, + .exec_op = atmel_nand_exec_op, }; static int atmel_nand_controller_init(struct atmel_nand_controller *nc, @@ -1976,13 +2033,9 @@ static int atmel_nand_controller_init(struct atmel_nand_controller *nc, platform_set_drvdata(pdev, nc); nc->pmecc = devm_atmel_pmecc_get(dev); - if (IS_ERR(nc->pmecc)) { - ret = PTR_ERR(nc->pmecc); - if (ret != -EPROBE_DEFER) - dev_err(dev, "Could not get PMECC object (err = %d)\n", - ret); - return ret; - } + if (IS_ERR(nc->pmecc)) + return dev_err_probe(dev, PTR_ERR(nc->pmecc), + "Could not get PMECC object\n"); if (nc->caps->has_dma && !atmel_nand_avoid_dma) { dma_cap_mask_t mask; @@ -2248,6 +2301,9 @@ atmel_hsmc_nand_controller_remove(struct atmel_nand_controller *nc) return ret; hsmc_nc = container_of(nc, struct atmel_hsmc_nand_controller, base); + regmap_write(hsmc_nc->base.smc, ATMEL_HSMC_NFC_CTRL, + ATMEL_HSMC_NFC_CTRL_DIS); + if (hsmc_nc->sram.pool) gen_pool_free(hsmc_nc->sram.pool, (unsigned long)hsmc_nc->sram.virt, @@ -2300,6 +2356,8 @@ static int atmel_hsmc_nand_controller_probe(struct platform_device *pdev, /* Initial NFC configuration. */ regmap_write(nc->base.smc, ATMEL_HSMC_NFC_CFG, ATMEL_HSMC_NFC_CFG_DTO_MAX); + regmap_write(nc->base.smc, ATMEL_HSMC_NFC_CTRL, + ATMEL_HSMC_NFC_CTRL_EN); ret = atmel_nand_controller_add_nands(&nc->base); if (ret) @@ -2317,8 +2375,9 @@ static const struct atmel_nand_controller_ops atmel_hsmc_nc_ops = { .probe = atmel_hsmc_nand_controller_probe, .remove = atmel_hsmc_nand_controller_remove, .ecc_init = atmel_hsmc_nand_ecc_init, - .nand_init = atmel_hsmc_nand_init, + .nand_init = atmel_nand_init, .setup_interface = atmel_hsmc_nand_setup_interface, + .exec_op = atmel_hsmc_nand_exec_op, }; static const struct atmel_nand_controller_caps atmel_sama5_nc_caps = { @@ -2385,6 +2444,7 @@ static const struct atmel_nand_controller_ops at91rm9200_nc_ops = { .remove = atmel_smc_nand_controller_remove, .ecc_init = atmel_nand_ecc_init, .nand_init = atmel_smc_nand_init, + .exec_op = atmel_smc_nand_exec_op, }; static const struct atmel_nand_controller_caps atmel_rm9200_nc_caps = { @@ -2400,6 +2460,7 @@ static const struct atmel_nand_controller_ops atmel_smc_nc_ops = { .ecc_init = atmel_nand_ecc_init, .nand_init = atmel_smc_nand_init, .setup_interface = atmel_smc_nand_setup_interface, + .exec_op = atmel_smc_nand_exec_op, }; static const struct atmel_nand_controller_caps atmel_sam9260_nc_caps = { diff --git a/drivers/mtd/nand/raw/au1550nd.c b/drivers/mtd/nand/raw/au1550nd.c index d865200ccd08..79b057400fe9 100644 --- a/drivers/mtd/nand/raw/au1550nd.c +++ b/drivers/mtd/nand/raw/au1550nd.c @@ -294,8 +294,8 @@ static int au1550nd_probe(struct platform_device *pdev) nand_controller_init(&ctx->controller); ctx->controller.ops = &au1550nd_ops; this->controller = &ctx->controller; - this->ecc.mode = NAND_ECC_SOFT; - this->ecc.algo = NAND_ECC_HAMMING; + this->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + this->ecc.algo = NAND_ECC_ALGO_HAMMING; if (pd->devwidth) this->options |= NAND_BUSWIDTH_16; diff --git a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c index 591775173034..8bb17c5a66c3 100644 --- a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c +++ b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c @@ -391,7 +391,8 @@ int bcm47xxnflash_ops_bcm4706_init(struct bcm47xxnflash *b47n) nand_chip->legacy.chip_delay = 50; b47n->nand_chip.bbt_options = NAND_BBT_USE_FLASH; - b47n->nand_chip.ecc.mode = NAND_ECC_NONE; /* TODO: implement ECC */ + /* TODO: implement ECC */ + b47n->nand_chip.ecc.engine_type = NAND_ECC_ENGINE_TYPE_NONE; /* Enable NAND flash access */ bcma_cc_set32(b47n->cc, BCMA_CC_4706_FLASHSCFG, diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c index a4033d32a710..2da39ab89286 100644 --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c @@ -2532,6 +2532,8 @@ static int brcmnand_setup_dev(struct brcmnand_host *host) { struct mtd_info *mtd = nand_to_mtd(&host->chip); struct nand_chip *chip = &host->chip; + const struct nand_ecc_props *requirements = + nanddev_get_ecc_requirements(&chip->base); struct brcmnand_controller *ctrl = host->ctrl; struct brcmnand_cfg *cfg = &host->hwcfg; char msg[128]; @@ -2565,34 +2567,34 @@ static int brcmnand_setup_dev(struct brcmnand_host *host) cfg->col_adr_bytes = 2; cfg->blk_adr_bytes = get_blk_adr_bytes(mtd->size, mtd->writesize); - if (chip->ecc.mode != NAND_ECC_HW) { + if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST) { dev_err(ctrl->dev, "only HW ECC supported; selected: %d\n", - chip->ecc.mode); + chip->ecc.engine_type); return -EINVAL; } - if (chip->ecc.algo == NAND_ECC_UNKNOWN) { + if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) { if (chip->ecc.strength == 1 && chip->ecc.size == 512) /* Default to Hamming for 1-bit ECC, if unspecified */ - chip->ecc.algo = NAND_ECC_HAMMING; + chip->ecc.algo = NAND_ECC_ALGO_HAMMING; else /* Otherwise, BCH */ - chip->ecc.algo = NAND_ECC_BCH; + chip->ecc.algo = NAND_ECC_ALGO_BCH; } - if (chip->ecc.algo == NAND_ECC_HAMMING && (chip->ecc.strength != 1 || - chip->ecc.size != 512)) { + if (chip->ecc.algo == NAND_ECC_ALGO_HAMMING && + (chip->ecc.strength != 1 || chip->ecc.size != 512)) { dev_err(ctrl->dev, "invalid Hamming params: %d bits per %d bytes\n", chip->ecc.strength, chip->ecc.size); return -EINVAL; } - if (chip->ecc.mode != NAND_ECC_NONE && + if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_NONE && (!chip->ecc.size || !chip->ecc.strength)) { - if (chip->base.eccreq.step_size && chip->base.eccreq.strength) { + if (requirements->step_size && requirements->strength) { /* use detected ECC parameters */ - chip->ecc.size = chip->base.eccreq.step_size; - chip->ecc.strength = chip->base.eccreq.strength; + chip->ecc.size = requirements->step_size; + chip->ecc.strength = requirements->strength; dev_info(ctrl->dev, "Using ECC step-size %d, strength %d\n", chip->ecc.size, chip->ecc.strength); } @@ -2600,7 +2602,7 @@ static int brcmnand_setup_dev(struct brcmnand_host *host) switch (chip->ecc.size) { case 512: - if (chip->ecc.algo == NAND_ECC_HAMMING) + if (chip->ecc.algo == NAND_ECC_ALGO_HAMMING) cfg->ecc_level = 15; else cfg->ecc_level = chip->ecc.strength; @@ -2728,7 +2730,7 @@ static int brcmnand_init_cs(struct brcmnand_host *host, struct device_node *dn) chip->legacy.read_buf = brcmnand_read_buf; chip->legacy.write_buf = brcmnand_write_buf; - chip->ecc.mode = NAND_ECC_HW; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; chip->ecc.read_page = brcmnand_read_page; chip->ecc.write_page = brcmnand_write_page; chip->ecc.read_page_raw = brcmnand_read_page_raw; diff --git a/drivers/mtd/nand/raw/cadence-nand-controller.c b/drivers/mtd/nand/raw/cadence-nand-controller.c index 71516af85f23..b46786cd53e0 100644 --- a/drivers/mtd/nand/raw/cadence-nand-controller.c +++ b/drivers/mtd/nand/raw/cadence-nand-controller.c @@ -2611,7 +2611,7 @@ static int cadence_nand_attach_chip(struct nand_chip *chip) chip->bbt_options |= NAND_BBT_USE_FLASH; chip->bbt_options |= NAND_BBT_NO_OOB; - chip->ecc.mode = NAND_ECC_HW; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; chip->options |= NAND_NO_SUBPAGE_WRITE; @@ -2757,7 +2757,7 @@ static int cadence_nand_chip_init(struct cdns_nand_ctrl *cdns_ctrl, * Default to HW ECC engine mode. If the nand-ecc-mode property is given * in the DT node, this entry will be overwritten in nand_scan_ident(). */ - chip->ecc.mode = NAND_ECC_HW; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; ret = nand_scan(chip, cdns_chip->nsels); if (ret) { @@ -2980,18 +2980,14 @@ static int cadence_nand_dt_probe(struct platform_device *ofdev) dev_info(cdns_ctrl->dev, "IRQ: nr %d\n", cdns_ctrl->irq); cdns_ctrl->reg = devm_platform_ioremap_resource(ofdev, 0); - if (IS_ERR(cdns_ctrl->reg)) { - dev_err(&ofdev->dev, "devm_ioremap_resource res 0 failed\n"); + if (IS_ERR(cdns_ctrl->reg)) return PTR_ERR(cdns_ctrl->reg); - } res = platform_get_resource(ofdev, IORESOURCE_MEM, 1); cdns_ctrl->io.dma = res->start; cdns_ctrl->io.virt = devm_ioremap_resource(&ofdev->dev, res); - if (IS_ERR(cdns_ctrl->io.virt)) { - dev_err(cdns_ctrl->dev, "devm_ioremap_resource res 1 failed\n"); + if (IS_ERR(cdns_ctrl->io.virt)) return PTR_ERR(cdns_ctrl->io.virt); - } dt->clk = devm_clk_get(cdns_ctrl->dev, "nf_clk"); if (IS_ERR(dt->clk)) diff --git a/drivers/mtd/nand/raw/cafe_nand.c b/drivers/mtd/nand/raw/cafe_nand.c index 92173790f20b..2b94f385a1a8 100644 --- a/drivers/mtd/nand/raw/cafe_nand.c +++ b/drivers/mtd/nand/raw/cafe_nand.c @@ -629,7 +629,8 @@ static int cafe_nand_attach_chip(struct nand_chip *chip) goto out_free_dma; } - cafe->nand.ecc.mode = NAND_ECC_HW_SYNDROME; + cafe->nand.ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; + cafe->nand.ecc.placement = NAND_ECC_PLACEMENT_INTERLEAVED; cafe->nand.ecc.size = mtd->writesize; cafe->nand.ecc.bytes = 14; cafe->nand.ecc.strength = 4; diff --git a/drivers/mtd/nand/raw/cs553x_nand.c b/drivers/mtd/nand/raw/cs553x_nand.c index 9472bf798ed5..b7f3f6347761 100644 --- a/drivers/mtd/nand/raw/cs553x_nand.c +++ b/drivers/mtd/nand/raw/cs553x_nand.c @@ -286,7 +286,7 @@ static int __init cs553x_init_one(int cs, int mmio, unsigned long adr) goto out_mtd; } - this->ecc.mode = NAND_ECC_HW; + this->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; this->ecc.size = 256; this->ecc.bytes = 3; this->ecc.hwctl = cs_enable_hwecc; diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c index d975a62caaa5..427f320fb79b 100644 --- a/drivers/mtd/nand/raw/davinci_nand.c +++ b/drivers/mtd/nand/raw/davinci_nand.c @@ -168,7 +168,7 @@ static int nand_davinci_correct_1bit(struct nand_chip *chip, u_char *dat, /* * 4-bit hardware ECC ... context maintained over entire AEMIF * - * This is a syndrome engine, but we avoid NAND_ECC_HW_SYNDROME + * This is a syndrome engine, but we avoid NAND_ECC_PLACEMENT_INTERLEAVED * since that forces use of a problematic "infix OOB" layout. * Among other things, it trashes manufacturer bad block markers. * Also, and specific to this hardware, it ECC-protects the "prepad" @@ -530,11 +530,11 @@ static struct davinci_nand_pdata if (!of_property_read_string(pdev->dev.of_node, "ti,davinci-ecc-mode", &mode)) { if (!strncmp("none", mode, 4)) - pdata->ecc_mode = NAND_ECC_NONE; + pdata->engine_type = NAND_ECC_ENGINE_TYPE_NONE; if (!strncmp("soft", mode, 4)) - pdata->ecc_mode = NAND_ECC_SOFT; + pdata->engine_type = NAND_ECC_ENGINE_TYPE_SOFT; if (!strncmp("hw", mode, 2)) - pdata->ecc_mode = NAND_ECC_HW; + pdata->engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; } if (!of_property_read_u32(pdev->dev.of_node, "ti,davinci-ecc-bits", &prop)) @@ -585,21 +585,21 @@ static int davinci_nand_attach_chip(struct nand_chip *chip) if (IS_ERR(pdata)) return PTR_ERR(pdata); - switch (info->chip.ecc.mode) { - case NAND_ECC_NONE: + switch (info->chip.ecc.engine_type) { + case NAND_ECC_ENGINE_TYPE_NONE: pdata->ecc_bits = 0; break; - case NAND_ECC_SOFT: + case NAND_ECC_ENGINE_TYPE_SOFT: pdata->ecc_bits = 0; /* - * This driver expects Hamming based ECC when ecc_mode is set - * to NAND_ECC_SOFT. Force ecc.algo to NAND_ECC_HAMMING to - * avoid adding an extra ->ecc_algo field to - * davinci_nand_pdata. + * This driver expects Hamming based ECC when engine_type is set + * to NAND_ECC_ENGINE_TYPE_SOFT. Force ecc.algo to + * NAND_ECC_ALGO_HAMMING to avoid adding an extra ->ecc_algo + * field to davinci_nand_pdata. */ - info->chip.ecc.algo = NAND_ECC_HAMMING; + info->chip.ecc.algo = NAND_ECC_ALGO_HAMMING; break; - case NAND_ECC_HW: + case NAND_ECC_ENGINE_TYPE_ON_HOST: if (pdata->ecc_bits == 4) { int chunks = mtd->writesize / 512; @@ -629,7 +629,7 @@ static int davinci_nand_attach_chip(struct nand_chip *chip) info->chip.ecc.hwctl = nand_davinci_hwctl_4bit; info->chip.ecc.bytes = 10; info->chip.ecc.options = NAND_ECC_GENERIC_ERASED_CHECK; - info->chip.ecc.algo = NAND_ECC_BCH; + info->chip.ecc.algo = NAND_ECC_ALGO_BCH; /* * Update ECC layout if needed ... for 1-bit HW ECC, the @@ -645,7 +645,8 @@ static int davinci_nand_attach_chip(struct nand_chip *chip) mtd_set_ooblayout(mtd, &hwecc4_small_ooblayout_ops); } else if (chunks == 4 || chunks == 8) { - mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops); + mtd_set_ooblayout(mtd, + nand_get_large_page_ooblayout()); info->chip.ecc.read_page = nand_davinci_read_page_hwecc_oob_first; } else { return -EIO; @@ -656,7 +657,7 @@ static int davinci_nand_attach_chip(struct nand_chip *chip) info->chip.ecc.correct = nand_davinci_correct_1bit; info->chip.ecc.hwctl = nand_davinci_hwctl_1bit; info->chip.ecc.bytes = 3; - info->chip.ecc.algo = NAND_ECC_HAMMING; + info->chip.ecc.algo = NAND_ECC_ALGO_HAMMING; } info->chip.ecc.size = 512; info->chip.ecc.strength = pdata->ecc_bits; @@ -850,7 +851,8 @@ static int nand_davinci_probe(struct platform_device *pdev) info->mask_cle = pdata->mask_cle ? : MASK_CLE; /* Use board-specific ECC config */ - info->chip.ecc.mode = pdata->ecc_mode; + info->chip.ecc.engine_type = pdata->engine_type; + info->chip.ecc.placement = pdata->ecc_placement; spin_lock_irq(&davinci_nand_lock); @@ -897,7 +899,7 @@ static int nand_davinci_remove(struct platform_device *pdev) int ret; spin_lock_irq(&davinci_nand_lock); - if (info->chip.ecc.mode == NAND_ECC_HW_SYNDROME) + if (info->chip.ecc.placement == NAND_ECC_PLACEMENT_INTERLEAVED) ecc4_busy = false; spin_unlock_irq(&davinci_nand_lock); diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c index 9d99dade95ce..fa2439cb4daa 100644 --- a/drivers/mtd/nand/raw/denali.c +++ b/drivers/mtd/nand/raw/denali.c @@ -1237,7 +1237,8 @@ int denali_chip_init(struct denali_controller *denali, chip->bbt_options |= NAND_BBT_USE_FLASH; chip->bbt_options |= NAND_BBT_NO_OOB; chip->options |= NAND_NO_SUBPAGE_WRITE; - chip->ecc.mode = NAND_ECC_HW_SYNDROME; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; + chip->ecc.placement = NAND_ECC_PLACEMENT_INTERLEAVED; chip->ecc.read_page = denali_read_page; chip->ecc.write_page = denali_write_page; chip->ecc.read_page_raw = denali_read_page_raw; diff --git a/drivers/mtd/nand/raw/denali_pci.c b/drivers/mtd/nand/raw/denali_pci.c index 2f77ee55e1bf..20c085a30adc 100644 --- a/drivers/mtd/nand/raw/denali_pci.c +++ b/drivers/mtd/nand/raw/denali_pci.c @@ -100,7 +100,7 @@ static int denali_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) goto out_remove_denali; } - dchip->chip.ecc.options |= NAND_ECC_MAXIMIZE; + dchip->chip.base.ecc.user_conf.flags |= NAND_ECC_MAXIMIZE_STRENGTH; dchip->nsels = nsels; diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c index 43721863a0d8..94432a453e5e 100644 --- a/drivers/mtd/nand/raw/diskonchip.c +++ b/drivers/mtd/nand/raw/diskonchip.c @@ -1456,7 +1456,8 @@ static int __init doc_probe(unsigned long physadr) nand->ecc.calculate = doc200x_calculate_ecc; nand->ecc.correct = doc200x_correct_data; - nand->ecc.mode = NAND_ECC_HW_SYNDROME; + nand->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; + nand->ecc.placement = NAND_ECC_PLACEMENT_INTERLEAVED; nand->ecc.size = 512; nand->ecc.bytes = 6; nand->ecc.strength = 2; diff --git a/drivers/mtd/nand/raw/fsl_elbc_nand.c b/drivers/mtd/nand/raw/fsl_elbc_nand.c index 088692b2e27a..b2af7f81fdf8 100644 --- a/drivers/mtd/nand/raw/fsl_elbc_nand.c +++ b/drivers/mtd/nand/raw/fsl_elbc_nand.c @@ -244,7 +244,7 @@ static int fsl_elbc_run_command(struct mtd_info *mtd) return -EIO; } - if (chip->ecc.mode != NAND_ECC_HW) + if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST) return 0; elbc_fcm_ctrl->max_bitflips = 0; @@ -727,12 +727,12 @@ static int fsl_elbc_attach_chip(struct nand_chip *chip) struct fsl_lbc_regs __iomem *lbc = ctrl->regs; unsigned int al; - switch (chip->ecc.mode) { + switch (chip->ecc.engine_type) { /* * if ECC was not chosen in DT, decide whether to use HW or SW ECC from * CS Base Register */ - case NAND_ECC_NONE: + case NAND_ECC_ENGINE_TYPE_NONE: /* If CS Base Register selects full hardware ECC then use it */ if ((in_be32(&lbc->bank[priv->bank].br) & BR_DECC) == BR_DECC_CHK_GEN) { @@ -740,23 +740,23 @@ static int fsl_elbc_attach_chip(struct nand_chip *chip) chip->ecc.write_page = fsl_elbc_write_page; chip->ecc.write_subpage = fsl_elbc_write_subpage; - chip->ecc.mode = NAND_ECC_HW; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; mtd_set_ooblayout(mtd, &fsl_elbc_ooblayout_ops); chip->ecc.size = 512; chip->ecc.bytes = 3; chip->ecc.strength = 1; } else { /* otherwise fall back to default software ECC */ - chip->ecc.mode = NAND_ECC_SOFT; - chip->ecc.algo = NAND_ECC_HAMMING; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + chip->ecc.algo = NAND_ECC_ALGO_HAMMING; } break; /* if SW ECC was chosen in DT, we do not need to set anything here */ - case NAND_ECC_SOFT: + case NAND_ECC_ENGINE_TYPE_SOFT: break; - /* should we also implement NAND_ECC_HW to do as the code above? */ + /* should we also implement *_ECC_ENGINE_CONTROLLER to do as above? */ default: return -EINVAL; } @@ -786,8 +786,8 @@ static int fsl_elbc_attach_chip(struct nand_chip *chip) chip->page_shift); dev_dbg(priv->dev, "fsl_elbc_init: nand->phys_erase_shift = %d\n", chip->phys_erase_shift); - dev_dbg(priv->dev, "fsl_elbc_init: nand->ecc.mode = %d\n", - chip->ecc.mode); + dev_dbg(priv->dev, "fsl_elbc_init: nand->ecc.engine_type = %d\n", + chip->ecc.engine_type); dev_dbg(priv->dev, "fsl_elbc_init: nand->ecc.steps = %d\n", chip->ecc.steps); dev_dbg(priv->dev, "fsl_elbc_init: nand->ecc.bytes = %d\n", diff --git a/drivers/mtd/nand/raw/fsl_ifc_nand.c b/drivers/mtd/nand/raw/fsl_ifc_nand.c index 00ae7a910b03..0e7a9b64301e 100644 --- a/drivers/mtd/nand/raw/fsl_ifc_nand.c +++ b/drivers/mtd/nand/raw/fsl_ifc_nand.c @@ -309,7 +309,7 @@ static void fsl_ifc_cmdfunc(struct nand_chip *chip, unsigned int command, ifc_nand_ctrl->read_bytes = mtd->writesize + mtd->oobsize; ifc_nand_ctrl->index += column; - if (chip->ecc.mode == NAND_ECC_HW) + if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_HOST) ifc_nand_ctrl->eccread = 1; fsl_ifc_do_read(chip, 0, mtd); @@ -724,8 +724,8 @@ static int fsl_ifc_attach_chip(struct nand_chip *chip) chip->page_shift); dev_dbg(priv->dev, "%s: nand->phys_erase_shift = %d\n", __func__, chip->phys_erase_shift); - dev_dbg(priv->dev, "%s: nand->ecc.mode = %d\n", __func__, - chip->ecc.mode); + dev_dbg(priv->dev, "%s: nand->ecc.engine_type = %d\n", __func__, + chip->ecc.engine_type); dev_dbg(priv->dev, "%s: nand->ecc.steps = %d\n", __func__, chip->ecc.steps); dev_dbg(priv->dev, "%s: nand->ecc.bytes = %d\n", __func__, @@ -912,7 +912,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) /* Must also set CSOR_NAND_ECC_ENC_EN if DEC_EN set */ if (csor & CSOR_NAND_ECC_DEC_EN) { - chip->ecc.mode = NAND_ECC_HW; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; mtd_set_ooblayout(mtd, &fsl_ifc_ooblayout_ops); /* Hardware generates ECC per 512 Bytes */ @@ -925,8 +925,8 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv) chip->ecc.strength = 8; } } else { - chip->ecc.mode = NAND_ECC_SOFT; - chip->ecc.algo = NAND_ECC_HAMMING; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + chip->ecc.algo = NAND_ECC_ALGO_HAMMING; } ret = fsl_ifc_sram_init(priv); diff --git a/drivers/mtd/nand/raw/fsl_upm.c b/drivers/mtd/nand/raw/fsl_upm.c index 197850aeb261..d5813b9abc8e 100644 --- a/drivers/mtd/nand/raw/fsl_upm.c +++ b/drivers/mtd/nand/raw/fsl_upm.c @@ -47,8 +47,8 @@ static int fun_chip_init(struct fsl_upm_nand *fun, int ret; struct device_node *flash_np; - fun->chip.ecc.mode = NAND_ECC_SOFT; - fun->chip.ecc.algo = NAND_ECC_HAMMING; + fun->chip.ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + fun->chip.ecc.algo = NAND_ECC_ALGO_HAMMING; fun->chip.controller = &fun->base; mtd->dev.parent = fun->dev; diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c index 92ddc41d0ff0..4191831df182 100644 --- a/drivers/mtd/nand/raw/fsmc_nand.c +++ b/drivers/mtd/nand/raw/fsmc_nand.c @@ -900,8 +900,8 @@ static int fsmc_nand_attach_chip(struct nand_chip *nand) return 0; } - switch (nand->ecc.mode) { - case NAND_ECC_HW: + switch (nand->ecc.engine_type) { + case NAND_ECC_ENGINE_TYPE_ON_HOST: dev_info(host->dev, "Using 1-bit HW ECC scheme\n"); nand->ecc.calculate = fsmc_read_hwecc_ecc1; nand->ecc.correct = nand_correct_data; @@ -910,14 +910,14 @@ static int fsmc_nand_attach_chip(struct nand_chip *nand) nand->ecc.options |= NAND_ECC_SOFT_HAMMING_SM_ORDER; break; - case NAND_ECC_SOFT: - if (nand->ecc.algo == NAND_ECC_BCH) { + case NAND_ECC_ENGINE_TYPE_SOFT: + if (nand->ecc.algo == NAND_ECC_ALGO_BCH) { dev_info(host->dev, "Using 4-bit SW BCH ECC scheme\n"); break; } - case NAND_ECC_ON_DIE: + case NAND_ECC_ENGINE_TYPE_ON_DIE: break; default: @@ -929,7 +929,7 @@ static int fsmc_nand_attach_chip(struct nand_chip *nand) * Don't set layout for BCH4 SW ECC. This will be * generated later in nand_bch_init() later. */ - if (nand->ecc.mode == NAND_ECC_HW) { + if (nand->ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_HOST) { switch (mtd->oobsize) { case 16: case 64: @@ -1059,7 +1059,7 @@ static int __init fsmc_nand_probe(struct platform_device *pdev) * Setup default ECC mode. nand_dt_init() called from nand_scan_ident() * can overwrite this value if the DT provides a different value. */ - nand->ecc.mode = NAND_ECC_HW; + nand->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; nand->ecc.hwctl = fsmc_enable_hwecc; nand->ecc.size = 512; nand->badblockbits = 7; diff --git a/drivers/mtd/nand/raw/gpio.c b/drivers/mtd/nand/raw/gpio.c index 3bd847ccc3f3..4ec0a1e10867 100644 --- a/drivers/mtd/nand/raw/gpio.c +++ b/drivers/mtd/nand/raw/gpio.c @@ -342,8 +342,8 @@ static int gpio_nand_probe(struct platform_device *pdev) gpiomtd->base.ops = &gpio_nand_ops; nand_set_flash_node(chip, pdev->dev.of_node); - chip->ecc.mode = NAND_ECC_SOFT; - chip->ecc.algo = NAND_ECC_HAMMING; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + chip->ecc.algo = NAND_ECC_ALGO_HAMMING; chip->options = gpiomtd->plat.options; chip->controller = &gpiomtd->base; diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c index 5d4aee46cc55..dc8104e67506 100644 --- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c @@ -272,8 +272,8 @@ static int set_geometry_by_ecc_info(struct gpmi_nand_data *this, default: dev_err(this->dev, "unsupported nand chip. ecc bits : %d, ecc size : %d\n", - chip->base.eccreq.strength, - chip->base.eccreq.step_size); + nanddev_get_ecc_requirements(&chip->base)->strength, + nanddev_get_ecc_requirements(&chip->base)->step_size); return -EINVAL; } geo->ecc_chunk_size = ecc_step; @@ -510,6 +510,8 @@ static int legacy_set_geometry(struct gpmi_nand_data *this) static int common_nfc_set_geometry(struct gpmi_nand_data *this) { struct nand_chip *chip = &this->nand; + const struct nand_ecc_props *requirements = + nanddev_get_ecc_requirements(&chip->base); if (chip->ecc.strength > 0 && chip->ecc.size > 0) return set_geometry_by_ecc_info(this, chip->ecc.strength, @@ -517,13 +519,12 @@ static int common_nfc_set_geometry(struct gpmi_nand_data *this) if ((of_property_read_bool(this->dev->of_node, "fsl,use-minimum-ecc")) || legacy_set_geometry(this)) { - if (!(chip->base.eccreq.strength > 0 && - chip->base.eccreq.step_size > 0)) + if (!(requirements->strength > 0 && requirements->step_size > 0)) return -EINVAL; return set_geometry_by_ecc_info(this, - chip->base.eccreq.strength, - chip->base.eccreq.step_size); + requirements->strength, + requirements->step_size); } return 0; @@ -1003,10 +1004,8 @@ static int acquire_dma_channels(struct gpmi_nand_data *this) /* request dma channel */ dma_chan = dma_request_chan(&pdev->dev, "rx-tx"); if (IS_ERR(dma_chan)) { - ret = PTR_ERR(dma_chan); - if (ret != -EPROBE_DEFER) - dev_err(this->dev, "DMA channel request failed: %d\n", - ret); + ret = dev_err_probe(this->dev, PTR_ERR(dma_chan), + "DMA channel request failed\n"); release_dma_channels(this); } else { this->dma_chans[0] = dma_chan; @@ -2032,7 +2031,7 @@ static int gpmi_init_last(struct gpmi_nand_data *this) ecc->write_page_raw = gpmi_ecc_write_page_raw; ecc->read_oob_raw = gpmi_ecc_read_oob_raw; ecc->write_oob_raw = gpmi_ecc_write_oob_raw; - ecc->mode = NAND_ECC_HW; + ecc->engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; ecc->size = bch_geo->ecc_chunk_size; ecc->strength = bch_geo->ecc_strength; mtd_set_ooblayout(mtd, &gpmi_ooblayout_ops); diff --git a/drivers/mtd/nand/raw/hisi504_nand.c b/drivers/mtd/nand/raw/hisi504_nand.c index b84238e2268a..8b2122ce6ec3 100644 --- a/drivers/mtd/nand/raw/hisi504_nand.c +++ b/drivers/mtd/nand/raw/hisi504_nand.c @@ -186,7 +186,7 @@ static void hisi_nfc_dma_transfer(struct hinfc_host *host, int todev) hinfc_write(host, host->dma_buffer, HINFC504_DMA_ADDR_DATA); hinfc_write(host, host->dma_oob, HINFC504_DMA_ADDR_OOB); - if (chip->ecc.mode == NAND_ECC_NONE) { + if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_NONE) { hinfc_write(host, ((mtd->oobsize & HINFC504_DMA_LEN_OOB_MASK) << HINFC504_DMA_LEN_OOB_SHIFT), HINFC504_DMA_LEN); @@ -468,7 +468,7 @@ static void hisi_nfc_cmdfunc(struct nand_chip *chip, unsigned command, case NAND_CMD_STATUS: flag = hinfc_read(host, HINFC504_CON); - if (chip->ecc.mode == NAND_ECC_HW) + if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_HOST) hinfc_write(host, flag & ~(HINFC504_CON_ECCTYPE_MASK << HINFC504_CON_ECCTYPE_SHIFT), HINFC504_CON); @@ -721,7 +721,7 @@ static int hisi_nfc_attach_chip(struct nand_chip *chip) } hinfc_write(host, flag, HINFC504_CON); - if (chip->ecc.mode == NAND_ECC_HW) + if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_HOST) hisi_nfc_ecc_probe(host); return 0; diff --git a/drivers/mtd/nand/raw/ingenic/ingenic_nand_drv.c b/drivers/mtd/nand/raw/ingenic/ingenic_nand_drv.c index 69423bb29adb..0e9d426fe4f2 100644 --- a/drivers/mtd/nand/raw/ingenic/ingenic_nand_drv.c +++ b/drivers/mtd/nand/raw/ingenic/ingenic_nand_drv.c @@ -194,8 +194,8 @@ static int ingenic_nand_attach_chip(struct nand_chip *chip) (chip->ecc.strength / 8); } - switch (chip->ecc.mode) { - case NAND_ECC_HW: + switch (chip->ecc.engine_type) { + case NAND_ECC_ENGINE_TYPE_ON_HOST: if (!nfc->ecc) { dev_err(nfc->dev, "HW ECC selected, but ECC controller not found\n"); return -ENODEV; @@ -205,22 +205,22 @@ static int ingenic_nand_attach_chip(struct nand_chip *chip) chip->ecc.calculate = ingenic_nand_ecc_calculate; chip->ecc.correct = ingenic_nand_ecc_correct; fallthrough; - case NAND_ECC_SOFT: + case NAND_ECC_ENGINE_TYPE_SOFT: dev_info(nfc->dev, "using %s (strength %d, size %d, bytes %d)\n", (nfc->ecc) ? "hardware ECC" : "software ECC", chip->ecc.strength, chip->ecc.size, chip->ecc.bytes); break; - case NAND_ECC_NONE: + case NAND_ECC_ENGINE_TYPE_NONE: dev_info(nfc->dev, "not using ECC\n"); break; default: dev_err(nfc->dev, "ECC mode %d not supported\n", - chip->ecc.mode); + chip->ecc.engine_type); return -EINVAL; } /* The NAND core will generate the ECC layout for SW ECC */ - if (chip->ecc.mode != NAND_ECC_HW) + if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST) return 0; /* Generate ECC layout. ECC codes are right aligned in the OOB area. */ @@ -243,8 +243,10 @@ static int ingenic_nand_attach_chip(struct nand_chip *chip) /* For legacy reasons we use a different layout on the qi,lb60 board. */ if (of_machine_is_compatible("qi,lb60")) mtd_set_ooblayout(mtd, &qi_lb60_ooblayout_ops); - else + else if (nfc->soc_info->oob_layout) mtd_set_ooblayout(mtd, nfc->soc_info->oob_layout); + else + mtd_set_ooblayout(mtd, nand_get_large_page_ooblayout()); return 0; } @@ -404,7 +406,7 @@ static int ingenic_nand_init_chip(struct platform_device *pdev, mtd->dev.parent = dev; chip->options = NAND_NO_SUBPAGE_WRITE; - chip->ecc.mode = NAND_ECC_HW; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; chip->controller = &nfc->controller; nand_set_flash_node(chip, np); @@ -532,7 +534,6 @@ static const struct jz_soc_info jz4740_soc_info = { .data_offset = 0x00000000, .cmd_offset = 0x00008000, .addr_offset = 0x00010000, - .oob_layout = &nand_ooblayout_lp_ops, }; static const struct jz_soc_info jz4725b_soc_info = { @@ -546,7 +547,6 @@ static const struct jz_soc_info jz4780_soc_info = { .data_offset = 0x00000000, .cmd_offset = 0x00400000, .addr_offset = 0x00800000, - .oob_layout = &nand_ooblayout_lp_ops, }; static const struct of_device_id ingenic_nand_dt_match[] = { diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c index 7521038af2ef..4940bb2e3c07 100644 --- a/drivers/mtd/nand/raw/lpc32xx_mlc.c +++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c @@ -656,7 +656,7 @@ static int lpc32xx_nand_attach_chip(struct nand_chip *chip) if (!host->dummy_buf) return -ENOMEM; - chip->ecc.mode = NAND_ECC_HW; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; chip->ecc.size = 512; mtd_set_ooblayout(mtd, &lpc32xx_ooblayout_ops); host->mlcsubpages = mtd->writesize / 512; diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c index b151fd000815..6db9d2ed6881 100644 --- a/drivers/mtd/nand/raw/lpc32xx_slc.c +++ b/drivers/mtd/nand/raw/lpc32xx_slc.c @@ -881,7 +881,8 @@ static int lpc32xx_nand_probe(struct platform_device *pdev) platform_set_drvdata(pdev, host); /* NAND callbacks for LPC32xx SLC hardware */ - chip->ecc.mode = NAND_ECC_HW_SYNDROME; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; + chip->ecc.placement = NAND_ECC_PLACEMENT_INTERLEAVED; chip->legacy.read_byte = lpc32xx_nand_read_byte; chip->legacy.read_buf = lpc32xx_nand_read_buf; chip->legacy.write_buf = lpc32xx_nand_write_buf; diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c index 8482d3bd8b1f..f5ca2002d08e 100644 --- a/drivers/mtd/nand/raw/marvell_nand.c +++ b/drivers/mtd/nand/raw/marvell_nand.c @@ -227,6 +227,8 @@ #define XTYPE_MASK 7 /** + * struct marvell_hw_ecc_layout - layout of Marvell ECC + * * Marvell ECC engine works differently than the others, in order to limit the * size of the IP, hardware engineers chose to set a fixed strength at 16 bits * per subpage, and depending on a the desired strength needed by the NAND chip, @@ -292,6 +294,8 @@ static const struct marvell_hw_ecc_layout marvell_nfc_layouts[] = { }; /** + * struct marvell_nand_chip_sel - CS line description + * * The Nand Flash Controller has up to 4 CE and 2 RB pins. The CE selection * is made by a field in NDCB0 register, and in another field in NDCB2 register. * The datasheet describes the logic with an error: ADDR5 field is once @@ -312,14 +316,15 @@ struct marvell_nand_chip_sel { }; /** - * NAND chip structure: stores NAND chip device related information + * struct marvell_nand_chip - stores NAND chip device related information * * @chip: Base NAND chip structure * @node: Used to store NAND chips into a list - * @layout NAND layout when using hardware ECC + * @layout: NAND layout when using hardware ECC * @ndcr: Controller register value for this NAND chip * @ndtr0: Timing registers 0 value for this NAND chip * @ndtr1: Timing registers 1 value for this NAND chip + * @addr_cyc: Amount of cycles needed to pass column address * @selected_die: Current active CS * @nsels: Number of CS lines required by the NAND chip * @sels: Array of CS lines descriptions @@ -349,7 +354,8 @@ static inline struct marvell_nand_chip_sel *to_nand_sel(struct marvell_nand_chip } /** - * NAND controller capabilities for distinction between compatible strings + * struct marvell_nfc_caps - NAND controller capabilities for distinction + * between compatible strings * * @max_cs_nb: Number of Chip Select lines available * @max_rb_nb: Number of Ready/Busy lines available @@ -372,7 +378,7 @@ struct marvell_nfc_caps { }; /** - * NAND controller structure: stores Marvell NAND controller information + * struct marvell_nfc - stores Marvell NAND controller information * * @controller: Base controller structure * @dev: Parent device (used to print error messages) @@ -383,7 +389,9 @@ struct marvell_nfc_caps { * @assigned_cs: Bitmask describing already assigned CS lines * @chips: List containing all the NAND chips attached to * this NAND controller + * @selected_chip: Currently selected target chip * @caps: NAND controller capabilities for each compatible string + * @use_dma: Whetner DMA is used * @dma_chan: DMA channel (NFCv1 only) * @dma_buf: 32-bit aligned buffer for DMA transfers (NFCv1 only) */ @@ -411,7 +419,8 @@ static inline struct marvell_nfc *to_marvell_nfc(struct nand_controller *ctrl) } /** - * NAND controller timings expressed in NAND Controller clock cycles + * struct marvell_nfc_timings - NAND controller timings expressed in NAND + * Controller clock cycles * * @tRP: ND_nRE pulse width * @tRH: ND_nRE high duration @@ -455,8 +464,8 @@ struct marvell_nfc_timings { period_ns)) /** - * NAND driver structure filled during the parsing of the ->exec_op() subop - * subset of instructions. + * struct marvell_nfc_op - filled during the parsing of the ->exec_op() + * subop subset of instructions. * * @ndcb: Array of values written to NDCBx registers * @cle_ale_delay_ns: Optional delay after the last CMD or ADDR cycle @@ -685,9 +694,31 @@ static int marvell_nfc_wait_cmdd(struct nand_chip *chip) return marvell_nfc_end_cmd(chip, cs_flag, "CMDD"); } +static int marvell_nfc_poll_status(struct marvell_nfc *nfc, u32 mask, + u32 expected_val, unsigned long timeout_ms) +{ + unsigned long limit; + u32 st; + + limit = jiffies + msecs_to_jiffies(timeout_ms); + do { + st = readl_relaxed(nfc->regs + NDSR); + if (st & NDSR_RDY(1)) + st |= NDSR_RDY(0); + + if ((st & mask) == expected_val) + return 0; + + cpu_relax(); + } while (time_after(limit, jiffies)); + + return -ETIMEDOUT; +} + static int marvell_nfc_wait_op(struct nand_chip *chip, unsigned int timeout_ms) { struct marvell_nfc *nfc = to_marvell_nfc(chip->controller); + struct mtd_info *mtd = nand_to_mtd(chip); u32 pending; int ret; @@ -695,12 +726,18 @@ static int marvell_nfc_wait_op(struct nand_chip *chip, unsigned int timeout_ms) if (!timeout_ms) timeout_ms = IRQ_TIMEOUT; - init_completion(&nfc->complete); + if (mtd->oops_panic_write) { + ret = marvell_nfc_poll_status(nfc, NDSR_RDY(0), + NDSR_RDY(0), + timeout_ms); + } else { + init_completion(&nfc->complete); - marvell_nfc_enable_int(nfc, NDCR_RDYM); - ret = wait_for_completion_timeout(&nfc->complete, - msecs_to_jiffies(timeout_ms)); - marvell_nfc_disable_int(nfc, NDCR_RDYM); + marvell_nfc_enable_int(nfc, NDCR_RDYM); + ret = wait_for_completion_timeout(&nfc->complete, + msecs_to_jiffies(timeout_ms)); + marvell_nfc_disable_int(nfc, NDCR_RDYM); + } pending = marvell_nfc_clear_int(nfc, NDSR_RDY(0) | NDSR_RDY(1)); /* @@ -780,7 +817,7 @@ static void marvell_nfc_enable_hw_ecc(struct nand_chip *chip) * When enabling BCH, set threshold to 0 to always know the * number of corrected bitflips. */ - if (chip->ecc.algo == NAND_ECC_BCH) + if (chip->ecc.algo == NAND_ECC_ALGO_BCH) writel_relaxed(NDECCCTRL_BCH_EN, nfc->regs + NDECCCTRL); } } @@ -792,7 +829,7 @@ static void marvell_nfc_disable_hw_ecc(struct nand_chip *chip) if (ndcr & NDCR_ECC_EN) { writel_relaxed(ndcr & ~NDCR_ECC_EN, nfc->regs + NDCR); - if (chip->ecc.algo == NAND_ECC_BCH) + if (chip->ecc.algo == NAND_ECC_ALGO_BCH) writel_relaxed(0, nfc->regs + NDECCCTRL); } } @@ -966,7 +1003,7 @@ static int marvell_nfc_hw_ecc_check_bitflips(struct nand_chip *chip, if (ndsr & NDSR_CORERR) { writel_relaxed(ndsr, nfc->regs + NDSR); - if (chip->ecc.algo == NAND_ECC_BCH) + if (chip->ecc.algo == NAND_ECC_ALGO_BCH) bf = NDSR_ERRCNT(ndsr); else bf = 1; @@ -2218,7 +2255,7 @@ static int marvell_nand_hw_ecc_controller_init(struct mtd_info *mtd, ecc->size = l->data_bytes; if (ecc->strength == 1) { - chip->ecc.algo = NAND_ECC_HAMMING; + chip->ecc.algo = NAND_ECC_ALGO_HAMMING; ecc->read_page_raw = marvell_nfc_hw_ecc_hmg_read_page_raw; ecc->read_page = marvell_nfc_hw_ecc_hmg_read_page; ecc->read_oob_raw = marvell_nfc_hw_ecc_hmg_read_oob_raw; @@ -2228,7 +2265,7 @@ static int marvell_nand_hw_ecc_controller_init(struct mtd_info *mtd, ecc->write_oob_raw = marvell_nfc_hw_ecc_hmg_write_oob_raw; ecc->write_oob = ecc->write_oob_raw; } else { - chip->ecc.algo = NAND_ECC_BCH; + chip->ecc.algo = NAND_ECC_ALGO_BCH; ecc->strength = 16; ecc->read_page_raw = marvell_nfc_hw_ecc_bch_read_page_raw; ecc->read_page = marvell_nfc_hw_ecc_bch_read_page; @@ -2247,13 +2284,16 @@ static int marvell_nand_ecc_init(struct mtd_info *mtd, struct nand_ecc_ctrl *ecc) { struct nand_chip *chip = mtd_to_nand(mtd); + const struct nand_ecc_props *requirements = + nanddev_get_ecc_requirements(&chip->base); struct marvell_nfc *nfc = to_marvell_nfc(chip->controller); int ret; - if (ecc->mode != NAND_ECC_NONE && (!ecc->size || !ecc->strength)) { - if (chip->base.eccreq.step_size && chip->base.eccreq.strength) { - ecc->size = chip->base.eccreq.step_size; - ecc->strength = chip->base.eccreq.strength; + if (ecc->engine_type != NAND_ECC_ENGINE_TYPE_NONE && + (!ecc->size || !ecc->strength)) { + if (requirements->step_size && requirements->strength) { + ecc->size = requirements->step_size; + ecc->strength = requirements->strength; } else { dev_info(nfc->dev, "No minimum ECC strength, using 1b/512B\n"); @@ -2262,15 +2302,15 @@ static int marvell_nand_ecc_init(struct mtd_info *mtd, } } - switch (ecc->mode) { - case NAND_ECC_HW: + switch (ecc->engine_type) { + case NAND_ECC_ENGINE_TYPE_ON_HOST: ret = marvell_nand_hw_ecc_controller_init(mtd, ecc); if (ret) return ret; break; - case NAND_ECC_NONE: - case NAND_ECC_SOFT: - case NAND_ECC_ON_DIE: + case NAND_ECC_ENGINE_TYPE_NONE: + case NAND_ECC_ENGINE_TYPE_SOFT: + case NAND_ECC_ENGINE_TYPE_ON_DIE: if (!nfc->caps->is_nfcv2 && mtd->writesize != SZ_512 && mtd->writesize != SZ_2K) { dev_err(nfc->dev, "NFCv1 cannot write %d bytes pages\n", @@ -2467,7 +2507,7 @@ static int marvell_nand_attach_chip(struct nand_chip *chip) return ret; } - if (chip->ecc.mode == NAND_ECC_HW) { + if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_HOST) { /* * Subpage write not available with hardware ECC, prohibit also * subpage read as in userspace subpage access would still be @@ -2642,7 +2682,7 @@ static int marvell_nand_chip_init(struct device *dev, struct marvell_nfc *nfc, * Default to HW ECC engine mode. If the nand-ecc-mode property is given * in the DT node, this entry will be overwritten in nand_scan_ident(). */ - chip->ecc.mode = NAND_ECC_HW; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; /* * Save a reference value for timing registers before @@ -2759,10 +2799,7 @@ static int marvell_nfc_init_dma(struct marvell_nfc *nfc) if (IS_ERR(nfc->dma_chan)) { ret = PTR_ERR(nfc->dma_chan); nfc->dma_chan = NULL; - if (ret != -EPROBE_DEFER) - dev_err(nfc->dev, "DMA channel request failed: %d\n", - ret); - return ret; + return dev_err_probe(nfc->dev, ret, "DMA channel request failed\n"); } r = platform_get_resource(pdev, IORESOURCE_MEM, 0); diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c index 0e5829a2b54f..48e6dac96be6 100644 --- a/drivers/mtd/nand/raw/meson_nand.c +++ b/drivers/mtd/nand/raw/meson_nand.c @@ -1197,7 +1197,7 @@ static int meson_nand_attach_chip(struct nand_chip *nand) if (ret) return -EINVAL; - nand->ecc.mode = NAND_ECC_HW; + nand->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; nand->ecc.write_page_raw = meson_nfc_write_page_raw; nand->ecc.write_page = meson_nfc_write_page_hwecc; nand->ecc.write_oob_raw = nand_write_oob_std; diff --git a/drivers/mtd/nand/raw/mpc5121_nfc.c b/drivers/mtd/nand/raw/mpc5121_nfc.c index 18ecb096a32d..dfd0d3ed5ed0 100644 --- a/drivers/mtd/nand/raw/mpc5121_nfc.c +++ b/drivers/mtd/nand/raw/mpc5121_nfc.c @@ -688,8 +688,8 @@ static int mpc5121_nfc_probe(struct platform_device *op) chip->legacy.set_features = nand_get_set_features_notsupp; chip->legacy.get_features = nand_get_set_features_notsupp; chip->bbt_options = NAND_BBT_USE_FLASH; - chip->ecc.mode = NAND_ECC_SOFT; - chip->ecc.algo = NAND_ECC_HAMMING; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + chip->ecc.algo = NAND_ECC_ALGO_HAMMING; /* Support external chip-select logic on ADS5121 board */ if (of_machine_is_compatible("fsl,mpc5121ads")) { diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c index ad1b55dab211..57f1f1708994 100644 --- a/drivers/mtd/nand/raw/mtk_nand.c +++ b/drivers/mtd/nand/raw/mtk_nand.c @@ -1253,21 +1253,23 @@ static int mtk_nfc_set_spare_per_sector(u32 *sps, struct mtd_info *mtd) static int mtk_nfc_ecc_init(struct device *dev, struct mtd_info *mtd) { struct nand_chip *nand = mtd_to_nand(mtd); + const struct nand_ecc_props *requirements = + nanddev_get_ecc_requirements(&nand->base); struct mtk_nfc *nfc = nand_get_controller_data(nand); u32 spare; int free, ret; /* support only ecc hw mode */ - if (nand->ecc.mode != NAND_ECC_HW) { - dev_err(dev, "ecc.mode not supported\n"); + if (nand->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST) { + dev_err(dev, "ecc.engine_type not supported\n"); return -EINVAL; } /* if optional dt settings not present */ if (!nand->ecc.size || !nand->ecc.strength) { /* use datasheet requirements */ - nand->ecc.strength = nand->base.eccreq.strength; - nand->ecc.size = nand->base.eccreq.step_size; + nand->ecc.strength = requirements->strength; + nand->ecc.size = requirements->step_size; /* * align eccstrength and eccsize @@ -1416,7 +1418,7 @@ static int mtk_nfc_nand_chip_init(struct device *dev, struct mtk_nfc *nfc, nand->options |= NAND_USES_DMA | NAND_SUBPAGE_READ; /* set default mode in case dt entry is missing */ - nand->ecc.mode = NAND_ECC_HW; + nand->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; nand->ecc.write_subpage = mtk_nfc_write_subpage_hwecc; nand->ecc.write_page_raw = mtk_nfc_write_page_raw; diff --git a/drivers/mtd/nand/raw/mxc_nand.c b/drivers/mtd/nand/raw/mxc_nand.c index a043d76b48cb..d4200eb2ad32 100644 --- a/drivers/mtd/nand/raw/mxc_nand.c +++ b/drivers/mtd/nand/raw/mxc_nand.c @@ -669,7 +669,7 @@ static void mxc_nand_enable_hwecc_v1_v2(struct nand_chip *chip, bool enable) struct mxc_nand_host *host = nand_get_controller_data(chip); uint16_t config1; - if (chip->ecc.mode != NAND_ECC_HW) + if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST) return; config1 = readw(NFC_V1_V2_CONFIG1); @@ -687,7 +687,7 @@ static void mxc_nand_enable_hwecc_v3(struct nand_chip *chip, bool enable) struct mxc_nand_host *host = nand_get_controller_data(chip); uint32_t config2; - if (chip->ecc.mode != NAND_ECC_HW) + if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST) return; config2 = readl(NFC_V3_CONFIG2); @@ -1117,7 +1117,8 @@ static void preset_v1(struct mtd_info *mtd) struct mxc_nand_host *host = nand_get_controller_data(nand_chip); uint16_t config1 = 0; - if (nand_chip->ecc.mode == NAND_ECC_HW && mtd->writesize) + if (nand_chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_HOST && + mtd->writesize) config1 |= NFC_V1_V2_CONFIG1_ECC_EN; if (!host->devtype_data->irqpending_quirk) @@ -1227,7 +1228,7 @@ static void preset_v2(struct mtd_info *mtd) if (mtd->writesize) { uint16_t pages_per_block = mtd->erasesize / mtd->writesize; - if (nand_chip->ecc.mode == NAND_ECC_HW) + if (nand_chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_HOST) config1 |= NFC_V1_V2_CONFIG1_ECC_EN; host->eccsize = get_eccsize(mtd); @@ -1303,7 +1304,7 @@ static void preset_v3(struct mtd_info *mtd) } if (mtd->writesize) { - if (chip->ecc.mode == NAND_ECC_HW) + if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_HOST) config2 |= NFC_V3_CONFIG2_ECC_EN; config2 |= NFC_V3_CONFIG2_PPB( @@ -1680,8 +1681,8 @@ static int mxcnd_attach_chip(struct nand_chip *chip) struct mxc_nand_host *host = nand_get_controller_data(chip); struct device *dev = mtd->dev.parent; - switch (chip->ecc.mode) { - case NAND_ECC_HW: + switch (chip->ecc.engine_type) { + case NAND_ECC_ENGINE_TYPE_ON_HOST: chip->ecc.read_page = mxc_nand_read_page; chip->ecc.read_page_raw = mxc_nand_read_page_raw; chip->ecc.read_oob = mxc_nand_read_oob; @@ -1690,7 +1691,7 @@ static int mxcnd_attach_chip(struct nand_chip *chip) chip->ecc.write_oob = mxc_nand_write_oob; break; - case NAND_ECC_SOFT: + case NAND_ECC_ENGINE_TYPE_SOFT: break; default: @@ -1728,7 +1729,7 @@ static int mxcnd_attach_chip(struct nand_chip *chip) */ host->used_oobsize = min(mtd->oobsize, 218U); - if (chip->ecc.mode == NAND_ECC_HW) { + if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_HOST) { if (is_imx21_nfc(host) || is_imx27_nfc(host)) chip->ecc.strength = 1; else @@ -1843,10 +1844,10 @@ static int mxcnd_probe(struct platform_device *pdev) mtd_set_ooblayout(mtd, host->devtype_data->ooblayout); if (host->pdata.hw_ecc) { - this->ecc.mode = NAND_ECC_HW; + this->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; } else { - this->ecc.mode = NAND_ECC_SOFT; - this->ecc.algo = NAND_ECC_HAMMING; + this->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + this->ecc.algo = NAND_ECC_ALGO_HAMMING; } /* NAND bus width determines access functions used by upper layer */ diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 0c768cb88f96..1f0d542d5923 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -45,166 +46,6 @@ #include "internals.h" -/* Define default oob placement schemes for large and small page devices */ -static int nand_ooblayout_ecc_sp(struct mtd_info *mtd, int section, - struct mtd_oob_region *oobregion) -{ - struct nand_chip *chip = mtd_to_nand(mtd); - struct nand_ecc_ctrl *ecc = &chip->ecc; - - if (section > 1) - return -ERANGE; - - if (!section) { - oobregion->offset = 0; - if (mtd->oobsize == 16) - oobregion->length = 4; - else - oobregion->length = 3; - } else { - if (mtd->oobsize == 8) - return -ERANGE; - - oobregion->offset = 6; - oobregion->length = ecc->total - 4; - } - - return 0; -} - -static int nand_ooblayout_free_sp(struct mtd_info *mtd, int section, - struct mtd_oob_region *oobregion) -{ - if (section > 1) - return -ERANGE; - - if (mtd->oobsize == 16) { - if (section) - return -ERANGE; - - oobregion->length = 8; - oobregion->offset = 8; - } else { - oobregion->length = 2; - if (!section) - oobregion->offset = 3; - else - oobregion->offset = 6; - } - - return 0; -} - -const struct mtd_ooblayout_ops nand_ooblayout_sp_ops = { - .ecc = nand_ooblayout_ecc_sp, - .free = nand_ooblayout_free_sp, -}; -EXPORT_SYMBOL_GPL(nand_ooblayout_sp_ops); - -static int nand_ooblayout_ecc_lp(struct mtd_info *mtd, int section, - struct mtd_oob_region *oobregion) -{ - struct nand_chip *chip = mtd_to_nand(mtd); - struct nand_ecc_ctrl *ecc = &chip->ecc; - - if (section || !ecc->total) - return -ERANGE; - - oobregion->length = ecc->total; - oobregion->offset = mtd->oobsize - oobregion->length; - - return 0; -} - -static int nand_ooblayout_free_lp(struct mtd_info *mtd, int section, - struct mtd_oob_region *oobregion) -{ - struct nand_chip *chip = mtd_to_nand(mtd); - struct nand_ecc_ctrl *ecc = &chip->ecc; - - if (section) - return -ERANGE; - - oobregion->length = mtd->oobsize - ecc->total - 2; - oobregion->offset = 2; - - return 0; -} - -const struct mtd_ooblayout_ops nand_ooblayout_lp_ops = { - .ecc = nand_ooblayout_ecc_lp, - .free = nand_ooblayout_free_lp, -}; -EXPORT_SYMBOL_GPL(nand_ooblayout_lp_ops); - -/* - * Support the old "large page" layout used for 1-bit Hamming ECC where ECC - * are placed at a fixed offset. - */ -static int nand_ooblayout_ecc_lp_hamming(struct mtd_info *mtd, int section, - struct mtd_oob_region *oobregion) -{ - struct nand_chip *chip = mtd_to_nand(mtd); - struct nand_ecc_ctrl *ecc = &chip->ecc; - - if (section) - return -ERANGE; - - switch (mtd->oobsize) { - case 64: - oobregion->offset = 40; - break; - case 128: - oobregion->offset = 80; - break; - default: - return -EINVAL; - } - - oobregion->length = ecc->total; - if (oobregion->offset + oobregion->length > mtd->oobsize) - return -ERANGE; - - return 0; -} - -static int nand_ooblayout_free_lp_hamming(struct mtd_info *mtd, int section, - struct mtd_oob_region *oobregion) -{ - struct nand_chip *chip = mtd_to_nand(mtd); - struct nand_ecc_ctrl *ecc = &chip->ecc; - int ecc_offset = 0; - - if (section < 0 || section > 1) - return -ERANGE; - - switch (mtd->oobsize) { - case 64: - ecc_offset = 40; - break; - case 128: - ecc_offset = 80; - break; - default: - return -EINVAL; - } - - if (section == 0) { - oobregion->offset = 2; - oobregion->length = ecc_offset - 2; - } else { - oobregion->offset = ecc_offset + ecc->total; - oobregion->length = mtd->oobsize - oobregion->offset; - } - - return 0; -} - -static const struct mtd_ooblayout_ops nand_ooblayout_lp_hamming_ops = { - .ecc = nand_ooblayout_ecc_lp_hamming, - .free = nand_ooblayout_free_lp_hamming, -}; - static int nand_pairing_dist3_get_info(struct mtd_info *mtd, int page, struct mtd_pairing_info *info) { @@ -4750,6 +4591,8 @@ static inline bool is_full_id_nand(struct nand_flash_dev *type) static bool find_full_id_nand(struct nand_chip *chip, struct nand_flash_dev *type) { + struct nand_device *base = &chip->base; + struct nand_ecc_props requirements; struct mtd_info *mtd = nand_to_mtd(chip); struct nand_memory_organization *memorg; u8 *id_data = chip->id.data; @@ -4771,8 +4614,9 @@ static bool find_full_id_nand(struct nand_chip *chip, memorg->pagesize * memorg->pages_per_eraseblock); chip->options |= type->options; - chip->base.eccreq.strength = NAND_ECC_STRENGTH(type); - chip->base.eccreq.step_size = NAND_ECC_STEP(type); + requirements.strength = NAND_ECC_STRENGTH(type); + requirements.step_size = NAND_ECC_STEP(type); + nanddev_set_ecc_requirements(base, &requirements); chip->parameters.model = kstrdup(type->name, GFP_KERNEL); if (!chip->parameters.model) @@ -5033,91 +4877,101 @@ free_detect_allocation: return ret; } -static const char * const nand_ecc_modes[] = { - [NAND_ECC_NONE] = "none", - [NAND_ECC_SOFT] = "soft", - [NAND_ECC_HW] = "hw", - [NAND_ECC_HW_SYNDROME] = "hw_syndrome", - [NAND_ECC_ON_DIE] = "on-die", -}; - -static int of_get_nand_ecc_mode(struct device_node *np) +static enum nand_ecc_engine_type +of_get_rawnand_ecc_engine_type_legacy(struct device_node *np) { - const char *pm; - int err, i; - - err = of_property_read_string(np, "nand-ecc-mode", &pm); - if (err < 0) - return err; - - for (i = NAND_ECC_NONE; i < ARRAY_SIZE(nand_ecc_modes); i++) - if (!strcasecmp(pm, nand_ecc_modes[i])) - return i; - - /* - * For backward compatibility we support few obsoleted values that don't - * have their mappings into the nand_ecc_mode enum anymore (they were - * merged with other enums). - */ - if (!strcasecmp(pm, "soft_bch")) - return NAND_ECC_SOFT; - - return -ENODEV; -} - -static const char * const nand_ecc_algos[] = { - [NAND_ECC_HAMMING] = "hamming", - [NAND_ECC_BCH] = "bch", - [NAND_ECC_RS] = "rs", -}; - -static enum nand_ecc_algo of_get_nand_ecc_algo(struct device_node *np) -{ - enum nand_ecc_algo ecc_algo; + enum nand_ecc_legacy_mode { + NAND_ECC_INVALID, + NAND_ECC_NONE, + NAND_ECC_SOFT, + NAND_ECC_SOFT_BCH, + NAND_ECC_HW, + NAND_ECC_HW_SYNDROME, + NAND_ECC_ON_DIE, + }; + const char * const nand_ecc_legacy_modes[] = { + [NAND_ECC_NONE] = "none", + [NAND_ECC_SOFT] = "soft", + [NAND_ECC_SOFT_BCH] = "soft_bch", + [NAND_ECC_HW] = "hw", + [NAND_ECC_HW_SYNDROME] = "hw_syndrome", + [NAND_ECC_ON_DIE] = "on-die", + }; + enum nand_ecc_legacy_mode eng_type; const char *pm; int err; - err = of_property_read_string(np, "nand-ecc-algo", &pm); - if (!err) { - for (ecc_algo = NAND_ECC_HAMMING; - ecc_algo < ARRAY_SIZE(nand_ecc_algos); - ecc_algo++) { - if (!strcasecmp(pm, nand_ecc_algos[ecc_algo])) - return ecc_algo; + err = of_property_read_string(np, "nand-ecc-mode", &pm); + if (err) + return NAND_ECC_ENGINE_TYPE_INVALID; + + for (eng_type = NAND_ECC_NONE; + eng_type < ARRAY_SIZE(nand_ecc_legacy_modes); eng_type++) { + if (!strcasecmp(pm, nand_ecc_legacy_modes[eng_type])) { + switch (eng_type) { + case NAND_ECC_NONE: + return NAND_ECC_ENGINE_TYPE_NONE; + case NAND_ECC_SOFT: + case NAND_ECC_SOFT_BCH: + return NAND_ECC_ENGINE_TYPE_SOFT; + case NAND_ECC_HW: + case NAND_ECC_HW_SYNDROME: + return NAND_ECC_ENGINE_TYPE_ON_HOST; + case NAND_ECC_ON_DIE: + return NAND_ECC_ENGINE_TYPE_ON_DIE; + default: + break; + } } } - /* - * For backward compatibility we also read "nand-ecc-mode" checking - * for some obsoleted values that were specifying ECC algorithm. - */ + return NAND_ECC_ENGINE_TYPE_INVALID; +} + +static enum nand_ecc_placement +of_get_rawnand_ecc_placement_legacy(struct device_node *np) +{ + const char *pm; + int err; + + err = of_property_read_string(np, "nand-ecc-mode", &pm); + if (!err) { + if (!strcasecmp(pm, "hw_syndrome")) + return NAND_ECC_PLACEMENT_INTERLEAVED; + } + + return NAND_ECC_PLACEMENT_UNKNOWN; +} + +static enum nand_ecc_algo of_get_rawnand_ecc_algo_legacy(struct device_node *np) +{ + const char *pm; + int err; + err = of_property_read_string(np, "nand-ecc-mode", &pm); if (!err) { if (!strcasecmp(pm, "soft")) - return NAND_ECC_HAMMING; + return NAND_ECC_ALGO_HAMMING; else if (!strcasecmp(pm, "soft_bch")) - return NAND_ECC_BCH; + return NAND_ECC_ALGO_BCH; } - return NAND_ECC_UNKNOWN; + return NAND_ECC_ALGO_UNKNOWN; } -static int of_get_nand_ecc_step_size(struct device_node *np) +static void of_get_nand_ecc_legacy_user_config(struct nand_chip *chip) { - int ret; - u32 val; + struct device_node *dn = nand_get_flash_node(chip); + struct nand_ecc_props *user_conf = &chip->base.ecc.user_conf; - ret = of_property_read_u32(np, "nand-ecc-step-size", &val); - return ret ? ret : val; -} + if (user_conf->engine_type == NAND_ECC_ENGINE_TYPE_INVALID) + user_conf->engine_type = of_get_rawnand_ecc_engine_type_legacy(dn); -static int of_get_nand_ecc_strength(struct device_node *np) -{ - int ret; - u32 val; + if (user_conf->algo == NAND_ECC_ALGO_UNKNOWN) + user_conf->algo = of_get_rawnand_ecc_algo_legacy(dn); - ret = of_property_read_u32(np, "nand-ecc-strength", &val); - return ret ? ret : val; + if (user_conf->placement == NAND_ECC_PLACEMENT_UNKNOWN) + user_conf->placement = of_get_rawnand_ecc_placement_legacy(dn); } static int of_get_nand_bus_width(struct device_node *np) @@ -5141,11 +4995,10 @@ static bool of_get_nand_on_flash_bbt(struct device_node *np) return of_property_read_bool(np, "nand-on-flash-bbt"); } -static int nand_dt_init(struct nand_chip *chip) +static int rawnand_dt_init(struct nand_chip *chip) { + struct nand_device *nand = mtd_to_nanddev(nand_to_mtd(chip)); struct device_node *dn = nand_get_flash_node(chip); - enum nand_ecc_algo ecc_algo; - int ecc_mode, ecc_strength, ecc_step; if (!dn) return 0; @@ -5159,25 +5012,29 @@ static int nand_dt_init(struct nand_chip *chip) if (of_get_nand_on_flash_bbt(dn)) chip->bbt_options |= NAND_BBT_USE_FLASH; - ecc_mode = of_get_nand_ecc_mode(dn); - ecc_algo = of_get_nand_ecc_algo(dn); - ecc_strength = of_get_nand_ecc_strength(dn); - ecc_step = of_get_nand_ecc_step_size(dn); + of_get_nand_ecc_user_config(nand); + of_get_nand_ecc_legacy_user_config(chip); - if (ecc_mode >= 0) - chip->ecc.mode = ecc_mode; + /* + * If neither the user nor the NAND controller have requested a specific + * ECC engine type, we will default to NAND_ECC_ENGINE_TYPE_ON_HOST. + */ + nand->ecc.defaults.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; - if (ecc_algo != NAND_ECC_UNKNOWN) - chip->ecc.algo = ecc_algo; + /* + * Use the user requested engine type, unless there is none, in this + * case default to the NAND controller choice, otherwise fallback to + * the raw NAND default one. + */ + if (nand->ecc.user_conf.engine_type != NAND_ECC_ENGINE_TYPE_INVALID) + chip->ecc.engine_type = nand->ecc.user_conf.engine_type; + if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_INVALID) + chip->ecc.engine_type = nand->ecc.defaults.engine_type; - if (ecc_strength >= 0) - chip->ecc.strength = ecc_strength; - - if (ecc_step > 0) - chip->ecc.size = ecc_step; - - if (of_property_read_bool(dn, "nand-ecc-maximize")) - chip->ecc.options |= NAND_ECC_MAXIMIZE; + chip->ecc.placement = nand->ecc.user_conf.placement; + chip->ecc.algo = nand->ecc.user_conf.algo; + chip->ecc.strength = nand->ecc.user_conf.strength; + chip->ecc.size = nand->ecc.user_conf.step_size; return 0; } @@ -5215,7 +5072,7 @@ static int nand_scan_ident(struct nand_chip *chip, unsigned int maxchips, /* Enforce the right timings for reset/detection */ chip->current_interface_config = nand_get_reset_interface_config(); - ret = nand_dt_init(chip); + ret = rawnand_dt_init(chip); if (ret) return ret; @@ -5282,16 +5139,76 @@ static void nand_scan_ident_cleanup(struct nand_chip *chip) kfree(chip->parameters.onfi); } +static int nand_set_ecc_on_host_ops(struct nand_chip *chip) +{ + struct nand_ecc_ctrl *ecc = &chip->ecc; + + switch (ecc->placement) { + case NAND_ECC_PLACEMENT_UNKNOWN: + case NAND_ECC_PLACEMENT_OOB: + /* Use standard hwecc read page function? */ + if (!ecc->read_page) + ecc->read_page = nand_read_page_hwecc; + if (!ecc->write_page) + ecc->write_page = nand_write_page_hwecc; + if (!ecc->read_page_raw) + ecc->read_page_raw = nand_read_page_raw; + if (!ecc->write_page_raw) + ecc->write_page_raw = nand_write_page_raw; + if (!ecc->read_oob) + ecc->read_oob = nand_read_oob_std; + if (!ecc->write_oob) + ecc->write_oob = nand_write_oob_std; + if (!ecc->read_subpage) + ecc->read_subpage = nand_read_subpage; + if (!ecc->write_subpage && ecc->hwctl && ecc->calculate) + ecc->write_subpage = nand_write_subpage_hwecc; + fallthrough; + + case NAND_ECC_PLACEMENT_INTERLEAVED: + if ((!ecc->calculate || !ecc->correct || !ecc->hwctl) && + (!ecc->read_page || + ecc->read_page == nand_read_page_hwecc || + !ecc->write_page || + ecc->write_page == nand_write_page_hwecc)) { + WARN(1, "No ECC functions supplied; hardware ECC not possible\n"); + return -EINVAL; + } + /* Use standard syndrome read/write page function? */ + if (!ecc->read_page) + ecc->read_page = nand_read_page_syndrome; + if (!ecc->write_page) + ecc->write_page = nand_write_page_syndrome; + if (!ecc->read_page_raw) + ecc->read_page_raw = nand_read_page_raw_syndrome; + if (!ecc->write_page_raw) + ecc->write_page_raw = nand_write_page_raw_syndrome; + if (!ecc->read_oob) + ecc->read_oob = nand_read_oob_syndrome; + if (!ecc->write_oob) + ecc->write_oob = nand_write_oob_syndrome; + break; + + default: + pr_warn("Invalid NAND_ECC_PLACEMENT %d\n", + ecc->placement); + return -EINVAL; + } + + return 0; +} + static int nand_set_ecc_soft_ops(struct nand_chip *chip) { struct mtd_info *mtd = nand_to_mtd(chip); + struct nand_device *nanddev = mtd_to_nanddev(mtd); struct nand_ecc_ctrl *ecc = &chip->ecc; - if (WARN_ON(ecc->mode != NAND_ECC_SOFT)) + if (WARN_ON(ecc->engine_type != NAND_ECC_ENGINE_TYPE_SOFT)) return -EINVAL; switch (ecc->algo) { - case NAND_ECC_HAMMING: + case NAND_ECC_ALGO_HAMMING: ecc->calculate = nand_calculate_ecc; ecc->correct = nand_correct_data; ecc->read_page = nand_read_page_swecc; @@ -5312,7 +5229,7 @@ static int nand_set_ecc_soft_ops(struct nand_chip *chip) ecc->options |= NAND_ECC_SOFT_HAMMING_SM_ORDER; return 0; - case NAND_ECC_BCH: + case NAND_ECC_ALGO_BCH: if (!mtd_nand_has_bch()) { WARN(1, "CONFIG_MTD_NAND_ECC_SW_BCH not enabled\n"); return -EINVAL; @@ -5350,7 +5267,7 @@ static int nand_set_ecc_soft_ops(struct nand_chip *chip) return -EINVAL; } - mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops); + mtd_set_ooblayout(mtd, nand_get_large_page_ooblayout()); } @@ -5359,8 +5276,8 @@ static int nand_set_ecc_soft_ops(struct nand_chip *chip) * used, otherwise we don't know how many bytes can really be * used. */ - if (mtd->ooblayout == &nand_ooblayout_lp_ops && - ecc->options & NAND_ECC_MAXIMIZE) { + if (mtd->ooblayout == nand_get_large_page_ooblayout() && + nanddev->ecc.user_conf.flags & NAND_ECC_MAXIMIZE_STRENGTH) { int steps, bytes; /* Always prefer 1k blocks over 512bytes ones */ @@ -5454,10 +5371,12 @@ static int nand_match_ecc_req(struct nand_chip *chip, const struct nand_ecc_caps *caps, int oobavail) { + const struct nand_ecc_props *requirements = + nanddev_get_ecc_requirements(&chip->base); struct mtd_info *mtd = nand_to_mtd(chip); const struct nand_ecc_step_info *stepinfo; - int req_step = chip->base.eccreq.step_size; - int req_strength = chip->base.eccreq.strength; + int req_step = requirements->step_size; + int req_strength = requirements->strength; int req_corr, step_size, strength, nsteps, ecc_bytes, ecc_bytes_total; int best_step, best_strength, best_ecc_bytes; int best_ecc_bytes_total = INT_MAX; @@ -5598,11 +5517,12 @@ nand_maximize_ecc(struct nand_chip *chip, * @caps: ECC engine caps info structure * @oobavail: OOB size that the ECC engine can use * - * Choose the ECC configuration according to following logic + * Choose the ECC configuration according to following logic. * * 1. If both ECC step size and ECC strength are already set (usually by DT) * then check if it is supported by this controller. - * 2. If NAND_ECC_MAXIMIZE is set, then select maximum ECC strength. + * 2. If the user provided the nand-ecc-maximize property, then select maximum + * ECC strength. * 3. Otherwise, try to match the ECC step size and ECC strength closest * to the chip's requirement. If available OOB size can't fit the chip * requirement then fallback to the maximum ECC step size and ECC strength. @@ -5613,6 +5533,7 @@ int nand_ecc_choose_conf(struct nand_chip *chip, const struct nand_ecc_caps *caps, int oobavail) { struct mtd_info *mtd = nand_to_mtd(chip); + struct nand_device *nanddev = mtd_to_nanddev(mtd); if (WARN_ON(oobavail < 0 || oobavail > mtd->oobsize)) return -EINVAL; @@ -5620,7 +5541,7 @@ int nand_ecc_choose_conf(struct nand_chip *chip, if (chip->ecc.size && chip->ecc.strength) return nand_check_ecc_caps(chip, caps, oobavail); - if (chip->ecc.options & NAND_ECC_MAXIMIZE) + if (nanddev->ecc.user_conf.flags & NAND_ECC_MAXIMIZE_STRENGTH) return nand_maximize_ecc(chip, caps, oobavail); if (!nand_match_ecc_req(chip, caps, oobavail)) @@ -5630,41 +5551,6 @@ int nand_ecc_choose_conf(struct nand_chip *chip, } EXPORT_SYMBOL_GPL(nand_ecc_choose_conf); -/* - * Check if the chip configuration meet the datasheet requirements. - - * If our configuration corrects A bits per B bytes and the minimum - * required correction level is X bits per Y bytes, then we must ensure - * both of the following are true: - * - * (1) A / B >= X / Y - * (2) A >= X - * - * Requirement (1) ensures we can correct for the required bitflip density. - * Requirement (2) ensures we can correct even when all bitflips are clumped - * in the same sector. - */ -static bool nand_ecc_strength_good(struct nand_chip *chip) -{ - struct mtd_info *mtd = nand_to_mtd(chip); - struct nand_ecc_ctrl *ecc = &chip->ecc; - int corr, ds_corr; - - if (ecc->size == 0 || chip->base.eccreq.step_size == 0) - /* Not enough information */ - return true; - - /* - * We get the number of corrected bits per page to compare - * the correction density. - */ - corr = (mtd->writesize * ecc->strength) / ecc->size; - ds_corr = (mtd->writesize * chip->base.eccreq.strength) / - chip->base.eccreq.step_size; - - return corr >= ds_corr && ecc->strength >= chip->base.eccreq.strength; -} - static int rawnand_erase(struct nand_device *nand, const struct nand_pos *pos) { struct nand_chip *chip = container_of(nand, struct nand_chip, @@ -5752,15 +5638,17 @@ static int nand_scan_tail(struct nand_chip *chip) * If no default placement scheme is given, select an appropriate one. */ if (!mtd->ooblayout && - !(ecc->mode == NAND_ECC_SOFT && ecc->algo == NAND_ECC_BCH)) { + !(ecc->engine_type == NAND_ECC_ENGINE_TYPE_SOFT && + ecc->algo == NAND_ECC_ALGO_BCH)) { switch (mtd->oobsize) { case 8: case 16: - mtd_set_ooblayout(mtd, &nand_ooblayout_sp_ops); + mtd_set_ooblayout(mtd, nand_get_small_page_ooblayout()); break; case 64: case 128: - mtd_set_ooblayout(mtd, &nand_ooblayout_lp_hamming_ops); + mtd_set_ooblayout(mtd, + nand_get_large_page_hamming_ooblayout()); break; default: /* @@ -5770,9 +5658,9 @@ static int nand_scan_tail(struct nand_chip *chip) * page with ECC layout when ->oobsize <= 128 for * compatibility reasons. */ - if (ecc->mode == NAND_ECC_NONE) { + if (ecc->engine_type == NAND_ECC_ENGINE_TYPE_NONE) { mtd_set_ooblayout(mtd, - &nand_ooblayout_lp_ops); + nand_get_large_page_ooblayout()); break; } @@ -5788,49 +5676,11 @@ static int nand_scan_tail(struct nand_chip *chip) * selected and we have 256 byte pagesize fallback to software ECC */ - switch (ecc->mode) { - case NAND_ECC_HW: - /* Use standard hwecc read page function? */ - if (!ecc->read_page) - ecc->read_page = nand_read_page_hwecc; - if (!ecc->write_page) - ecc->write_page = nand_write_page_hwecc; - if (!ecc->read_page_raw) - ecc->read_page_raw = nand_read_page_raw; - if (!ecc->write_page_raw) - ecc->write_page_raw = nand_write_page_raw; - if (!ecc->read_oob) - ecc->read_oob = nand_read_oob_std; - if (!ecc->write_oob) - ecc->write_oob = nand_write_oob_std; - if (!ecc->read_subpage) - ecc->read_subpage = nand_read_subpage; - if (!ecc->write_subpage && ecc->hwctl && ecc->calculate) - ecc->write_subpage = nand_write_subpage_hwecc; - fallthrough; - case NAND_ECC_HW_SYNDROME: - if ((!ecc->calculate || !ecc->correct || !ecc->hwctl) && - (!ecc->read_page || - ecc->read_page == nand_read_page_hwecc || - !ecc->write_page || - ecc->write_page == nand_write_page_hwecc)) { - WARN(1, "No ECC functions supplied; hardware ECC not possible\n"); - ret = -EINVAL; + switch (ecc->engine_type) { + case NAND_ECC_ENGINE_TYPE_ON_HOST: + ret = nand_set_ecc_on_host_ops(chip); + if (ret) goto err_nand_manuf_cleanup; - } - /* Use standard syndrome read/write page function? */ - if (!ecc->read_page) - ecc->read_page = nand_read_page_syndrome; - if (!ecc->write_page) - ecc->write_page = nand_write_page_syndrome; - if (!ecc->read_page_raw) - ecc->read_page_raw = nand_read_page_raw_syndrome; - if (!ecc->write_page_raw) - ecc->write_page_raw = nand_write_page_raw_syndrome; - if (!ecc->read_oob) - ecc->read_oob = nand_read_oob_syndrome; - if (!ecc->write_oob) - ecc->write_oob = nand_write_oob_syndrome; if (mtd->writesize >= ecc->size) { if (!ecc->strength) { @@ -5842,18 +5692,17 @@ static int nand_scan_tail(struct nand_chip *chip) } pr_warn("%d byte HW ECC not possible on %d byte page size, fallback to SW ECC\n", ecc->size, mtd->writesize); - ecc->mode = NAND_ECC_SOFT; - ecc->algo = NAND_ECC_HAMMING; + ecc->engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + ecc->algo = NAND_ECC_ALGO_HAMMING; fallthrough; - case NAND_ECC_SOFT: + + case NAND_ECC_ENGINE_TYPE_SOFT: ret = nand_set_ecc_soft_ops(chip); - if (ret) { - ret = -EINVAL; + if (ret) goto err_nand_manuf_cleanup; - } break; - case NAND_ECC_ON_DIE: + case NAND_ECC_ENGINE_TYPE_ON_DIE: if (!ecc->read_page || !ecc->write_page) { WARN(1, "No ECC functions supplied; on-die ECC not possible\n"); ret = -EINVAL; @@ -5865,8 +5714,8 @@ static int nand_scan_tail(struct nand_chip *chip) ecc->write_oob = nand_write_oob_std; break; - case NAND_ECC_NONE: - pr_warn("NAND_ECC_NONE selected by board driver. This is not recommended!\n"); + case NAND_ECC_ENGINE_TYPE_NONE: + pr_warn("NAND_ECC_ENGINE_TYPE_NONE selected by board driver. This is not recommended!\n"); ecc->read_page = nand_read_page_raw; ecc->write_page = nand_write_page_raw; ecc->read_oob = nand_read_oob_std; @@ -5879,7 +5728,7 @@ static int nand_scan_tail(struct nand_chip *chip) break; default: - WARN(1, "Invalid NAND_ECC_MODE %d\n", ecc->mode); + WARN(1, "Invalid NAND_ECC_MODE %d\n", ecc->engine_type); ret = -EINVAL; goto err_nand_manuf_cleanup; } @@ -5913,7 +5762,10 @@ static int nand_scan_tail(struct nand_chip *chip) ret = -EINVAL; goto err_nand_manuf_cleanup; } + ecc->total = ecc->steps * ecc->bytes; + chip->base.ecc.ctx.total = ecc->total; + if (ecc->total > mtd->oobsize) { WARN(1, "Total number of ECC bytes exceeded oobsize\n"); ret = -EINVAL; @@ -5931,11 +5783,11 @@ static int nand_scan_tail(struct nand_chip *chip) mtd->oobavail = ret; /* ECC sanity check: warn if it's too weak */ - if (!nand_ecc_strength_good(chip)) + if (!nand_ecc_is_strong_enough(&chip->base)) pr_warn("WARNING: %s: the ECC used on your system (%db/%dB) is too weak compared to the one required by the NAND chip (%db/%dB)\n", mtd->name, chip->ecc.strength, chip->ecc.size, - chip->base.eccreq.strength, - chip->base.eccreq.step_size); + nanddev_get_ecc_requirements(&chip->base)->strength, + nanddev_get_ecc_requirements(&chip->base)->step_size); /* Allow subpage writes up to ecc.steps. Not possible for MLC flash */ if (!(chip->options & NAND_NO_SUBPAGE_WRITE) && nand_is_slc(chip)) { @@ -5956,8 +5808,8 @@ static int nand_scan_tail(struct nand_chip *chip) chip->pagecache.page = -1; /* Large page NAND with SOFT_ECC should support subpage reads */ - switch (ecc->mode) { - case NAND_ECC_SOFT: + switch (ecc->engine_type) { + case NAND_ECC_ENGINE_TYPE_SOFT: if (chip->page_shift > 9) chip->options |= NAND_SUBPAGE_READ; break; @@ -6101,8 +5953,8 @@ EXPORT_SYMBOL(nand_scan_with_ids); */ void nand_cleanup(struct nand_chip *chip) { - if (chip->ecc.mode == NAND_ECC_SOFT && - chip->ecc.algo == NAND_ECC_BCH) + if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT && + chip->ecc.algo == NAND_ECC_ALGO_BCH) nand_bch_free((struct nand_bch_control *)chip->ecc.priv); nanddev_cleanup(&chip->base); diff --git a/drivers/mtd/nand/raw/nand_bch.c b/drivers/mtd/nand/raw/nand_bch.c index d5af8c5fd02f..9d19ac14c196 100644 --- a/drivers/mtd/nand/raw/nand_bch.c +++ b/drivers/mtd/nand/raw/nand_bch.c @@ -165,6 +165,7 @@ struct nand_bch_control *nand_bch_init(struct mtd_info *mtd) */ nand->ecc.steps = eccsteps; nand->ecc.total = eccsteps * eccbytes; + nand->base.ecc.ctx.total = nand->ecc.total; if (mtd_ooblayout_count_eccbytes(mtd) != (eccsteps*eccbytes)) { pr_warn("invalid ecc layout\n"); goto fail; diff --git a/drivers/mtd/nand/raw/nand_esmt.c b/drivers/mtd/nand/raw/nand_esmt.c index 3338c68aaaf1..4412c407aef3 100644 --- a/drivers/mtd/nand/raw/nand_esmt.c +++ b/drivers/mtd/nand/raw/nand_esmt.c @@ -10,27 +10,32 @@ static void esmt_nand_decode_id(struct nand_chip *chip) { + struct nand_device *base = &chip->base; + struct nand_ecc_props requirements = {}; + nand_decode_ext_id(chip); /* Extract ECC requirements from 5th id byte. */ if (chip->id.len >= 5 && nand_is_slc(chip)) { - chip->base.eccreq.step_size = 512; + requirements.step_size = 512; switch (chip->id.data[4] & 0x3) { case 0x0: - chip->base.eccreq.strength = 4; + requirements.strength = 4; break; case 0x1: - chip->base.eccreq.strength = 2; + requirements.strength = 2; break; case 0x2: - chip->base.eccreq.strength = 1; + requirements.strength = 1; break; default: WARN(1, "Could not get ECC info"); - chip->base.eccreq.step_size = 0; + requirements.step_size = 0; break; } } + + nanddev_set_ecc_requirements(base, &requirements); } static int esmt_nand_init(struct nand_chip *chip) diff --git a/drivers/mtd/nand/raw/nand_hynix.c b/drivers/mtd/nand/raw/nand_hynix.c index 6d08eb834456..a9f50c9af109 100644 --- a/drivers/mtd/nand/raw/nand_hynix.c +++ b/drivers/mtd/nand/raw/nand_hynix.c @@ -495,34 +495,36 @@ static void hynix_nand_extract_oobsize(struct nand_chip *chip, static void hynix_nand_extract_ecc_requirements(struct nand_chip *chip, bool valid_jedecid) { + struct nand_device *base = &chip->base; + struct nand_ecc_props requirements = {}; u8 ecc_level = (chip->id.data[4] >> 4) & 0x7; if (valid_jedecid) { /* Reference: H27UCG8T2E datasheet */ - chip->base.eccreq.step_size = 1024; + requirements.step_size = 1024; switch (ecc_level) { case 0: - chip->base.eccreq.step_size = 0; - chip->base.eccreq.strength = 0; + requirements.step_size = 0; + requirements.strength = 0; break; case 1: - chip->base.eccreq.strength = 4; + requirements.strength = 4; break; case 2: - chip->base.eccreq.strength = 24; + requirements.strength = 24; break; case 3: - chip->base.eccreq.strength = 32; + requirements.strength = 32; break; case 4: - chip->base.eccreq.strength = 40; + requirements.strength = 40; break; case 5: - chip->base.eccreq.strength = 50; + requirements.strength = 50; break; case 6: - chip->base.eccreq.strength = 60; + requirements.strength = 60; break; default: /* @@ -543,14 +545,14 @@ static void hynix_nand_extract_ecc_requirements(struct nand_chip *chip, if (nand_tech < 3) { /* > 26nm, reference: H27UBG8T2A datasheet */ if (ecc_level < 5) { - chip->base.eccreq.step_size = 512; - chip->base.eccreq.strength = 1 << ecc_level; + requirements.step_size = 512; + requirements.strength = 1 << ecc_level; } else if (ecc_level < 7) { if (ecc_level == 5) - chip->base.eccreq.step_size = 2048; + requirements.step_size = 2048; else - chip->base.eccreq.step_size = 1024; - chip->base.eccreq.strength = 24; + requirements.step_size = 1024; + requirements.strength = 24; } else { /* * We should never reach this case, but if that @@ -563,18 +565,20 @@ static void hynix_nand_extract_ecc_requirements(struct nand_chip *chip, } else { /* <= 26nm, reference: H27UBG8T2B datasheet */ if (!ecc_level) { - chip->base.eccreq.step_size = 0; - chip->base.eccreq.strength = 0; + requirements.step_size = 0; + requirements.strength = 0; } else if (ecc_level < 5) { - chip->base.eccreq.step_size = 512; - chip->base.eccreq.strength = 1 << (ecc_level - 1); + requirements.step_size = 512; + requirements.strength = 1 << (ecc_level - 1); } else { - chip->base.eccreq.step_size = 1024; - chip->base.eccreq.strength = 24 + + requirements.step_size = 1024; + requirements.strength = 24 + (8 * (ecc_level - 5)); } } } + + nanddev_set_ecc_requirements(base, &requirements); } static void hynix_nand_extract_scrambling_requirements(struct nand_chip *chip, diff --git a/drivers/mtd/nand/raw/nand_jedec.c b/drivers/mtd/nand/raw/nand_jedec.c index b15c42f48755..85b6d9372d80 100644 --- a/drivers/mtd/nand/raw/nand_jedec.c +++ b/drivers/mtd/nand/raw/nand_jedec.c @@ -23,6 +23,7 @@ */ int nand_jedec_detect(struct nand_chip *chip) { + struct nand_device *base = &chip->base; struct mtd_info *mtd = nand_to_mtd(chip); struct nand_memory_organization *memorg; struct nand_jedec_params *p; @@ -120,8 +121,12 @@ int nand_jedec_detect(struct nand_chip *chip) ecc = &p->ecc_info[0]; if (ecc->codeword_size >= 9) { - chip->base.eccreq.strength = ecc->ecc_bits; - chip->base.eccreq.step_size = 1 << ecc->codeword_size; + struct nand_ecc_props requirements = { + .strength = ecc->ecc_bits, + .step_size = 1 << ecc->codeword_size, + }; + + nanddev_set_ecc_requirements(base, &requirements); } else { pr_warn("Invalid codeword size\n"); } diff --git a/drivers/mtd/nand/raw/nand_micron.c b/drivers/mtd/nand/raw/nand_micron.c index 4385092a9325..c0192881906b 100644 --- a/drivers/mtd/nand/raw/nand_micron.c +++ b/drivers/mtd/nand/raw/nand_micron.c @@ -413,6 +413,8 @@ enum { */ static int micron_supports_on_die_ecc(struct nand_chip *chip) { + const struct nand_ecc_props *requirements = + nanddev_get_ecc_requirements(&chip->base); u8 id[5]; int ret; @@ -425,7 +427,7 @@ static int micron_supports_on_die_ecc(struct nand_chip *chip) /* * We only support on-die ECC of 4/512 or 8/512 */ - if (chip->base.eccreq.strength != 4 && chip->base.eccreq.strength != 8) + if (requirements->strength != 4 && requirements->strength != 8) return MICRON_ON_DIE_UNSUPPORTED; /* 0x2 means on-die ECC is available. */ @@ -466,7 +468,7 @@ static int micron_supports_on_die_ecc(struct nand_chip *chip) /* * We only support on-die ECC of 4/512 or 8/512 */ - if (chip->base.eccreq.strength != 4 && chip->base.eccreq.strength != 8) + if (requirements->strength != 4 && requirements->strength != 8) return MICRON_ON_DIE_UNSUPPORTED; return MICRON_ON_DIE_SUPPORTED; @@ -474,6 +476,9 @@ static int micron_supports_on_die_ecc(struct nand_chip *chip) static int micron_nand_init(struct nand_chip *chip) { + struct nand_device *base = &chip->base; + const struct nand_ecc_props *requirements = + nanddev_get_ecc_requirements(base); struct mtd_info *mtd = nand_to_mtd(chip); struct micron_nand *micron; int ondie; @@ -497,13 +502,13 @@ static int micron_nand_init(struct nand_chip *chip) ondie = micron_supports_on_die_ecc(chip); if (ondie == MICRON_ON_DIE_MANDATORY && - chip->ecc.mode != NAND_ECC_ON_DIE) { + chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_DIE) { pr_err("On-die ECC forcefully enabled, not supported\n"); ret = -EINVAL; goto err_free_manuf_data; } - if (chip->ecc.mode == NAND_ECC_ON_DIE) { + if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_DIE) { if (ondie == MICRON_ON_DIE_UNSUPPORTED) { pr_err("On-die ECC selected but not supported\n"); ret = -EINVAL; @@ -523,7 +528,7 @@ static int micron_nand_init(struct nand_chip *chip) * That's not needed for 8-bit ECC, because the status expose * a better approximation of the number of bitflips in a page. */ - if (chip->base.eccreq.strength == 4) { + if (requirements->strength == 4) { micron->ecc.rawbuf = kmalloc(mtd->writesize + mtd->oobsize, GFP_KERNEL); @@ -533,17 +538,17 @@ static int micron_nand_init(struct nand_chip *chip) } } - if (chip->base.eccreq.strength == 4) + if (requirements->strength == 4) mtd_set_ooblayout(mtd, µn_nand_on_die_4_ooblayout_ops); else mtd_set_ooblayout(mtd, µn_nand_on_die_8_ooblayout_ops); - chip->ecc.bytes = chip->base.eccreq.strength * 2; + chip->ecc.bytes = requirements->strength * 2; chip->ecc.size = 512; - chip->ecc.strength = chip->base.eccreq.strength; - chip->ecc.algo = NAND_ECC_BCH; + chip->ecc.strength = requirements->strength; + chip->ecc.algo = NAND_ECC_ALGO_BCH; chip->ecc.read_page = micron_nand_read_page_on_die_ecc; chip->ecc.write_page = micron_nand_write_page_on_die_ecc; diff --git a/drivers/mtd/nand/raw/nand_onfi.c b/drivers/mtd/nand/raw/nand_onfi.c index be3456627288..45649e03797d 100644 --- a/drivers/mtd/nand/raw/nand_onfi.c +++ b/drivers/mtd/nand/raw/nand_onfi.c @@ -34,6 +34,8 @@ u16 onfi_crc16(u16 crc, u8 const *p, size_t len) static int nand_flash_detect_ext_param_page(struct nand_chip *chip, struct nand_onfi_params *p) { + struct nand_device *base = &chip->base; + struct nand_ecc_props requirements; struct onfi_ext_param_page *ep; struct onfi_ext_section *s; struct onfi_ext_ecc_info *ecc; @@ -94,8 +96,10 @@ static int nand_flash_detect_ext_param_page(struct nand_chip *chip, goto ext_out; } - chip->base.eccreq.strength = ecc->ecc_bits; - chip->base.eccreq.step_size = 1 << ecc->codeword_size; + requirements.strength = ecc->ecc_bits; + requirements.step_size = 1 << ecc->codeword_size; + nanddev_set_ecc_requirements(base, &requirements); + ret = 0; ext_out: @@ -139,6 +143,7 @@ static void nand_bit_wise_majority(const void **srcbufs, */ int nand_onfi_detect(struct nand_chip *chip) { + struct nand_device *base = &chip->base; struct mtd_info *mtd = nand_to_mtd(chip); struct nand_memory_organization *memorg; struct nand_onfi_params *p = NULL, *pbuf; @@ -265,8 +270,12 @@ int nand_onfi_detect(struct nand_chip *chip) chip->options |= NAND_BUSWIDTH_16; if (p->ecc_bits != 0xff) { - chip->base.eccreq.strength = p->ecc_bits; - chip->base.eccreq.step_size = 512; + struct nand_ecc_props requirements = { + .strength = p->ecc_bits, + .step_size = 512, + }; + + nanddev_set_ecc_requirements(base, &requirements); } else if (onfi_version >= 21 && (le16_to_cpu(p->features) & ONFI_FEATURE_EXT_PARAM_PAGE)) { diff --git a/drivers/mtd/nand/raw/nand_samsung.c b/drivers/mtd/nand/raw/nand_samsung.c index 3a4a19e808f6..0be6b7563805 100644 --- a/drivers/mtd/nand/raw/nand_samsung.c +++ b/drivers/mtd/nand/raw/nand_samsung.c @@ -10,6 +10,8 @@ static void samsung_nand_decode_id(struct nand_chip *chip) { + struct nand_device *base = &chip->base; + struct nand_ecc_props requirements = {}; struct mtd_info *mtd = nand_to_mtd(chip); struct nand_memory_organization *memorg; @@ -71,23 +73,23 @@ static void samsung_nand_decode_id(struct nand_chip *chip) /* Extract ECC requirements from 5th id byte*/ extid = (chip->id.data[4] >> 4) & 0x07; if (extid < 5) { - chip->base.eccreq.step_size = 512; - chip->base.eccreq.strength = 1 << extid; + requirements.step_size = 512; + requirements.strength = 1 << extid; } else { - chip->base.eccreq.step_size = 1024; + requirements.step_size = 1024; switch (extid) { case 5: - chip->base.eccreq.strength = 24; + requirements.strength = 24; break; case 6: - chip->base.eccreq.strength = 40; + requirements.strength = 40; break; case 7: - chip->base.eccreq.strength = 60; + requirements.strength = 60; break; default: WARN(1, "Could not decode ECC info"); - chip->base.eccreq.step_size = 0; + requirements.step_size = 0; } } } else { @@ -97,8 +99,8 @@ static void samsung_nand_decode_id(struct nand_chip *chip) switch (chip->id.data[1]) { /* K9F4G08U0D-S[I|C]B0(T00) */ case 0xDC: - chip->base.eccreq.step_size = 512; - chip->base.eccreq.strength = 1; + requirements.step_size = 512; + requirements.strength = 1; break; /* K9F1G08U0E 21nm chips do not support subpage write */ @@ -112,6 +114,8 @@ static void samsung_nand_decode_id(struct nand_chip *chip) } } } + + nanddev_set_ecc_requirements(base, &requirements); } static int samsung_nand_init(struct nand_chip *chip) diff --git a/drivers/mtd/nand/raw/nand_toshiba.c b/drivers/mtd/nand/raw/nand_toshiba.c index f746c19f3b2c..cf4f37959421 100644 --- a/drivers/mtd/nand/raw/nand_toshiba.c +++ b/drivers/mtd/nand/raw/nand_toshiba.c @@ -140,11 +140,13 @@ static void toshiba_nand_benand_init(struct nand_chip *chip) chip->options |= NAND_SUBPAGE_READ; - mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops); + mtd_set_ooblayout(mtd, nand_get_large_page_ooblayout()); } static void toshiba_nand_decode_id(struct nand_chip *chip) { + struct nand_device *base = &chip->base; + struct nand_ecc_props requirements = {}; struct mtd_info *mtd = nand_to_mtd(chip); struct nand_memory_organization *memorg; @@ -175,23 +177,25 @@ static void toshiba_nand_decode_id(struct nand_chip *chip) * - 24nm: 8 bit ECC for each 512Byte is required. */ if (chip->id.len >= 6 && nand_is_slc(chip)) { - chip->base.eccreq.step_size = 512; + requirements.step_size = 512; switch (chip->id.data[5] & 0x7) { case 0x4: - chip->base.eccreq.strength = 1; + requirements.strength = 1; break; case 0x5: - chip->base.eccreq.strength = 4; + requirements.strength = 4; break; case 0x6: - chip->base.eccreq.strength = 8; + requirements.strength = 8; break; default: WARN(1, "Could not get ECC info"); - chip->base.eccreq.step_size = 0; + requirements.step_size = 0; break; } } + + nanddev_set_ecc_requirements(base, &requirements); } static int @@ -273,7 +277,8 @@ static int toshiba_nand_init(struct nand_chip *chip) chip->options |= NAND_BBM_FIRSTPAGE | NAND_BBM_SECONDPAGE; /* Check that chip is BENAND and ECC mode is on-die */ - if (nand_is_slc(chip) && chip->ecc.mode == NAND_ECC_ON_DIE && + if (nand_is_slc(chip) && + chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_DIE && chip->id.data[4] & TOSHIBA_NAND_ID4_IS_BENAND) toshiba_nand_benand_init(chip); diff --git a/drivers/mtd/nand/raw/nandsim.c b/drivers/mtd/nand/raw/nandsim.c index f5a53aac3c5f..a8048cb8d220 100644 --- a/drivers/mtd/nand/raw/nandsim.c +++ b/drivers/mtd/nand/raw/nandsim.c @@ -2234,8 +2234,8 @@ static int ns_attach_chip(struct nand_chip *chip) return -EINVAL; } - chip->ecc.mode = NAND_ECC_SOFT; - chip->ecc.algo = NAND_ECC_BCH; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + chip->ecc.algo = NAND_ECC_ALGO_BCH; chip->ecc.size = 512; chip->ecc.strength = bch; chip->ecc.bytes = eccbytes; @@ -2274,8 +2274,8 @@ static int __init ns_init_module(void) nsmtd = nand_to_mtd(chip); nand_set_controller_data(chip, (void *)ns); - chip->ecc.mode = NAND_ECC_SOFT; - chip->ecc.algo = NAND_ECC_HAMMING; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + chip->ecc.algo = NAND_ECC_ALGO_HAMMING; /* The NAND_SKIP_BBTSCAN option is necessary for 'overridesize' */ /* and 'badblocks' parameters to work */ chip->options |= NAND_SKIP_BBTSCAN; diff --git a/drivers/mtd/nand/raw/ndfc.c b/drivers/mtd/nand/raw/ndfc.c index ed38338c1383..0fb4ba93c41e 100644 --- a/drivers/mtd/nand/raw/ndfc.c +++ b/drivers/mtd/nand/raw/ndfc.c @@ -149,7 +149,7 @@ static int ndfc_chip_init(struct ndfc_controller *ndfc, chip->ecc.correct = nand_correct_data; chip->ecc.hwctl = ndfc_enable_hwecc; chip->ecc.calculate = ndfc_calculate_ecc; - chip->ecc.mode = NAND_ECC_HW; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; chip->ecc.size = 256; chip->ecc.bytes = 3; chip->ecc.strength = 1; diff --git a/drivers/mtd/nand/raw/omap2.c b/drivers/mtd/nand/raw/omap2.c index eb7fcfd9276b..512f60780a50 100644 --- a/drivers/mtd/nand/raw/omap2.c +++ b/drivers/mtd/nand/raw/omap2.c @@ -884,8 +884,8 @@ static int omap_correct_data(struct nand_chip *chip, u_char *dat, int stat = 0; /* Ex NAND_ECC_HW12_2048 */ - if ((info->nand.ecc.mode == NAND_ECC_HW) && - (info->nand.ecc.size == 2048)) + if (info->nand.ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_HOST && + info->nand.ecc.size == 2048) blockCnt = 4; else blockCnt = 1; @@ -2006,12 +2006,12 @@ static int omap_nand_attach_chip(struct nand_chip *chip) return -EINVAL; /* - * Bail out earlier to let NAND_ECC_SOFT code create its own + * Bail out earlier to let NAND_ECC_ENGINE_TYPE_SOFT code create its own * ooblayout instead of using ours. */ if (info->ecc_opt == OMAP_ECC_HAM1_CODE_SW) { - chip->ecc.mode = NAND_ECC_SOFT; - chip->ecc.algo = NAND_ECC_HAMMING; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + chip->ecc.algo = NAND_ECC_ALGO_HAMMING; return 0; } @@ -2019,7 +2019,7 @@ static int omap_nand_attach_chip(struct nand_chip *chip) switch (info->ecc_opt) { case OMAP_ECC_HAM1_CODE_HW: dev_info(dev, "nand: using OMAP_ECC_HAM1_CODE_HW\n"); - chip->ecc.mode = NAND_ECC_HW; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; chip->ecc.bytes = 3; chip->ecc.size = 512; chip->ecc.strength = 1; @@ -2036,7 +2036,7 @@ static int omap_nand_attach_chip(struct nand_chip *chip) case OMAP_ECC_BCH4_CODE_HW_DETECTION_SW: pr_info("nand: using OMAP_ECC_BCH4_CODE_HW_DETECTION_SW\n"); - chip->ecc.mode = NAND_ECC_HW; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; chip->ecc.size = 512; chip->ecc.bytes = 7; chip->ecc.strength = 4; @@ -2056,7 +2056,7 @@ static int omap_nand_attach_chip(struct nand_chip *chip) case OMAP_ECC_BCH4_CODE_HW: pr_info("nand: using OMAP_ECC_BCH4_CODE_HW ECC scheme\n"); - chip->ecc.mode = NAND_ECC_HW; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; chip->ecc.size = 512; /* 14th bit is kept reserved for ROM-code compatibility */ chip->ecc.bytes = 7 + 1; @@ -2078,7 +2078,7 @@ static int omap_nand_attach_chip(struct nand_chip *chip) case OMAP_ECC_BCH8_CODE_HW_DETECTION_SW: pr_info("nand: using OMAP_ECC_BCH8_CODE_HW_DETECTION_SW\n"); - chip->ecc.mode = NAND_ECC_HW; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; chip->ecc.size = 512; chip->ecc.bytes = 13; chip->ecc.strength = 8; @@ -2098,7 +2098,7 @@ static int omap_nand_attach_chip(struct nand_chip *chip) case OMAP_ECC_BCH8_CODE_HW: pr_info("nand: using OMAP_ECC_BCH8_CODE_HW ECC scheme\n"); - chip->ecc.mode = NAND_ECC_HW; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; chip->ecc.size = 512; /* 14th bit is kept reserved for ROM-code compatibility */ chip->ecc.bytes = 13 + 1; @@ -2121,7 +2121,7 @@ static int omap_nand_attach_chip(struct nand_chip *chip) case OMAP_ECC_BCH16_CODE_HW: pr_info("Using OMAP_ECC_BCH16_CODE_HW ECC scheme\n"); - chip->ecc.mode = NAND_ECC_HW; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; chip->ecc.size = 512; chip->ecc.bytes = 26; chip->ecc.strength = 16; diff --git a/drivers/mtd/nand/raw/orion_nand.c b/drivers/mtd/nand/raw/orion_nand.c index 880b54ca1b41..df9c0f8e4b4e 100644 --- a/drivers/mtd/nand/raw/orion_nand.c +++ b/drivers/mtd/nand/raw/orion_nand.c @@ -139,8 +139,8 @@ static int __init orion_nand_probe(struct platform_device *pdev) nc->legacy.IO_ADDR_R = nc->legacy.IO_ADDR_W = io_base; nc->legacy.cmd_ctrl = orion_nand_cmd_ctrl; nc->legacy.read_buf = orion_nand_read_buf; - nc->ecc.mode = NAND_ECC_SOFT; - nc->ecc.algo = NAND_ECC_HAMMING; + nc->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + nc->ecc.algo = NAND_ECC_ALGO_HAMMING; if (board->chip_delay) nc->legacy.chip_delay = board->chip_delay; diff --git a/drivers/mtd/nand/raw/pasemi_nand.c b/drivers/mtd/nand/raw/pasemi_nand.c index d8eca8c3fdcd..2b8f155cc0c5 100644 --- a/drivers/mtd/nand/raw/pasemi_nand.c +++ b/drivers/mtd/nand/raw/pasemi_nand.c @@ -68,7 +68,7 @@ static void pasemi_hwcontrol(struct nand_chip *chip, int cmd, inl(lpcctl); } -int pasemi_device_ready(struct nand_chip *chip) +static int pasemi_device_ready(struct nand_chip *chip) { return !!(inl(lpcctl) & LBICTRL_LPCCTL_NR); } @@ -132,8 +132,8 @@ static int pasemi_nand_probe(struct platform_device *ofdev) chip->legacy.read_buf = pasemi_read_buf; chip->legacy.write_buf = pasemi_write_buf; chip->legacy.chip_delay = 0; - chip->ecc.mode = NAND_ECC_SOFT; - chip->ecc.algo = NAND_ECC_HAMMING; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + chip->ecc.algo = NAND_ECC_ALGO_HAMMING; /* Enable the following for a flash based bad block table */ chip->bbt_options = NAND_BBT_USE_FLASH; diff --git a/drivers/mtd/nand/raw/plat_nand.c b/drivers/mtd/nand/raw/plat_nand.c index 556182f26057..b98c0d5c413f 100644 --- a/drivers/mtd/nand/raw/plat_nand.c +++ b/drivers/mtd/nand/raw/plat_nand.c @@ -66,8 +66,8 @@ static int plat_nand_probe(struct platform_device *pdev) data->chip.options |= pdata->chip.options; data->chip.bbt_options |= pdata->chip.bbt_options; - data->chip.ecc.mode = NAND_ECC_SOFT; - data->chip.ecc.algo = NAND_ECC_HAMMING; + data->chip.ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + data->chip.ecc.algo = NAND_ECC_ALGO_HAMMING; platform_set_drvdata(pdev, data); diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c index bd7a7251429b..777fb0de0680 100644 --- a/drivers/mtd/nand/raw/qcom_nandc.c +++ b/drivers/mtd/nand/raw/qcom_nandc.c @@ -2550,7 +2550,7 @@ static int qcom_nand_attach_chip(struct nand_chip *chip) ecc->write_page_raw = qcom_nandc_write_page_raw; ecc->write_oob = qcom_nandc_write_oob; - ecc->mode = NAND_ECC_HW; + ecc->engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; mtd_set_ooblayout(mtd, &qcom_nand_ooblayout_ops); @@ -2702,10 +2702,8 @@ static int qcom_nandc_alloc(struct qcom_nand_controller *nandc) if (IS_ERR(nandc->tx_chan)) { ret = PTR_ERR(nandc->tx_chan); nandc->tx_chan = NULL; - if (ret != -EPROBE_DEFER) - dev_err(nandc->dev, - "tx DMA channel request failed: %d\n", - ret); + dev_err_probe(nandc->dev, ret, + "tx DMA channel request failed\n"); goto unalloc; } @@ -2713,10 +2711,8 @@ static int qcom_nandc_alloc(struct qcom_nand_controller *nandc) if (IS_ERR(nandc->rx_chan)) { ret = PTR_ERR(nandc->rx_chan); nandc->rx_chan = NULL; - if (ret != -EPROBE_DEFER) - dev_err(nandc->dev, - "rx DMA channel request failed: %d\n", - ret); + dev_err_probe(nandc->dev, ret, + "rx DMA channel request failed\n"); goto unalloc; } @@ -2724,10 +2720,8 @@ static int qcom_nandc_alloc(struct qcom_nand_controller *nandc) if (IS_ERR(nandc->cmd_chan)) { ret = PTR_ERR(nandc->cmd_chan); nandc->cmd_chan = NULL; - if (ret != -EPROBE_DEFER) - dev_err(nandc->dev, - "cmd DMA channel request failed: %d\n", - ret); + dev_err_probe(nandc->dev, ret, + "cmd DMA channel request failed\n"); goto unalloc; } @@ -2750,10 +2744,8 @@ static int qcom_nandc_alloc(struct qcom_nand_controller *nandc) if (IS_ERR(nandc->chan)) { ret = PTR_ERR(nandc->chan); nandc->chan = NULL; - if (ret != -EPROBE_DEFER) - dev_err(nandc->dev, - "rxtx DMA channel request failed: %d\n", - ret); + dev_err_probe(nandc->dev, ret, + "rxtx DMA channel request failed\n"); return ret; } } diff --git a/drivers/mtd/nand/raw/r852.c b/drivers/mtd/nand/raw/r852.c index f865e3a47b01..6b7addd2c420 100644 --- a/drivers/mtd/nand/raw/r852.c +++ b/drivers/mtd/nand/raw/r852.c @@ -859,7 +859,8 @@ static int r852_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) chip->legacy.write_buf = r852_write_buf; /* ecc */ - chip->ecc.mode = NAND_ECC_HW_SYNDROME; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; + chip->ecc.placement = NAND_ECC_PLACEMENT_INTERLEAVED; chip->ecc.size = R852_DMA_LEN; chip->ecc.bytes = SM_OOB_SIZE; chip->ecc.strength = 2; diff --git a/drivers/mtd/nand/raw/s3c2410.c b/drivers/mtd/nand/raw/s3c2410.c index 105522205979..fbd0fa48e063 100644 --- a/drivers/mtd/nand/raw/s3c2410.c +++ b/drivers/mtd/nand/raw/s3c2410.c @@ -904,7 +904,7 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info, nmtd->info = info; nmtd->set = set; - chip->ecc.mode = info->platform->ecc_mode; + chip->ecc.engine_type = info->platform->engine_type; /* * If you use u-boot BBT creation code, specifying this flag will @@ -929,24 +929,24 @@ static int s3c2410_nand_attach_chip(struct nand_chip *chip) struct mtd_info *mtd = nand_to_mtd(chip); struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd); - switch (chip->ecc.mode) { + switch (chip->ecc.engine_type) { - case NAND_ECC_NONE: + case NAND_ECC_ENGINE_TYPE_NONE: dev_info(info->device, "ECC disabled\n"); break; - case NAND_ECC_SOFT: + case NAND_ECC_ENGINE_TYPE_SOFT: /* - * This driver expects Hamming based ECC when ecc_mode is set - * to NAND_ECC_SOFT. Force ecc.algo to NAND_ECC_HAMMING to - * avoid adding an extra ecc_algo field to - * s3c2410_platform_nand. + * This driver expects Hamming based ECC when engine_type is set + * to NAND_ECC_ENGINE_TYPE_SOFT. Force ecc.algo to + * NAND_ECC_ALGO_HAMMING to avoid adding an extra ecc_algo field + * to s3c2410_platform_nand. */ - chip->ecc.algo = NAND_ECC_HAMMING; + chip->ecc.algo = NAND_ECC_ALGO_HAMMING; dev_info(info->device, "soft ECC\n"); break; - case NAND_ECC_HW: + case NAND_ECC_ENGINE_TYPE_ON_HOST: chip->ecc.calculate = s3c2410_nand_calculate_ecc; chip->ecc.correct = s3c2410_nand_correct_data; chip->ecc.strength = 1; diff --git a/drivers/mtd/nand/raw/sh_flctl.c b/drivers/mtd/nand/raw/sh_flctl.c index a661b8bb2dd5..13df4bdf792a 100644 --- a/drivers/mtd/nand/raw/sh_flctl.c +++ b/drivers/mtd/nand/raw/sh_flctl.c @@ -1039,13 +1039,13 @@ static int flctl_chip_attach_chip(struct nand_chip *chip) chip->ecc.strength = 4; chip->ecc.read_page = flctl_read_page_hwecc; chip->ecc.write_page = flctl_write_page_hwecc; - chip->ecc.mode = NAND_ECC_HW; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; /* 4 symbols ECC enabled */ flctl->flcmncr_base |= _4ECCEN; } else { - chip->ecc.mode = NAND_ECC_SOFT; - chip->ecc.algo = NAND_ECC_HAMMING; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + chip->ecc.algo = NAND_ECC_ALGO_HAMMING; } return 0; diff --git a/drivers/mtd/nand/raw/sharpsl.c b/drivers/mtd/nand/raw/sharpsl.c index 51286f7acf54..1327bfb3d5d3 100644 --- a/drivers/mtd/nand/raw/sharpsl.c +++ b/drivers/mtd/nand/raw/sharpsl.c @@ -157,7 +157,7 @@ static int sharpsl_nand_probe(struct platform_device *pdev) /* 15 us command delay time */ this->legacy.chip_delay = 15; /* set eccmode using hardware ECC */ - this->ecc.mode = NAND_ECC_HW; + this->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; this->ecc.size = 256; this->ecc.bytes = 3; this->ecc.strength = 1; diff --git a/drivers/mtd/nand/raw/socrates_nand.c b/drivers/mtd/nand/raw/socrates_nand.c index 243b34cfbc1b..0f63ff6f7fe7 100644 --- a/drivers/mtd/nand/raw/socrates_nand.c +++ b/drivers/mtd/nand/raw/socrates_nand.c @@ -153,8 +153,9 @@ static int socrates_nand_probe(struct platform_device *ofdev) nand_chip->legacy.read_buf = socrates_nand_read_buf; nand_chip->legacy.dev_ready = socrates_nand_device_ready; - nand_chip->ecc.mode = NAND_ECC_SOFT; /* enable ECC */ - nand_chip->ecc.algo = NAND_ECC_HAMMING; + /* enable ECC */ + nand_chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + nand_chip->ecc.algo = NAND_ECC_ALGO_HAMMING; /* TODO: I have no idea what real delay is. */ nand_chip->legacy.chip_delay = 20; /* 20us command delay time */ diff --git a/drivers/mtd/nand/raw/stm32_fmc2_nand.c b/drivers/mtd/nand/raw/stm32_fmc2_nand.c index 7f4546ae9130..b31a5818234d 100644 --- a/drivers/mtd/nand/raw/stm32_fmc2_nand.c +++ b/drivers/mtd/nand/raw/stm32_fmc2_nand.c @@ -1696,14 +1696,15 @@ static int stm32_fmc2_nfc_attach_chip(struct nand_chip *chip) int ret; /* - * Only NAND_ECC_HW mode is actually supported + * Only NAND_ECC_ENGINE_TYPE_ON_HOST mode is actually supported * Hamming => ecc.strength = 1 * BCH4 => ecc.strength = 4 * BCH8 => ecc.strength = 8 * ECC sector size = 512 */ - if (chip->ecc.mode != NAND_ECC_HW) { - dev_err(nfc->dev, "nand_ecc_mode is not well defined in the DT\n"); + if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST) { + dev_err(nfc->dev, + "nand_ecc_engine_type is not well defined in the DT\n"); return -EINVAL; } @@ -1762,7 +1763,7 @@ static int stm32_fmc2_nfc_parse_child(struct stm32_fmc2_nfc *nfc, return ret; } - if (cs > FMC2_MAX_CE) { + if (cs >= FMC2_MAX_CE) { dev_err(nfc->dev, "invalid reg value: %d\n", cs); return -EINVAL; } @@ -1952,7 +1953,7 @@ static int stm32_fmc2_nfc_probe(struct platform_device *pdev) NAND_USES_DMA; /* Default ECC settings */ - chip->ecc.mode = NAND_ECC_HW; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; chip->ecc.size = FMC2_ECC_STEP_SIZE; chip->ecc.strength = FMC2_ECC_BCH8; diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c index 9c50c2b965e1..2a7ca3072f35 100644 --- a/drivers/mtd/nand/raw/sunxi_nand.c +++ b/drivers/mtd/nand/raw/sunxi_nand.c @@ -1575,7 +1575,7 @@ static int sunxi_nand_ooblayout_free(struct mtd_info *mtd, int section, * only have 2 bytes available in the first user data * section. */ - if (!section && ecc->mode == NAND_ECC_HW) { + if (!section && ecc->engine_type == NAND_ECC_ENGINE_TYPE_ON_HOST) { oobregion->offset = 2; oobregion->length = 2; @@ -1609,12 +1609,13 @@ static int sunxi_nand_hw_ecc_ctrl_init(struct nand_chip *nand, static const u8 strengths[] = { 16, 24, 28, 32, 40, 48, 56, 60, 64 }; struct sunxi_nfc *nfc = to_sunxi_nfc(nand->controller); struct mtd_info *mtd = nand_to_mtd(nand); + struct nand_device *nanddev = mtd_to_nanddev(mtd); struct sunxi_nand_hw_ecc *data; int nsectors; int ret; int i; - if (ecc->options & NAND_ECC_MAXIMIZE) { + if (nanddev->ecc.user_conf.flags & NAND_ECC_MAXIMIZE_STRENGTH) { int bytes; ecc->size = 1024; @@ -1720,11 +1721,11 @@ err: static void sunxi_nand_ecc_cleanup(struct nand_ecc_ctrl *ecc) { - switch (ecc->mode) { - case NAND_ECC_HW: + switch (ecc->engine_type) { + case NAND_ECC_ENGINE_TYPE_ON_HOST: sunxi_nand_hw_ecc_ctrl_cleanup(ecc); break; - case NAND_ECC_NONE: + case NAND_ECC_ENGINE_TYPE_NONE: default: break; } @@ -1732,6 +1733,8 @@ static void sunxi_nand_ecc_cleanup(struct nand_ecc_ctrl *ecc) static int sunxi_nand_attach_chip(struct nand_chip *nand) { + const struct nand_ecc_props *requirements = + nanddev_get_ecc_requirements(&nand->base); struct nand_ecc_ctrl *ecc = &nand->ecc; struct device_node *np = nand_get_flash_node(nand); int ret; @@ -1745,21 +1748,21 @@ static int sunxi_nand_attach_chip(struct nand_chip *nand) nand->options |= NAND_SUBPAGE_READ; if (!ecc->size) { - ecc->size = nand->base.eccreq.step_size; - ecc->strength = nand->base.eccreq.strength; + ecc->size = requirements->step_size; + ecc->strength = requirements->strength; } if (!ecc->size || !ecc->strength) return -EINVAL; - switch (ecc->mode) { - case NAND_ECC_HW: + switch (ecc->engine_type) { + case NAND_ECC_ENGINE_TYPE_ON_HOST: ret = sunxi_nand_hw_ecc_ctrl_init(nand, ecc, np); if (ret) return ret; break; - case NAND_ECC_NONE: - case NAND_ECC_SOFT: + case NAND_ECC_ENGINE_TYPE_NONE: + case NAND_ECC_ENGINE_TYPE_SOFT: break; default: return -EINVAL; @@ -1991,7 +1994,7 @@ static int sunxi_nand_chip_init(struct device *dev, struct sunxi_nfc *nfc, * Set the ECC mode to the default value in case nothing is specified * in the DT. */ - nand->ecc.mode = NAND_ECC_HW; + nand->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; nand_set_flash_node(nand, np); mtd = nand_to_mtd(nand); diff --git a/drivers/mtd/nand/raw/tango_nand.c b/drivers/mtd/nand/raw/tango_nand.c index bdb965ae7a4a..359187b5a4be 100644 --- a/drivers/mtd/nand/raw/tango_nand.c +++ b/drivers/mtd/nand/raw/tango_nand.c @@ -549,8 +549,8 @@ static int tango_attach_chip(struct nand_chip *chip) { struct nand_ecc_ctrl *ecc = &chip->ecc; - ecc->mode = NAND_ECC_HW; - ecc->algo = NAND_ECC_BCH; + ecc->engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; + ecc->algo = NAND_ECC_ALGO_BCH; ecc->bytes = DIV_ROUND_UP(ecc->strength * FIELD_ORDER, BITS_PER_BYTE); ecc->read_page_raw = tango_read_page_raw; diff --git a/drivers/mtd/nand/raw/tegra_nand.c b/drivers/mtd/nand/raw/tegra_nand.c index 6b6212ffa01c..fbf67722a049 100644 --- a/drivers/mtd/nand/raw/tegra_nand.c +++ b/drivers/mtd/nand/raw/tegra_nand.c @@ -479,7 +479,7 @@ static void tegra_nand_hw_ecc(struct tegra_nand_controller *ctrl, { struct tegra_nand_chip *nand = to_tegra_chip(chip); - if (chip->ecc.algo == NAND_ECC_BCH && enable) + if (chip->ecc.algo == NAND_ECC_ALGO_BCH && enable) writel_relaxed(nand->bch_config, ctrl->regs + BCH_CONFIG); else writel_relaxed(0, ctrl->regs + BCH_CONFIG); @@ -840,7 +840,10 @@ static int tegra_nand_get_strength(struct nand_chip *chip, const int *strength, int strength_len, int bits_per_step, int oobsize) { - bool maximize = chip->ecc.options & NAND_ECC_MAXIMIZE; + struct nand_device *base = mtd_to_nanddev(nand_to_mtd(chip)); + const struct nand_ecc_props *requirements = + nanddev_get_ecc_requirements(base); + bool maximize = base->ecc.user_conf.flags & NAND_ECC_MAXIMIZE_STRENGTH; int i; /* @@ -855,7 +858,7 @@ static int tegra_nand_get_strength(struct nand_chip *chip, const int *strength, } else { strength_sel = strength[i]; - if (strength_sel < chip->base.eccreq.strength) + if (strength_sel < requirements->strength) continue; } @@ -877,7 +880,7 @@ static int tegra_nand_select_strength(struct nand_chip *chip, int oobsize) int strength_len, bits_per_step; switch (chip->ecc.algo) { - case NAND_ECC_RS: + case NAND_ECC_ALGO_RS: bits_per_step = BITS_PER_STEP_RS; if (chip->options & NAND_IS_BOOT_MEDIUM) { strength = rs_strength_bootable; @@ -887,7 +890,7 @@ static int tegra_nand_select_strength(struct nand_chip *chip, int oobsize) strength_len = ARRAY_SIZE(rs_strength); } break; - case NAND_ECC_BCH: + case NAND_ECC_ALGO_BCH: bits_per_step = BITS_PER_STEP_BCH; if (chip->options & NAND_IS_BOOT_MEDIUM) { strength = bch_strength_bootable; @@ -908,6 +911,8 @@ static int tegra_nand_select_strength(struct nand_chip *chip, int oobsize) static int tegra_nand_attach_chip(struct nand_chip *chip) { struct tegra_nand_controller *ctrl = to_tegra_ctrl(chip->controller); + const struct nand_ecc_props *requirements = + nanddev_get_ecc_requirements(&chip->base); struct tegra_nand_chip *nand = to_tegra_chip(chip); struct mtd_info *mtd = nand_to_mtd(chip); int bits_per_step; @@ -916,12 +921,12 @@ static int tegra_nand_attach_chip(struct nand_chip *chip) if (chip->bbt_options & NAND_BBT_USE_FLASH) chip->bbt_options |= NAND_BBT_NO_OOB; - chip->ecc.mode = NAND_ECC_HW; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; chip->ecc.size = 512; chip->ecc.steps = mtd->writesize / chip->ecc.size; - if (chip->base.eccreq.step_size != 512) { + if (requirements->step_size != 512) { dev_err(ctrl->dev, "Unsupported step size %d\n", - chip->base.eccreq.step_size); + requirements->step_size); return -EINVAL; } @@ -935,14 +940,14 @@ static int tegra_nand_attach_chip(struct nand_chip *chip) if (chip->options & NAND_BUSWIDTH_16) nand->config |= CONFIG_BUS_WIDTH_16; - if (chip->ecc.algo == NAND_ECC_UNKNOWN) { + if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) { if (mtd->writesize < 2048) - chip->ecc.algo = NAND_ECC_RS; + chip->ecc.algo = NAND_ECC_ALGO_RS; else - chip->ecc.algo = NAND_ECC_BCH; + chip->ecc.algo = NAND_ECC_ALGO_BCH; } - if (chip->ecc.algo == NAND_ECC_BCH && mtd->writesize < 2048) { + if (chip->ecc.algo == NAND_ECC_ALGO_BCH && mtd->writesize < 2048) { dev_err(ctrl->dev, "BCH supports 2K or 4K page size only\n"); return -EINVAL; } @@ -952,7 +957,7 @@ static int tegra_nand_attach_chip(struct nand_chip *chip) if (ret < 0) { dev_err(ctrl->dev, "No valid strength found, minimum %d\n", - chip->base.eccreq.strength); + requirements->strength); return ret; } @@ -963,7 +968,7 @@ static int tegra_nand_attach_chip(struct nand_chip *chip) CONFIG_SKIP_SPARE_SIZE_4; switch (chip->ecc.algo) { - case NAND_ECC_RS: + case NAND_ECC_ALGO_RS: bits_per_step = BITS_PER_STEP_RS * chip->ecc.strength; mtd_set_ooblayout(mtd, &tegra_nand_oob_rs_ops); nand->config_ecc |= CONFIG_HW_ECC | CONFIG_ECC_SEL | @@ -984,7 +989,7 @@ static int tegra_nand_attach_chip(struct nand_chip *chip) return -EINVAL; } break; - case NAND_ECC_BCH: + case NAND_ECC_ALGO_BCH: bits_per_step = BITS_PER_STEP_BCH * chip->ecc.strength; mtd_set_ooblayout(mtd, &tegra_nand_oob_bch_ops); nand->bch_config = BCH_ENABLE; @@ -1013,7 +1018,7 @@ static int tegra_nand_attach_chip(struct nand_chip *chip) } dev_info(ctrl->dev, "Using %s with strength %d per 512 byte step\n", - chip->ecc.algo == NAND_ECC_BCH ? "BCH" : "RS", + chip->ecc.algo == NAND_ECC_ALGO_BCH ? "BCH" : "RS", chip->ecc.strength); chip->ecc.bytes = DIV_ROUND_UP(bits_per_step, BITS_PER_BYTE); diff --git a/drivers/mtd/nand/raw/tmio_nand.c b/drivers/mtd/nand/raw/tmio_nand.c index 843a8683b737..235a2f7b1bad 100644 --- a/drivers/mtd/nand/raw/tmio_nand.c +++ b/drivers/mtd/nand/raw/tmio_nand.c @@ -410,7 +410,7 @@ static int tmio_probe(struct platform_device *dev) nand_chip->legacy.read_buf = tmio_nand_read_buf; /* set eccmode using hardware ECC */ - nand_chip->ecc.mode = NAND_ECC_HW; + nand_chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; nand_chip->ecc.size = 512; nand_chip->ecc.bytes = 6; nand_chip->ecc.strength = 2; diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c index 47d966871445..ef81dce6b5c4 100644 --- a/drivers/mtd/nand/raw/txx9ndfmc.c +++ b/drivers/mtd/nand/raw/txx9ndfmc.c @@ -329,7 +329,7 @@ static int __init txx9ndfmc_probe(struct platform_device *dev) chip->ecc.calculate = txx9ndfmc_calculate_ecc; chip->ecc.correct = txx9ndfmc_correct_data; chip->ecc.hwctl = txx9ndfmc_enable_hwecc; - chip->ecc.mode = NAND_ECC_HW; + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; chip->ecc.strength = 1; chip->legacy.chip_delay = 100; chip->controller = &drvdata->controller; diff --git a/drivers/mtd/nand/raw/vf610_nfc.c b/drivers/mtd/nand/raw/vf610_nfc.c index 7248c5901183..40d70f991d89 100644 --- a/drivers/mtd/nand/raw/vf610_nfc.c +++ b/drivers/mtd/nand/raw/vf610_nfc.c @@ -323,11 +323,6 @@ static inline void vf610_nfc_ecc_mode(struct vf610_nfc *nfc, int ecc_mode) CONFIG_ECC_MODE_SHIFT, ecc_mode); } -static inline void vf610_nfc_transfer_size(struct vf610_nfc *nfc, int size) -{ - vf610_nfc_write(nfc, NFC_SECTOR_SIZE, size); -} - static inline void vf610_nfc_run(struct vf610_nfc *nfc, u32 col, u32 row, u32 cmd1, u32 cmd2, u32 trfr_sz) { @@ -732,7 +727,7 @@ static void vf610_nfc_init_controller(struct vf610_nfc *nfc) else vf610_nfc_clear(nfc, NFC_FLASH_CONFIG, CONFIG_16BIT); - if (nfc->chip.ecc.mode == NAND_ECC_HW) { + if (nfc->chip.ecc.engine_type == NAND_ECC_ENGINE_TYPE_ON_HOST) { /* Set ECC status offset in SRAM */ vf610_nfc_set_field(nfc, NFC_FLASH_CONFIG, CONFIG_ECC_SRAM_ADDR_MASK, @@ -761,7 +756,7 @@ static int vf610_nfc_attach_chip(struct nand_chip *chip) return -ENXIO; } - if (chip->ecc.mode != NAND_ECC_HW) + if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST) return 0; if (mtd->writesize != PAGE_2K && mtd->oobsize < 64) { @@ -779,7 +774,7 @@ static int vf610_nfc_attach_chip(struct nand_chip *chip) mtd->oobsize = 64; /* Use default large page ECC layout defined in NAND core */ - mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops); + mtd_set_ooblayout(mtd, nand_get_large_page_ooblayout()); if (chip->ecc.strength == 32) { nfc->ecc_mode = ECC_60_BYTE; chip->ecc.bytes = 60; @@ -852,8 +847,10 @@ static int vf610_nfc_probe(struct platform_device *pdev) } of_id = of_match_device(vf610_nfc_dt_ids, &pdev->dev); - if (!of_id) - return -ENODEV; + if (!of_id) { + err = -ENODEV; + goto err_disable_clk; + } nfc->variant = (enum vf610_nfc_variant)of_id->data; diff --git a/drivers/mtd/nand/raw/xway_nand.c b/drivers/mtd/nand/raw/xway_nand.c index 29255476afdb..f2dbd63a5c1f 100644 --- a/drivers/mtd/nand/raw/xway_nand.c +++ b/drivers/mtd/nand/raw/xway_nand.c @@ -180,8 +180,8 @@ static int xway_nand_probe(struct platform_device *pdev) data->chip.legacy.read_byte = xway_read_byte; data->chip.legacy.chip_delay = 30; - data->chip.ecc.mode = NAND_ECC_SOFT; - data->chip.ecc.algo = NAND_ECC_HAMMING; + data->chip.ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + data->chip.ecc.algo = NAND_ECC_ALGO_HAMMING; platform_set_drvdata(pdev, data); nand_set_controller_data(&data->chip, data); diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index e2c382ffc5b6..c35221794645 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -419,7 +419,7 @@ static int spinand_check_ecc_status(struct spinand_device *spinand, u8 status) * fixed, so let's return the maximum possible value so that * wear-leveling layers move the data immediately. */ - return nand->eccreq.strength; + return nanddev_get_ecc_conf(nand)->strength; case STATUS_ECC_UNCOR_ERROR: return -EBADMSG; @@ -497,7 +497,7 @@ static int spinand_mtd_read(struct mtd_info *mtd, loff_t from, mutex_lock(&spinand->lock); - nanddev_io_for_each_page(nand, from, ops, &iter) { + nanddev_io_for_each_page(nand, NAND_PAGE_READ, from, ops, &iter) { ret = spinand_select_target(spinand, iter.req.pos.target); if (ret) break; @@ -545,7 +545,7 @@ static int spinand_mtd_write(struct mtd_info *mtd, loff_t to, mutex_lock(&spinand->lock); - nanddev_io_for_each_page(nand, to, ops, &iter) { + nanddev_io_for_each_page(nand, NAND_PAGE_WRITE, to, ops, &iter) { ret = spinand_select_target(spinand, iter.req.pos.target); if (ret) break; @@ -902,7 +902,7 @@ int spinand_match_and_init(struct spinand_device *spinand, continue; nand->memorg = table[i].memorg; - nand->eccreq = table[i].eccreq; + nanddev_set_ecc_requirements(nand, &table[i].eccreq); spinand->eccinfo = table[i].eccinfo; spinand->flags = table[i].flags; spinand->id.len = 1 + table[i].devid.len; @@ -1090,8 +1090,8 @@ static int spinand_init(struct spinand_device *spinand) mtd->oobavail = ret; /* Propagate ECC information to mtd_info */ - mtd->ecc_strength = nand->eccreq.strength; - mtd->ecc_step_size = nand->eccreq.step_size; + mtd->ecc_strength = nanddev_get_ecc_conf(nand)->strength; + mtd->ecc_step_size = nanddev_get_ecc_conf(nand)->step_size; return 0; diff --git a/drivers/mtd/nand/spi/gigadevice.c b/drivers/mtd/nand/spi/gigadevice.c index d219c970042a..33c67403c4aa 100644 --- a/drivers/mtd/nand/spi/gigadevice.c +++ b/drivers/mtd/nand/spi/gigadevice.c @@ -21,7 +21,7 @@ #define GD5FXGQ4UXFXXG_STATUS_ECC_UNCOR_ERROR (7 << 4) static SPINAND_OP_VARIANTS(read_cache_variants, - SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 2, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 1, NULL, 0), SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0), SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(0, 1, NULL, 0), SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0), @@ -29,7 +29,7 @@ static SPINAND_OP_VARIANTS(read_cache_variants, SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0)); static SPINAND_OP_VARIANTS(read_cache_variants_f, - SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 2, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 1, NULL, 0), SPINAND_PAGE_READ_FROM_CACHE_X4_OP_3A(0, 1, NULL, 0), SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(0, 1, NULL, 0), SPINAND_PAGE_READ_FROM_CACHE_X2_OP_3A(0, 1, NULL, 0), @@ -132,6 +132,35 @@ static const struct mtd_ooblayout_ops gd5fxgq4_variant2_ooblayout = { .free = gd5fxgq4_variant2_ooblayout_free, }; +static int gd5fxgq4xc_ooblayout_256_ecc(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobregion) +{ + if (section) + return -ERANGE; + + oobregion->offset = 128; + oobregion->length = 128; + + return 0; +} + +static int gd5fxgq4xc_ooblayout_256_free(struct mtd_info *mtd, int section, + struct mtd_oob_region *oobregion) +{ + if (section) + return -ERANGE; + + oobregion->offset = 1; + oobregion->length = 127; + + return 0; +} + +static const struct mtd_ooblayout_ops gd5fxgq4xc_oob_256_ops = { + .ecc = gd5fxgq4xc_ooblayout_256_ecc, + .free = gd5fxgq4xc_ooblayout_256_free, +}; + static int gd5fxgq4uexxg_ecc_get_status(struct spinand_device *spinand, u8 status) { @@ -202,7 +231,7 @@ static const struct spinand_info gigadevice_spinand_table[] = { SPINAND_INFO_OP_VARIANTS(&read_cache_variants, &write_cache_variants, &update_cache_variants), - 0, + SPINAND_HAS_QE_BIT, SPINAND_ECCINFO(&gd5fxgq4xa_ooblayout, gd5fxgq4xa_ecc_get_status)), SPINAND_INFO("GD5F2GQ4xA", @@ -212,7 +241,7 @@ static const struct spinand_info gigadevice_spinand_table[] = { SPINAND_INFO_OP_VARIANTS(&read_cache_variants, &write_cache_variants, &update_cache_variants), - 0, + SPINAND_HAS_QE_BIT, SPINAND_ECCINFO(&gd5fxgq4xa_ooblayout, gd5fxgq4xa_ecc_get_status)), SPINAND_INFO("GD5F4GQ4xA", @@ -222,9 +251,29 @@ static const struct spinand_info gigadevice_spinand_table[] = { SPINAND_INFO_OP_VARIANTS(&read_cache_variants, &write_cache_variants, &update_cache_variants), - 0, + SPINAND_HAS_QE_BIT, SPINAND_ECCINFO(&gd5fxgq4xa_ooblayout, gd5fxgq4xa_ecc_get_status)), + SPINAND_INFO("GD5F4GQ4RC", + SPINAND_ID(SPINAND_READID_METHOD_OPCODE, 0xa4, 0x68), + NAND_MEMORG(1, 4096, 256, 64, 2048, 40, 1, 1, 1), + NAND_ECCREQ(8, 512), + SPINAND_INFO_OP_VARIANTS(&read_cache_variants_f, + &write_cache_variants, + &update_cache_variants), + SPINAND_HAS_QE_BIT, + SPINAND_ECCINFO(&gd5fxgq4xc_oob_256_ops, + gd5fxgq4ufxxg_ecc_get_status)), + SPINAND_INFO("GD5F4GQ4UC", + SPINAND_ID(SPINAND_READID_METHOD_OPCODE, 0xb4, 0x68), + NAND_MEMORG(1, 4096, 256, 64, 2048, 40, 1, 1, 1), + NAND_ECCREQ(8, 512), + SPINAND_INFO_OP_VARIANTS(&read_cache_variants_f, + &write_cache_variants, + &update_cache_variants), + SPINAND_HAS_QE_BIT, + SPINAND_ECCINFO(&gd5fxgq4xc_oob_256_ops, + gd5fxgq4ufxxg_ecc_get_status)), SPINAND_INFO("GD5F1GQ4UExxG", SPINAND_ID(SPINAND_READID_METHOD_OPCODE_ADDR, 0xd1), NAND_MEMORG(1, 2048, 128, 64, 1024, 20, 1, 1, 1), @@ -232,7 +281,7 @@ static const struct spinand_info gigadevice_spinand_table[] = { SPINAND_INFO_OP_VARIANTS(&read_cache_variants, &write_cache_variants, &update_cache_variants), - 0, + SPINAND_HAS_QE_BIT, SPINAND_ECCINFO(&gd5fxgq4_variant2_ooblayout, gd5fxgq4uexxg_ecc_get_status)), SPINAND_INFO("GD5F1GQ4UFxxG", @@ -242,7 +291,7 @@ static const struct spinand_info gigadevice_spinand_table[] = { SPINAND_INFO_OP_VARIANTS(&read_cache_variants_f, &write_cache_variants, &update_cache_variants), - 0, + SPINAND_HAS_QE_BIT, SPINAND_ECCINFO(&gd5fxgq4_variant2_ooblayout, gd5fxgq4ufxxg_ecc_get_status)), }; diff --git a/drivers/mtd/nand/spi/macronix.c b/drivers/mtd/nand/spi/macronix.c index 0f900f3aa21a..8e801e4c3a00 100644 --- a/drivers/mtd/nand/spi/macronix.c +++ b/drivers/mtd/nand/spi/macronix.c @@ -84,10 +84,11 @@ static int mx35lf1ge4ab_ecc_get_status(struct spinand_device *spinand, * data around if it's not necessary. */ if (mx35lf1ge4ab_get_eccsr(spinand, &eccsr)) - return nand->eccreq.strength; + return nanddev_get_ecc_conf(nand)->strength; - if (WARN_ON(eccsr > nand->eccreq.strength || !eccsr)) - return nand->eccreq.strength; + if (WARN_ON(eccsr > nanddev_get_ecc_conf(nand)->strength || + !eccsr)) + return nanddev_get_ecc_conf(nand)->strength; return eccsr; @@ -118,6 +119,26 @@ static const struct spinand_info macronix_spinand_table[] = { &update_cache_variants), SPINAND_HAS_QE_BIT, SPINAND_ECCINFO(&mx35lfxge4ab_ooblayout, NULL)), + SPINAND_INFO("MX31LF1GE4BC", + SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x1e), + NAND_MEMORG(1, 2048, 64, 64, 1024, 20, 1, 1, 1), + NAND_ECCREQ(8, 512), + SPINAND_INFO_OP_VARIANTS(&read_cache_variants, + &write_cache_variants, + &update_cache_variants), + 0 /*SPINAND_HAS_QE_BIT*/, + SPINAND_ECCINFO(&mx35lfxge4ab_ooblayout, + mx35lf1ge4ab_ecc_get_status)), + SPINAND_INFO("MX31UF1GE4BC", + SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x9e), + NAND_MEMORG(1, 2048, 64, 64, 1024, 20, 1, 1, 1), + NAND_ECCREQ(8, 512), + SPINAND_INFO_OP_VARIANTS(&read_cache_variants, + &write_cache_variants, + &update_cache_variants), + 0 /*SPINAND_HAS_QE_BIT*/, + SPINAND_ECCINFO(&mx35lfxge4ab_ooblayout, + mx35lf1ge4ab_ecc_get_status)), }; static const struct spinand_manufacturer_ops macronix_spinand_manuf_ops = { diff --git a/drivers/mtd/nand/spi/toshiba.c b/drivers/mtd/nand/spi/toshiba.c index bc801d83343e..21fde2875674 100644 --- a/drivers/mtd/nand/spi/toshiba.c +++ b/drivers/mtd/nand/spi/toshiba.c @@ -90,12 +90,12 @@ static int tx58cxgxsxraix_ecc_get_status(struct spinand_device *spinand, * data around if it's not necessary. */ if (spi_mem_exec_op(spinand->spimem, &op)) - return nand->eccreq.strength; + return nanddev_get_ecc_conf(nand)->strength; mbf >>= 4; - if (WARN_ON(mbf > nand->eccreq.strength || !mbf)) - return nand->eccreq.strength; + if (WARN_ON(mbf > nanddev_get_ecc_conf(nand)->strength || !mbf)) + return nanddev_get_ecc_conf(nand)->strength; return mbf; diff --git a/drivers/mtd/parsers/Kconfig b/drivers/mtd/parsers/Kconfig index f98363c9b363..e72354322f62 100644 --- a/drivers/mtd/parsers/Kconfig +++ b/drivers/mtd/parsers/Kconfig @@ -12,7 +12,7 @@ config MTD_BCM47XX_PARTS boards. config MTD_BCM63XX_PARTS - tristate "BCM63XX CFE partitioning parser" + bool "BCM63XX CFE partitioning parser" depends on BCM63XX || BMIPS_GENERIC || COMPILE_TEST select CRC32 select MTD_PARSER_IMAGETAG diff --git a/drivers/mtd/spi-nor/controllers/intel-spi-pci.c b/drivers/mtd/spi-nor/controllers/intel-spi-pci.c index c72aa1ab71ad..555fe55d14ae 100644 --- a/drivers/mtd/spi-nor/controllers/intel-spi-pci.c +++ b/drivers/mtd/spi-nor/controllers/intel-spi-pci.c @@ -73,6 +73,7 @@ static const struct pci_device_id intel_spi_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x43a4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0x4b24), (unsigned long)&bxt_info }, { PCI_VDEVICE(INTEL, 0x4da4), (unsigned long)&bxt_info }, + { PCI_VDEVICE(INTEL, 0x7aa4), (unsigned long)&cnl_info }, { PCI_VDEVICE(INTEL, 0xa0a4), (unsigned long)&bxt_info }, { PCI_VDEVICE(INTEL, 0xa1a4), (unsigned long)&bxt_info }, { PCI_VDEVICE(INTEL, 0xa224), (unsigned long)&bxt_info }, diff --git a/drivers/mtd/spi-nor/macronix.c b/drivers/mtd/spi-nor/macronix.c index f97f3d127575..9203abaac229 100644 --- a/drivers/mtd/spi-nor/macronix.c +++ b/drivers/mtd/spi-nor/macronix.c @@ -50,7 +50,7 @@ static const struct flash_info macronix_parts[] = { { "mx25u4035", INFO(0xc22533, 0, 64 * 1024, 8, SECT_4K) }, { "mx25u8035", INFO(0xc22534, 0, 64 * 1024, 16, SECT_4K) }, { "mx25u6435f", INFO(0xc22537, 0, 64 * 1024, 128, SECT_4K) }, - { "mx25l12805d", INFO(0xc22018, 0, 64 * 1024, 256, 0) }, + { "mx25l12805d", INFO(0xc22018, 0, 64 * 1024, 256, SECT_4K) }, { "mx25l12855e", INFO(0xc22618, 0, 64 * 1024, 256, 0) }, { "mx25r1635f", INFO(0xc22815, 0, 64 * 1024, 32, SECT_4K | SPI_NOR_DUAL_READ | diff --git a/drivers/mtd/spi-nor/winbond.c b/drivers/mtd/spi-nor/winbond.c index 6dcde15fb1aa..e5dfa786f190 100644 --- a/drivers/mtd/spi-nor/winbond.c +++ b/drivers/mtd/spi-nor/winbond.c @@ -63,6 +63,15 @@ static const struct flash_info winbond_parts[] = { { "w25q32jwm", INFO(0xef8016, 0, 64 * 1024, 64, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ | SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB) }, + { "w25q64jwm", INFO(0xef8017, 0, 64 * 1024, 128, + SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ | + SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB) }, + { "w25q128jwm", INFO(0xef8018, 0, 64 * 1024, 256, + SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ | + SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB) }, + { "w25q256jwm", INFO(0xef8019, 0, 64 * 1024, 512, + SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ | + SPI_NOR_HAS_LOCK | SPI_NOR_HAS_TB) }, { "w25x64", INFO(0xef3017, 0, 64 * 1024, 128, SECT_4K) }, { "w25q64", INFO(0xef4017, 0, 64 * 1024, 128, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index 1eef66ee849e..cac8f085b16d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -130,14 +130,6 @@ static int mlx5i_flash_device(struct net_device *netdev, return mlx5e_ethtool_flash_device(priv, flash); } -enum mlx5_ptys_width { - MLX5_PTYS_WIDTH_1X = 1 << 0, - MLX5_PTYS_WIDTH_2X = 1 << 1, - MLX5_PTYS_WIDTH_4X = 1 << 2, - MLX5_PTYS_WIDTH_8X = 1 << 3, - MLX5_PTYS_WIDTH_12X = 1 << 4, -}; - static inline int mlx5_ptys_width_enum_to_int(enum mlx5_ptys_width width) { switch (width) { @@ -174,24 +166,6 @@ static inline int mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate) } } -static int mlx5i_get_port_settings(struct net_device *netdev, - u16 *ib_link_width_oper, u16 *ib_proto_oper) -{ - struct mlx5e_priv *priv = mlx5i_epriv(netdev); - struct mlx5_core_dev *mdev = priv->mdev; - u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0}; - int ret; - - ret = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_IB, 1); - if (ret) - return ret; - - *ib_link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper); - *ib_proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper); - - return 0; -} - static int mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper) { int rate, width; @@ -209,11 +183,14 @@ static int mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper) static int mlx5i_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *link_ksettings) { + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; u16 ib_link_width_oper; u16 ib_proto_oper; int speed, ret; - ret = mlx5i_get_port_settings(netdev, &ib_link_width_oper, &ib_proto_oper); + ret = mlx5_query_ib_port_oper(mdev, &ib_link_width_oper, &ib_proto_oper, + 1); if (ret) return ret; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index e4186e84b3ff..4bb219565c58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -154,24 +154,8 @@ int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration) sizeof(out), MLX5_REG_MLCR, 0, 1); } -int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, - u8 *link_width_oper, u8 local_port) -{ - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; - int err; - - err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_IB, local_port); - if (err) - return err; - - *link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper); - - return 0; -} -EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper); - -int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, - u8 *proto_oper, u8 local_port) +int mlx5_query_ib_port_oper(struct mlx5_core_dev *dev, u16 *link_width_oper, + u16 *proto_oper, u8 local_port) { u32 out[MLX5_ST_SZ_DW(ptys_reg)]; int err; @@ -181,11 +165,12 @@ int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, if (err) return err; + *link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper); *proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper); return 0; } -EXPORT_SYMBOL(mlx5_query_port_ib_proto_oper); +EXPORT_SYMBOL(mlx5_query_ib_port_oper); /* This function should be used after setting a port register only */ void mlx5_toggle_port_link(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index d3136556a1e9..da864d12916b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -504,7 +504,8 @@ static void qed_rdma_init_devinfo(struct qed_hwfn *p_hwfn, dev->max_mw = 0; dev->max_mr_mw_fmr_pbl = (PAGE_SIZE / 8) * (PAGE_SIZE / 8); dev->max_mr_mw_fmr_size = dev->max_mr_mw_fmr_pbl * PAGE_SIZE; - dev->max_pkey = QED_RDMA_MAX_P_KEY; + if (QED_IS_ROCE_PERSONALITY(p_hwfn)) + dev->max_pkey = QED_RDMA_MAX_P_KEY; dev->max_srq = p_hwfn->p_rdma_info->num_srqs; dev->max_srq_wr = QED_RDMA_MAX_SRQ_WQE_ELEM; @@ -1519,7 +1520,7 @@ qed_rdma_register_tid(void *rdma_cxt, params->pbl_two_level); SET_FIELD(flags, RDMA_REGISTER_TID_RAMROD_DATA_ZERO_BASED, - params->zbva); + false); SET_FIELD(flags, RDMA_REGISTER_TID_RAMROD_DATA_PHY_MR, params->phy_mr); @@ -1581,15 +1582,7 @@ qed_rdma_register_tid(void *rdma_cxt, p_ramrod->pd = cpu_to_le16(params->pd); p_ramrod->length_hi = (u8)(params->length >> 32); p_ramrod->length_lo = DMA_LO_LE(params->length); - if (params->zbva) { - /* Lower 32 bits of the registered MR address. - * In case of zero based MR, will hold FBO - */ - p_ramrod->va.hi = 0; - p_ramrod->va.lo = cpu_to_le32(params->fbo); - } else { - DMA_REGPAIR_LE(p_ramrod->va, params->vaddr); - } + DMA_REGPAIR_LE(p_ramrod->va, params->vaddr); DMA_REGPAIR_LE(p_ramrod->pbl_base, params->pbl_ptr); /* DIF */ diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index b9aa6384563b..bedbb85a179a 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -1026,7 +1026,9 @@ int qede_change_mtu(struct net_device *ndev, int new_mtu) args.u.mtu = new_mtu; args.func = &qede_update_mtu; qede_reload(edev, &args, false); - +#if IS_ENABLED(CONFIG_QED_RDMA) + qede_rdma_event_change_mtu(edev); +#endif edev->ops->common->update_mtu(edev->cdev, new_mtu); return 0; diff --git a/drivers/net/ethernet/qlogic/qede/qede_rdma.c b/drivers/net/ethernet/qlogic/qede/qede_rdma.c index 769ec2f4d0b7..2f6598086d9b 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_rdma.c +++ b/drivers/net/ethernet/qlogic/qede/qede_rdma.c @@ -234,6 +234,15 @@ static void qede_rdma_changeaddr(struct qede_dev *edev) qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_CHANGE_ADDR); } +static void qede_rdma_change_mtu(struct qede_dev *edev) +{ + if (qede_rdma_supported(edev)) { + if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify) + qedr_drv->notify(edev->rdma_info.qedr_dev, + QEDE_CHANGE_MTU); + } +} + static struct qede_rdma_event_work * qede_rdma_get_free_event_node(struct qede_dev *edev) { @@ -287,6 +296,9 @@ static void qede_rdma_handle_event(struct work_struct *work) case QEDE_CHANGE_ADDR: qede_rdma_changeaddr(edev); break; + case QEDE_CHANGE_MTU: + qede_rdma_change_mtu(edev); + break; default: DP_NOTICE(edev, "Invalid rdma event %d", event); } @@ -338,3 +350,8 @@ void qede_rdma_event_changeaddr(struct qede_dev *edev) { qede_rdma_add_event(edev, QEDE_CHANGE_ADDR); } + +void qede_rdma_event_change_mtu(struct qede_dev *edev) +{ + qede_rdma_add_event(edev, QEDE_CHANGE_MTU); +} diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index b668224f906d..7edc8dc6bbab 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -346,13 +346,13 @@ config RCAR_THERMAL thermal framework. config RCAR_GEN3_THERMAL - tristate "Renesas R-Car Gen3 thermal driver" + tristate "Renesas R-Car Gen3 and RZ/G2 thermal driver" depends on ARCH_RENESAS || COMPILE_TEST depends on HAS_IOMEM depends on OF help - Enable this to plug the R-Car Gen3 thermal sensor driver into the Linux - thermal framework. + Enable this to plug the R-Car Gen3 or RZ/G2 thermal sensor driver into + the Linux thermal framework. config KIRKWOOD_THERMAL tristate "Temperature sensor on Marvell Kirkwood SoCs" diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c index 6cf23a54e853..cc2959f22f01 100644 --- a/drivers/thermal/cpufreq_cooling.c +++ b/drivers/thermal/cpufreq_cooling.c @@ -182,7 +182,6 @@ static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_cdev, /** * cpufreq_get_requested_power() - get the current power * @cdev: &thermal_cooling_device pointer - * @tz: a valid thermal zone device pointer * @power: pointer in which to store the resulting power * * Calculate the current power consumption of the cpus in milliwatts @@ -203,7 +202,6 @@ static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_cdev, * Return: 0 on success, -E* if getting the static power failed. */ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev, - struct thermal_zone_device *tz, u32 *power) { unsigned long freq; @@ -253,7 +251,6 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev, /** * cpufreq_state2power() - convert a cpu cdev state to power consumed * @cdev: &thermal_cooling_device pointer - * @tz: a valid thermal zone device pointer * @state: cooling device state to be converted * @power: pointer in which to store the resulting power * @@ -266,7 +263,6 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev, * when calculating the static power. */ static int cpufreq_state2power(struct thermal_cooling_device *cdev, - struct thermal_zone_device *tz, unsigned long state, u32 *power) { unsigned int freq, num_cpus, idx; @@ -288,7 +284,6 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev, /** * cpufreq_power2state() - convert power to a cooling device state * @cdev: &thermal_cooling_device pointer - * @tz: a valid thermal zone device pointer * @power: power in milliwatts to be converted * @state: pointer in which to store the resulting state * @@ -306,8 +301,7 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev, * device. */ static int cpufreq_power2state(struct thermal_cooling_device *cdev, - struct thermal_zone_device *tz, u32 power, - unsigned long *state) + u32 power, unsigned long *state) { unsigned int target_freq; u32 last_load, normalised_power; diff --git a/drivers/thermal/cpuidle_cooling.c b/drivers/thermal/cpuidle_cooling.c index 78e3e8238116..7ecab4b16b29 100644 --- a/drivers/thermal/cpuidle_cooling.c +++ b/drivers/thermal/cpuidle_cooling.c @@ -30,7 +30,7 @@ static DEFINE_IDA(cpuidle_ida); /** * cpuidle_cooling_runtime - Running time computation - * @idle_duration_us: the idle cooling device + * @idle_duration_us: CPU idle time to inject in microseconds * @state: a percentile based number * * The running duration is computed from the idle injection duration diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c index a12d29096229..dfab49a67252 100644 --- a/drivers/thermal/devfreq_cooling.c +++ b/drivers/thermal/devfreq_cooling.c @@ -229,7 +229,6 @@ static inline unsigned long get_total_power(struct devfreq_cooling_device *dfc, static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cdev, - struct thermal_zone_device *tz, u32 *power) { struct devfreq_cooling_device *dfc = cdev->devdata; @@ -289,7 +288,6 @@ fail: } static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev, - struct thermal_zone_device *tz, unsigned long state, u32 *power) { @@ -308,7 +306,6 @@ static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev, } static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev, - struct thermal_zone_device *tz, u32 power, unsigned long *state) { struct devfreq_cooling_device *dfc = cdev->devdata; diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c index 5cb518d8f156..ab0be26f0816 100644 --- a/drivers/thermal/gov_power_allocator.c +++ b/drivers/thermal/gov_power_allocator.c @@ -96,7 +96,7 @@ static u32 estimate_sustainable_power(struct thermal_zone_device *tz) if (instance->trip != params->trip_max_desired_temperature) continue; - if (power_actor_get_min_power(cdev, tz, &min_power)) + if (power_actor_get_min_power(cdev, &min_power)) continue; sustainable_power += min_power; @@ -388,7 +388,7 @@ static int allocate_power(struct thermal_zone_device *tz, if (!cdev_is_power_actor(cdev)) continue; - if (cdev->ops->get_requested_power(cdev, tz, &req_power[i])) + if (cdev->ops->get_requested_power(cdev, &req_power[i])) continue; if (!total_weight) @@ -398,7 +398,7 @@ static int allocate_power(struct thermal_zone_device *tz, weighted_req_power[i] = frac_to_int(weight * req_power[i]); - if (power_actor_get_max_power(cdev, tz, &max_power[i])) + if (power_actor_get_max_power(cdev, &max_power[i])) continue; total_req_power += req_power[i]; diff --git a/drivers/thermal/imx8mm_thermal.c b/drivers/thermal/imx8mm_thermal.c index f5124f14cf81..a1e4f9bb4cb0 100644 --- a/drivers/thermal/imx8mm_thermal.c +++ b/drivers/thermal/imx8mm_thermal.c @@ -146,13 +146,9 @@ static int imx8mm_tmu_probe(struct platform_device *pdev) return PTR_ERR(tmu->base); tmu->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(tmu->clk)) { - ret = PTR_ERR(tmu->clk); - if (ret != -EPROBE_DEFER) - dev_err(&pdev->dev, - "failed to get tmu clock: %d\n", ret); - return ret; - } + if (IS_ERR(tmu->clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(tmu->clk), + "failed to get tmu clock\n"); ret = clk_prepare_enable(tmu->clk); if (ret) { diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c index 3f74ab4c1ab9..2c7473d86a59 100644 --- a/drivers/thermal/imx_thermal.c +++ b/drivers/thermal/imx_thermal.c @@ -716,14 +716,9 @@ static int imx_thermal_probe(struct platform_device *pdev) if (of_find_property(pdev->dev.of_node, "nvmem-cells", NULL)) { ret = imx_init_from_nvmem_cells(pdev); - if (ret) { - if (ret == -EPROBE_DEFER) - return ret; - - dev_err(&pdev->dev, "failed to init from nvmem: %d\n", - ret); - return ret; - } + if (ret) + return dev_err_probe(&pdev->dev, ret, + "failed to init from nvmem\n"); } else { ret = imx_init_from_tempmon_data(pdev); if (ret) { @@ -746,14 +741,9 @@ static int imx_thermal_probe(struct platform_device *pdev) data->socdata->power_down_mask); ret = imx_thermal_register_legacy_cooling(data); - if (ret) { - if (ret == -EPROBE_DEFER) - return ret; - - dev_err(&pdev->dev, - "failed to register cpufreq cooling device: %d\n", ret); - return ret; - } + if (ret) + return dev_err_probe(&pdev->dev, ret, + "failed to register cpufreq cooling device\n"); data->thermal_clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(data->thermal_clk)) { diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c index 4f5859d4c780..0966551cbaaa 100644 --- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c @@ -14,6 +14,7 @@ #define INT3400_THERMAL_TABLE_CHANGED 0x83 #define INT3400_ODVP_CHANGED 0x88 +#define INT3400_KEEP_ALIVE 0xA0 enum int3400_thermal_uuid { INT3400_THERMAL_PASSIVE_1, @@ -83,8 +84,33 @@ static struct bin_attribute *data_attributes[] = { NULL, }; +static ssize_t imok_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct int3400_thermal_priv *priv = dev_get_drvdata(dev); + acpi_status status; + int input, ret; + + ret = kstrtouint(buf, 10, &input); + if (ret) + return ret; + status = acpi_execute_simple_method(priv->adev->handle, "IMOK", input); + if (ACPI_FAILURE(status)) + return -EIO; + + return count; +} + +static DEVICE_ATTR_WO(imok); + +static struct attribute *imok_attr[] = { + &dev_attr_imok.attr, + NULL +}; + static const struct attribute_group data_attribute_group = { .bin_attrs = data_attributes, + .attrs = imok_attr, }; static ssize_t available_uuids_show(struct device *dev, @@ -349,30 +375,33 @@ static void int3400_notify(acpi_handle handle, { struct int3400_thermal_priv *priv = data; char *thermal_prop[5]; + int therm_event; if (!priv) return; switch (event) { case INT3400_THERMAL_TABLE_CHANGED: - thermal_prop[0] = kasprintf(GFP_KERNEL, "NAME=%s", - priv->thermal->type); - thermal_prop[1] = kasprintf(GFP_KERNEL, "TEMP=%d", - priv->thermal->temperature); - thermal_prop[2] = kasprintf(GFP_KERNEL, "TRIP="); - thermal_prop[3] = kasprintf(GFP_KERNEL, "EVENT=%d", - THERMAL_TABLE_CHANGED); - thermal_prop[4] = NULL; - kobject_uevent_env(&priv->thermal->device.kobj, KOBJ_CHANGE, - thermal_prop); + therm_event = THERMAL_TABLE_CHANGED; + break; + case INT3400_KEEP_ALIVE: + therm_event = THERMAL_EVENT_KEEP_ALIVE; break; case INT3400_ODVP_CHANGED: evaluate_odvp(priv); + therm_event = THERMAL_DEVICE_POWER_CAPABILITY_CHANGED; break; default: /* Ignore unknown notification codes sent to INT3400 device */ - break; + return; } + + thermal_prop[0] = kasprintf(GFP_KERNEL, "NAME=%s", priv->thermal->type); + thermal_prop[1] = kasprintf(GFP_KERNEL, "TEMP=%d", priv->thermal->temperature); + thermal_prop[2] = kasprintf(GFP_KERNEL, "TRIP="); + thermal_prop[3] = kasprintf(GFP_KERNEL, "EVENT=%d", therm_event); + thermal_prop[4] = NULL; + kobject_uevent_env(&priv->thermal->device.kobj, KOBJ_CHANGE, thermal_prop); } static int int3400_thermal_get_temp(struct thermal_zone_device *thermal, diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c index 787710bb88fe..5c2a13bf249c 100644 --- a/drivers/thermal/rcar_thermal.c +++ b/drivers/thermal/rcar_thermal.c @@ -546,11 +546,11 @@ static int rcar_thermal_probe(struct platform_device *pdev) if (ret < 0) goto error_unregister; - if (chip->use_of_thermal) + if (chip->use_of_thermal) { priv->zone = devm_thermal_zone_of_sensor_register( dev, i, priv, &rcar_thermal_zone_of_ops); - else { + } else { priv->zone = thermal_zone_device_register( "rcar_thermal", 1, 0, priv, diff --git a/drivers/thermal/st/Kconfig b/drivers/thermal/st/Kconfig index 3c3b695cc3e9..58ece381956b 100644 --- a/drivers/thermal/st/Kconfig +++ b/drivers/thermal/st/Kconfig @@ -23,5 +23,5 @@ config STM32_THERMAL help Support for thermal framework on STMicroelectronics STM32 series of SoCs. This thermal driver allows to access to general thermal framework - functionalities and to acces to SoC sensor functionalities. This + functionalities and to access to SoC sensor functionalities. This configuration is fully dependent of MACH_STM32MP157. diff --git a/drivers/thermal/st/stm_thermal.c b/drivers/thermal/st/stm_thermal.c index 331e2b768df5..5fd3fb8912a6 100644 --- a/drivers/thermal/st/stm_thermal.c +++ b/drivers/thermal/st/stm_thermal.c @@ -446,14 +446,9 @@ thermal_unprepare: #ifdef CONFIG_PM_SLEEP static int stm_thermal_suspend(struct device *dev) { - int ret; struct stm_thermal_sensor *sensor = dev_get_drvdata(dev); - ret = stm_thermal_sensor_off(sensor); - if (ret) - return ret; - - return 0; + return stm_thermal_sensor_off(sensor); } static int stm_thermal_resume(struct device *dev) diff --git a/drivers/thermal/sun8i_thermal.c b/drivers/thermal/sun8i_thermal.c index 74d73be16496..f8b13071a6f4 100644 --- a/drivers/thermal/sun8i_thermal.c +++ b/drivers/thermal/sun8i_thermal.c @@ -244,7 +244,7 @@ static int sun50i_h6_ths_calibrate(struct ths_device *tmdev, ft_temp = (caldata[0] & FT_TEMP_MASK) * 100; for (i = 0; i < tmdev->chip->sensor_num; i++) { - int sensor_reg = caldata[i + 1]; + int sensor_reg = caldata[i + 1] & TEMP_CALIB_MASK; int cdata, offset; int sensor_temp = tmdev->chip->calc_temp(tmdev, i, sensor_reg); @@ -590,6 +590,19 @@ static const struct ths_thermal_chip sun50i_a64_ths = { .calc_temp = sun8i_ths_calc_temp, }; +static const struct ths_thermal_chip sun50i_a100_ths = { + .sensor_num = 3, + .has_bus_clk_reset = true, + .ft_deviation = 8000, + .offset = 187744, + .scale = 672, + .temp_data_base = SUN50I_H6_THS_TEMP_DATA, + .calibrate = sun50i_h6_ths_calibrate, + .init = sun50i_h6_thermal_init, + .irq_ack = sun50i_h6_irq_ack, + .calc_temp = sun8i_ths_calc_temp, +}; + static const struct ths_thermal_chip sun50i_h5_ths = { .sensor_num = 2, .has_mod_clk = true, @@ -619,6 +632,7 @@ static const struct of_device_id of_ths_match[] = { { .compatible = "allwinner,sun8i-h3-ths", .data = &sun8i_h3_ths }, { .compatible = "allwinner,sun8i-r40-ths", .data = &sun8i_r40_ths }, { .compatible = "allwinner,sun50i-a64-ths", .data = &sun50i_a64_ths }, + { .compatible = "allwinner,sun50i-a100-ths", .data = &sun50i_a100_ths }, { .compatible = "allwinner,sun50i-h5-ths", .data = &sun50i_h5_ths }, { .compatible = "allwinner,sun50i-h6-ths", .data = &sun50i_h6_ths }, { /* sentinel */ }, diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index a6616e530a84..c6d74bc1c90b 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -603,7 +603,6 @@ static void thermal_zone_device_check(struct work_struct *work) /** * power_actor_get_max_power() - get the maximum power that a cdev can consume * @cdev: pointer to &thermal_cooling_device - * @tz: a valid thermal zone device pointer * @max_power: pointer in which to store the maximum power * * Calculate the maximum power consumption in milliwats that the @@ -613,18 +612,17 @@ static void thermal_zone_device_check(struct work_struct *work) * power_actor API or -E* on other error. */ int power_actor_get_max_power(struct thermal_cooling_device *cdev, - struct thermal_zone_device *tz, u32 *max_power) + u32 *max_power) { if (!cdev_is_power_actor(cdev)) return -EINVAL; - return cdev->ops->state2power(cdev, tz, 0, max_power); + return cdev->ops->state2power(cdev, 0, max_power); } /** * power_actor_get_min_power() - get the mainimum power that a cdev can consume * @cdev: pointer to &thermal_cooling_device - * @tz: a valid thermal zone device pointer * @min_power: pointer in which to store the minimum power * * Calculate the minimum power consumption in milliwatts that the @@ -634,7 +632,7 @@ int power_actor_get_max_power(struct thermal_cooling_device *cdev, * power_actor API or -E* on other error. */ int power_actor_get_min_power(struct thermal_cooling_device *cdev, - struct thermal_zone_device *tz, u32 *min_power) + u32 *min_power) { unsigned long max_state; int ret; @@ -646,7 +644,7 @@ int power_actor_get_min_power(struct thermal_cooling_device *cdev, if (ret) return ret; - return cdev->ops->state2power(cdev, tz, max_state, min_power); + return cdev->ops->state2power(cdev, max_state, min_power); } /** @@ -670,7 +668,7 @@ int power_actor_set_power(struct thermal_cooling_device *cdev, if (!cdev_is_power_actor(cdev)) return -EINVAL; - ret = cdev->ops->power2state(cdev, instance->tz, power, &state); + ret = cdev->ops->power2state(cdev, power, &state); if (ret) return ret; @@ -1652,7 +1650,6 @@ static int __init thermal_init(void) if (result) goto error; - mutex_init(&poweroff_lock); result = thermal_register_governors(); if (result) goto error; diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index e00fc5585ea8..764c2de31771 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -66,9 +66,9 @@ static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev) } int power_actor_get_max_power(struct thermal_cooling_device *cdev, - struct thermal_zone_device *tz, u32 *max_power); + u32 *max_power); int power_actor_get_min_power(struct thermal_cooling_device *cdev, - struct thermal_zone_device *tz, u32 *min_power); + u32 *min_power); int power_actor_set_power(struct thermal_cooling_device *cdev, struct thermal_instance *ti, u32 power); /** diff --git a/drivers/thermal/thermal_netlink.c b/drivers/thermal/thermal_netlink.c index da2891fcac89..1234dbe95895 100644 --- a/drivers/thermal/thermal_netlink.c +++ b/drivers/thermal/thermal_netlink.c @@ -78,7 +78,7 @@ int thermal_genl_sampling_temp(int id, int temp) hdr = genlmsg_put(skb, 0, 0, &thermal_gnl_family, 0, THERMAL_GENL_SAMPLING_TEMP); if (!hdr) - return -EMSGSIZE; + goto out_free; if (nla_put_u32(skb, THERMAL_GENL_ATTR_TZ_ID, id)) goto out_cancel; @@ -93,6 +93,7 @@ int thermal_genl_sampling_temp(int id, int temp) return 0; out_cancel: genlmsg_cancel(skb, hdr); +out_free: nlmsg_free(skb); return -EMSGSIZE; diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c index 8c231219e15d..a6f371fc9af2 100644 --- a/drivers/thermal/thermal_sysfs.c +++ b/drivers/thermal/thermal_sysfs.c @@ -448,7 +448,7 @@ static umode_t thermal_zone_passive_is_visible(struct kobject *kobj, struct attribute *attr, int attrno) { - struct device *dev = container_of(kobj, struct device, kobj); + struct device *dev = kobj_to_dev(kobj); struct thermal_zone_device *tz; enum thermal_trip_type trip_type; int count, passive = 0; diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.c b/drivers/thermal/ti-soc-thermal/ti-bandgap.c index ab19ceff6e2a..5e596168ba73 100644 --- a/drivers/thermal/ti-soc-thermal/ti-bandgap.c +++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.c @@ -25,10 +25,20 @@ #include #include #include +#include +#include +#include +#include +#include +#include #include "ti-bandgap.h" static int ti_bandgap_force_single_read(struct ti_bandgap *bgp, int id); +#ifdef CONFIG_PM_SLEEP +static int bandgap_omap_cpu_notifier(struct notifier_block *nb, + unsigned long cmd, void *v); +#endif /*** Helper functions to access registers and their bitfields ***/ @@ -1008,6 +1018,11 @@ int ti_bandgap_probe(struct platform_device *pdev) } } +#ifdef CONFIG_PM_SLEEP + bgp->nb.notifier_call = bandgap_omap_cpu_notifier; + cpu_pm_register_notifier(&bgp->nb); +#endif + return 0; remove_last_cooling: @@ -1041,7 +1056,9 @@ int ti_bandgap_remove(struct platform_device *pdev) struct ti_bandgap *bgp = platform_get_drvdata(pdev); int i; - /* First thing is to remove sensor interfaces */ + cpu_pm_unregister_notifier(&bgp->nb); + + /* Remove sensor interfaces */ for (i = 0; i < bgp->conf->sensor_count; i++) { if (bgp->conf->sensors[i].unregister_cooling) bgp->conf->sensors[i].unregister_cooling(bgp, i); @@ -1150,9 +1167,43 @@ static int ti_bandgap_suspend(struct device *dev) if (TI_BANDGAP_HAS(bgp, CLK_CTRL)) clk_disable_unprepare(bgp->fclock); + bgp->is_suspended = true; + return err; } +static int bandgap_omap_cpu_notifier(struct notifier_block *nb, + unsigned long cmd, void *v) +{ + struct ti_bandgap *bgp; + + bgp = container_of(nb, struct ti_bandgap, nb); + + spin_lock(&bgp->lock); + switch (cmd) { + case CPU_CLUSTER_PM_ENTER: + if (bgp->is_suspended) + break; + ti_bandgap_save_ctxt(bgp); + ti_bandgap_power(bgp, false); + if (TI_BANDGAP_HAS(bgp, CLK_CTRL)) + clk_disable(bgp->fclock); + break; + case CPU_CLUSTER_PM_ENTER_FAILED: + case CPU_CLUSTER_PM_EXIT: + if (bgp->is_suspended) + break; + if (TI_BANDGAP_HAS(bgp, CLK_CTRL)) + clk_enable(bgp->fclock); + ti_bandgap_power(bgp, true); + ti_bandgap_restore_ctxt(bgp); + break; + } + spin_unlock(&bgp->lock); + + return NOTIFY_OK; +} + static int ti_bandgap_resume(struct device *dev) { struct ti_bandgap *bgp = dev_get_drvdata(dev); @@ -1161,6 +1212,7 @@ static int ti_bandgap_resume(struct device *dev) clk_prepare_enable(bgp->fclock); ti_bandgap_power(bgp, true); + bgp->is_suspended = false; return ti_bandgap_restore_ctxt(bgp); } diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.h b/drivers/thermal/ti-soc-thermal/ti-bandgap.h index fce4657e9486..ed0ea4b17b25 100644 --- a/drivers/thermal/ti-soc-thermal/ti-bandgap.h +++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.h @@ -12,6 +12,10 @@ #include #include #include +#include +#include +#include +#include struct gpio_desc; @@ -203,6 +207,8 @@ struct ti_bandgap { int irq; struct gpio_desc *tshut_gpiod; u32 clk_rate; + struct notifier_block nb; + unsigned int is_suspended:1; }; /** diff --git a/include/linux/idle_inject.h b/include/linux/idle_inject.h index 91a8612b8bf9..fb88e23a99d3 100644 --- a/include/linux/idle_inject.h +++ b/include/linux/idle_inject.h @@ -28,6 +28,6 @@ void idle_inject_get_duration(struct idle_inject_device *ii_dev, unsigned int *idle_duration_us); void idle_inject_set_latency(struct idle_inject_device *ii_dev, - unsigned int latency_ns); + unsigned int latency_us); #endif /* __IDLE_INJECT_H__ */ diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index de1ffb4804d6..651591a2965d 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -420,7 +420,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reserved_at_1a[0x2]; u8 ipsec_encrypt[0x1]; u8 ipsec_decrypt[0x1]; - u8 reserved_at_1e[0x2]; + u8 sw_owner_v2[0x1]; + u8 reserved_at_1f[0x1]; u8 termination_table_raw_traffic[0x1]; u8 reserved_at_21[0x1]; @@ -1430,7 +1431,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_bf_reg_size[0x5]; - u8 reserved_at_270[0x8]; + u8 reserved_at_270[0x6]; + u8 lag_dct[0x2]; u8 lag_tx_port_affinity[0x1]; u8 reserved_at_279[0x2]; u8 lag_master[0x1]; diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 2d45a6af52a4..23edd2db4803 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -125,6 +125,14 @@ enum mlx5e_connector_type { MLX5E_CONNECTOR_TYPE_NUMBER, }; +enum mlx5_ptys_width { + MLX5_PTYS_WIDTH_1X = 1 << 0, + MLX5_PTYS_WIDTH_2X = 1 << 1, + MLX5_PTYS_WIDTH_4X = 1 << 2, + MLX5_PTYS_WIDTH_8X = 1 << 3, + MLX5_PTYS_WIDTH_12X = 1 << 4, +}; + #define MLX5E_PROT_MASK(link_mode) (1 << link_mode) #define MLX5_GET_ETH_PROTO(reg, out, ext, field) \ (ext ? MLX5_GET(reg, out, ext_##field) : \ @@ -133,10 +141,9 @@ enum mlx5e_connector_type { int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, int ptys_size, int proto_mask, u8 local_port); -int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, - u8 *link_width_oper, u8 local_port); -int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, - u8 *proto_oper, u8 local_port); + +int mlx5_query_ib_port_oper(struct mlx5_core_dev *dev, u16 *link_width_oper, + u16 *proto_oper, u8 local_port); void mlx5_toggle_port_link(struct mlx5_core_dev *dev); int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status status); diff --git a/include/linux/mtd/hyperbus.h b/include/linux/mtd/hyperbus.h index 2129f7d3b6eb..0ce612428aea 100644 --- a/include/linux/mtd/hyperbus.h +++ b/include/linux/mtd/hyperbus.h @@ -8,6 +8,17 @@ #include +/* HyperBus command bits */ +#define HYPERBUS_RW 0x80 /* R/W# */ +#define HYPERBUS_RW_WRITE 0 +#define HYPERBUS_RW_READ 0x80 +#define HYPERBUS_AS 0x40 /* Address Space */ +#define HYPERBUS_AS_MEM 0 +#define HYPERBUS_AS_REG 0x40 +#define HYPERBUS_BT 0x20 /* Burst Type */ +#define HYPERBUS_BT_WRAPPED 0 +#define HYPERBUS_BT_LINEAR 0x20 + enum hyperbus_memtype { HYPERFLASH, HYPERRAM, @@ -20,6 +31,7 @@ enum hyperbus_memtype { * @mtd: pointer to MTD struct * @ctlr: pointer to HyperBus controller struct * @memtype: type of memory device: HyperFlash or HyperRAM + * @priv: pointer to controller specific per device private data */ struct hyperbus_device { @@ -28,6 +40,7 @@ struct hyperbus_device { struct mtd_info *mtd; struct hyperbus_ctlr *ctlr; enum hyperbus_memtype memtype; + void *priv; }; /** diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index af99041ceaa9..697ea2474a7c 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -82,8 +82,19 @@ struct nand_pos { unsigned int page; }; +/** + * enum nand_page_io_req_type - Direction of an I/O request + * @NAND_PAGE_READ: from the chip, to the controller + * @NAND_PAGE_WRITE: from the controller, to the chip + */ +enum nand_page_io_req_type { + NAND_PAGE_READ = 0, + NAND_PAGE_WRITE, +}; + /** * struct nand_page_io_req - NAND I/O request object + * @type: the type of page I/O: read or write * @pos: the position this I/O request is targeting * @dataoffs: the offset within the page * @datalen: number of data bytes to read from/write to this page @@ -99,6 +110,7 @@ struct nand_pos { * specific commands/operations. */ struct nand_page_io_req { + enum nand_page_io_req_type type; struct nand_pos pos; unsigned int dataoffs; unsigned int datalen; @@ -115,18 +127,77 @@ struct nand_page_io_req { int mode; }; +const struct mtd_ooblayout_ops *nand_get_small_page_ooblayout(void); +const struct mtd_ooblayout_ops *nand_get_large_page_ooblayout(void); +const struct mtd_ooblayout_ops *nand_get_large_page_hamming_ooblayout(void); + +/** + * enum nand_ecc_engine_type - NAND ECC engine type + * @NAND_ECC_ENGINE_TYPE_INVALID: Invalid value + * @NAND_ECC_ENGINE_TYPE_NONE: No ECC correction + * @NAND_ECC_ENGINE_TYPE_SOFT: Software ECC correction + * @NAND_ECC_ENGINE_TYPE_ON_HOST: On host hardware ECC correction + * @NAND_ECC_ENGINE_TYPE_ON_DIE: On chip hardware ECC correction + */ +enum nand_ecc_engine_type { + NAND_ECC_ENGINE_TYPE_INVALID, + NAND_ECC_ENGINE_TYPE_NONE, + NAND_ECC_ENGINE_TYPE_SOFT, + NAND_ECC_ENGINE_TYPE_ON_HOST, + NAND_ECC_ENGINE_TYPE_ON_DIE, +}; + +/** + * enum nand_ecc_placement - NAND ECC bytes placement + * @NAND_ECC_PLACEMENT_UNKNOWN: The actual position of the ECC bytes is unknown + * @NAND_ECC_PLACEMENT_OOB: The ECC bytes are located in the OOB area + * @NAND_ECC_PLACEMENT_INTERLEAVED: Syndrome layout, there are ECC bytes + * interleaved with regular data in the main + * area + */ +enum nand_ecc_placement { + NAND_ECC_PLACEMENT_UNKNOWN, + NAND_ECC_PLACEMENT_OOB, + NAND_ECC_PLACEMENT_INTERLEAVED, +}; + +/** + * enum nand_ecc_algo - NAND ECC algorithm + * @NAND_ECC_ALGO_UNKNOWN: Unknown algorithm + * @NAND_ECC_ALGO_HAMMING: Hamming algorithm + * @NAND_ECC_ALGO_BCH: Bose-Chaudhuri-Hocquenghem algorithm + * @NAND_ECC_ALGO_RS: Reed-Solomon algorithm + */ +enum nand_ecc_algo { + NAND_ECC_ALGO_UNKNOWN, + NAND_ECC_ALGO_HAMMING, + NAND_ECC_ALGO_BCH, + NAND_ECC_ALGO_RS, +}; + /** * struct nand_ecc_props - NAND ECC properties + * @engine_type: ECC engine type + * @placement: OOB placement (if relevant) + * @algo: ECC algorithm (if relevant) * @strength: ECC strength * @step_size: Number of bytes per step + * @flags: Misc properties */ struct nand_ecc_props { + enum nand_ecc_engine_type engine_type; + enum nand_ecc_placement placement; + enum nand_ecc_algo algo; unsigned int strength; unsigned int step_size; + unsigned int flags; }; #define NAND_ECCREQ(str, stp) { .strength = (str), .step_size = (stp) } +/* NAND ECC misc flags */ +#define NAND_ECC_MAXIMIZE_STRENGTH BIT(0) + /** * struct nand_bbt - bad block table object * @cache: in memory BBT cache @@ -157,11 +228,80 @@ struct nand_ops { bool (*isbad)(struct nand_device *nand, const struct nand_pos *pos); }; +/** + * struct nand_ecc_context - Context for the ECC engine + * @conf: basic ECC engine parameters + * @total: total number of bytes used for storing ECC codes, this is used by + * generic OOB layouts + * @priv: ECC engine driver private data + */ +struct nand_ecc_context { + struct nand_ecc_props conf; + unsigned int total; + void *priv; +}; + +/** + * struct nand_ecc_engine_ops - ECC engine operations + * @init_ctx: given a desired user configuration for the pointed NAND device, + * requests the ECC engine driver to setup a configuration with + * values it supports. + * @cleanup_ctx: clean the context initialized by @init_ctx. + * @prepare_io_req: is called before reading/writing a page to prepare the I/O + * request to be performed with ECC correction. + * @finish_io_req: is called after reading/writing a page to terminate the I/O + * request and ensure proper ECC correction. + */ +struct nand_ecc_engine_ops { + int (*init_ctx)(struct nand_device *nand); + void (*cleanup_ctx)(struct nand_device *nand); + int (*prepare_io_req)(struct nand_device *nand, + struct nand_page_io_req *req); + int (*finish_io_req)(struct nand_device *nand, + struct nand_page_io_req *req); +}; + +/** + * struct nand_ecc_engine - ECC engine abstraction for NAND devices + * @ops: ECC engine operations + */ +struct nand_ecc_engine { + struct nand_ecc_engine_ops *ops; +}; + +void of_get_nand_ecc_user_config(struct nand_device *nand); +int nand_ecc_init_ctx(struct nand_device *nand); +void nand_ecc_cleanup_ctx(struct nand_device *nand); +int nand_ecc_prepare_io_req(struct nand_device *nand, + struct nand_page_io_req *req); +int nand_ecc_finish_io_req(struct nand_device *nand, + struct nand_page_io_req *req); +bool nand_ecc_is_strong_enough(struct nand_device *nand); + +/** + * struct nand_ecc - Information relative to the ECC + * @defaults: Default values, depend on the underlying subsystem + * @requirements: ECC requirements from the NAND chip perspective + * @user_conf: User desires in terms of ECC parameters + * @ctx: ECC context for the ECC engine, derived from the device @requirements + * the @user_conf and the @defaults + * @ondie_engine: On-die ECC engine reference, if any + * @engine: ECC engine actually bound + */ +struct nand_ecc { + struct nand_ecc_props defaults; + struct nand_ecc_props requirements; + struct nand_ecc_props user_conf; + struct nand_ecc_context ctx; + struct nand_ecc_engine *ondie_engine; + struct nand_ecc_engine *engine; +}; + /** * struct nand_device - NAND device * @mtd: MTD instance attached to the NAND device * @memorg: memory layout - * @eccreq: ECC requirements + * @ecc: NAND ECC object attached to the NAND device * @rowconv: position to row address converter * @bbt: bad block table info * @ops: NAND operations attached to the NAND device @@ -169,8 +309,8 @@ struct nand_ops { * Generic NAND object. Specialized NAND layers (raw NAND, SPI NAND, OneNAND) * should declare their own NAND object embedding a nand_device struct (that's * how inheritance is done). - * struct_nand_device->memorg and struct_nand_device->eccreq should be filled - * at device detection time to reflect the NAND device + * struct_nand_device->memorg and struct_nand_device->ecc.requirements should + * be filled at device detection time to reflect the NAND device * capabilities/requirements. Once this is done nanddev_init() can be called. * It will take care of converting NAND information into MTD ones, which means * the specialized NAND layers should never manually tweak @@ -179,7 +319,7 @@ struct nand_ops { struct nand_device { struct mtd_info mtd; struct nand_memory_organization memorg; - struct nand_ecc_props eccreq; + struct nand_ecc ecc; struct nand_row_converter rowconv; struct nand_bbt bbt; const struct nand_ops *ops; @@ -383,6 +523,40 @@ nanddev_get_memorg(struct nand_device *nand) return &nand->memorg; } +/** + * nanddev_get_ecc_conf() - Extract the ECC configuration from a NAND device + * @nand: NAND device + */ +static inline const struct nand_ecc_props * +nanddev_get_ecc_conf(struct nand_device *nand) +{ + return &nand->ecc.ctx.conf; +} + +/** + * nanddev_get_ecc_requirements() - Extract the ECC requirements from a NAND + * device + * @nand: NAND device + */ +static inline const struct nand_ecc_props * +nanddev_get_ecc_requirements(struct nand_device *nand) +{ + return &nand->ecc.requirements; +} + +/** + * nanddev_set_ecc_requirements() - Assign the ECC requirements of a NAND + * device + * @nand: NAND device + * @reqs: Requirements + */ +static inline void +nanddev_set_ecc_requirements(struct nand_device *nand, + const struct nand_ecc_props *reqs) +{ + nand->ecc.requirements = *reqs; +} + int nanddev_init(struct nand_device *nand, const struct nand_ops *ops, struct module *owner); void nanddev_cleanup(struct nand_device *nand); @@ -624,11 +798,13 @@ static inline void nanddev_pos_next_page(struct nand_device *nand, * layer. */ static inline void nanddev_io_iter_init(struct nand_device *nand, + enum nand_page_io_req_type reqtype, loff_t offs, struct mtd_oob_ops *req, struct nand_io_iter *iter) { struct mtd_info *mtd = nanddev_to_mtd(nand); + iter->req.type = reqtype; iter->req.mode = req->mode; iter->req.dataoffs = nanddev_offs_to_pos(nand, offs, &iter->req.pos); iter->req.ooboffs = req->ooboffs; @@ -698,8 +874,8 @@ static inline bool nanddev_io_iter_end(struct nand_device *nand, * * Should be used for iterate over pages that are contained in an MTD request. */ -#define nanddev_io_for_each_page(nand, start, req, iter) \ - for (nanddev_io_iter_init(nand, start, req, iter); \ +#define nanddev_io_for_each_page(nand, type, start, req, iter) \ + for (nanddev_io_iter_init(nand, type, start, req, iter); \ !nanddev_io_iter_end(nand, iter); \ nanddev_io_iter_next_page(nand, iter)) diff --git a/include/linux/mtd/pfow.h b/include/linux/mtd/pfow.h index 6166e7c60869..146413d4bdb7 100644 --- a/include/linux/mtd/pfow.h +++ b/include/linux/mtd/pfow.h @@ -121,37 +121,4 @@ static inline void send_pfow_command(struct map_info *map, map_write(map, CMD(LPDDR_START_EXECUTION), map->pfow_base + PFOW_COMMAND_EXECUTE); } - -static inline void print_drs_error(unsigned dsr) -{ - int prog_status = (dsr & DSR_RPS) >> 8; - - if (!(dsr & DSR_AVAILABLE)) - printk(KERN_NOTICE"DSR.15: (0) Device not Available\n"); - if (prog_status & 0x03) - printk(KERN_NOTICE"DSR.9,8: (11) Attempt to program invalid " - "half with 41h command\n"); - else if (prog_status & 0x02) - printk(KERN_NOTICE"DSR.9,8: (10) Object Mode Program attempt " - "in region with Control Mode data\n"); - else if (prog_status & 0x01) - printk(KERN_NOTICE"DSR.9,8: (01) Program attempt in region " - "with Object Mode data\n"); - if (!(dsr & DSR_READY_STATUS)) - printk(KERN_NOTICE"DSR.7: (0) Device is Busy\n"); - if (dsr & DSR_ESS) - printk(KERN_NOTICE"DSR.6: (1) Erase Suspended\n"); - if (dsr & DSR_ERASE_STATUS) - printk(KERN_NOTICE"DSR.5: (1) Erase/Blank check error\n"); - if (dsr & DSR_PROGRAM_STATUS) - printk(KERN_NOTICE"DSR.4: (1) Program Error\n"); - if (dsr & DSR_VPPS) - printk(KERN_NOTICE"DSR.3: (1) Vpp low detect, operation " - "aborted\n"); - if (dsr & DSR_PSS) - printk(KERN_NOTICE"DSR.2: (1) Program suspended\n"); - if (dsr & DSR_DPS) - printk(KERN_NOTICE"DSR.1: (1) Aborted Erase/Program attempt " - "on locked block\n"); -} #endif /* __LINUX_MTD_PFOW_H */ diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index a725b620aca2..aac07940de09 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -14,6 +14,7 @@ #define __LINUX_MTD_RAWNAND_H #include +#include #include #include #include @@ -80,25 +81,6 @@ struct nand_chip; #define NAND_DATA_IFACE_CHECK_ONLY -1 -/* - * Constants for ECC_MODES - */ -enum nand_ecc_mode { - NAND_ECC_INVALID, - NAND_ECC_NONE, - NAND_ECC_SOFT, - NAND_ECC_HW, - NAND_ECC_HW_SYNDROME, - NAND_ECC_ON_DIE, -}; - -enum nand_ecc_algo { - NAND_ECC_UNKNOWN, - NAND_ECC_HAMMING, - NAND_ECC_BCH, - NAND_ECC_RS, -}; - /* * Constants for Hardware ECC */ @@ -116,7 +98,6 @@ enum nand_ecc_algo { * pages and you want to rely on the default implementation. */ #define NAND_ECC_GENERIC_ERASED_CHECK BIT(0) -#define NAND_ECC_MAXIMIZE BIT(1) /* * Option constants for bizarre disfunctionality and real @@ -310,7 +291,8 @@ static const struct nand_ecc_caps __name = { \ /** * struct nand_ecc_ctrl - Control structure for ECC - * @mode: ECC mode + * @engine_type: ECC engine type + * @placement: OOB bytes placement * @algo: ECC algorithm * @steps: number of ECC steps per page * @size: data bytes per ECC step @@ -338,7 +320,7 @@ static const struct nand_ecc_caps __name = { \ * controller and always return contiguous in-band and * out-of-band data even if they're not stored * contiguously on the NAND chip (e.g. - * NAND_ECC_HW_SYNDROME interleaves in-band and + * NAND_ECC_PLACEMENT_INTERLEAVED interleaves in-band and * out-of-band data). * @write_page_raw: function to write a raw page without ECC. This function * should hide the specific layout used by the ECC @@ -346,7 +328,7 @@ static const struct nand_ecc_caps __name = { \ * in-band and out-of-band data. ECC controller is * responsible for doing the appropriate transformations * to adapt to its specific layout (e.g. - * NAND_ECC_HW_SYNDROME interleaves in-band and + * NAND_ECC_PLACEMENT_INTERLEAVED interleaves in-band and * out-of-band data). * @read_page: function to read a page according to the ECC generator * requirements; returns maximum number of bitflips corrected in @@ -362,7 +344,8 @@ static const struct nand_ecc_caps __name = { \ * @write_oob: function to write chip OOB data */ struct nand_ecc_ctrl { - enum nand_ecc_mode mode; + enum nand_ecc_engine_type engine_type; + enum nand_ecc_placement placement; enum nand_ecc_algo algo; int steps; int size; @@ -1161,9 +1144,6 @@ struct nand_chip { void *priv; }; -extern const struct mtd_ooblayout_ops nand_ooblayout_sp_ops; -extern const struct mtd_ooblayout_ops nand_ooblayout_lp_ops; - static inline struct nand_chip *mtd_to_nand(struct mtd_info *mtd) { return container_of(mtd, struct nand_chip, base.mtd); diff --git a/include/linux/overflow.h b/include/linux/overflow.h index f1c4e7b56bd9..ef74051d5cfe 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -3,6 +3,7 @@ #define __LINUX_OVERFLOW_H #include +#include /* * In the fallback code below, we need to compute the minimum and diff --git a/include/linux/platform_data/mtd-davinci.h b/include/linux/platform_data/mtd-davinci.h index 03e92c71b3fa..dd474dd44848 100644 --- a/include/linux/platform_data/mtd-davinci.h +++ b/include/linux/platform_data/mtd-davinci.h @@ -60,15 +60,16 @@ struct davinci_nand_pdata { /* platform_data */ struct mtd_partition *parts; unsigned nr_parts; - /* none == NAND_ECC_NONE (strongly *not* advised!!) - * soft == NAND_ECC_SOFT - * else == NAND_ECC_HW, according to ecc_bits + /* none == NAND_ECC_ENGINE_TYPE_NONE (strongly *not* advised!!) + * soft == NAND_ECC_ENGINE_TYPE_SOFT + * else == NAND_ECC_ENGINE_TYPE_ON_HOST, according to ecc_bits * * All DaVinci-family chips support 1-bit hardware ECC. * Newer ones also support 4-bit ECC, but are awkward * using it with large page chips. */ - enum nand_ecc_mode ecc_mode; + enum nand_ecc_engine_type engine_type; + enum nand_ecc_placement ecc_placement; u8 ecc_bits; /* e.g. NAND_BUSWIDTH_16 */ diff --git a/include/linux/platform_data/mtd-nand-s3c2410.h b/include/linux/platform_data/mtd-nand-s3c2410.h index 08675b16f9e1..25390fc3e795 100644 --- a/include/linux/platform_data/mtd-nand-s3c2410.h +++ b/include/linux/platform_data/mtd-nand-s3c2410.h @@ -49,7 +49,7 @@ struct s3c2410_platform_nand { unsigned int ignore_unset_ecc:1; - enum nand_ecc_mode ecc_mode; + enum nand_ecc_engine_type engine_type; int nr_sets; struct s3c2410_nand_set *sets; diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h index f464d85e88a4..aeb242cefebf 100644 --- a/include/linux/qed/qed_rdma_if.h +++ b/include/linux/qed/qed_rdma_if.h @@ -242,10 +242,8 @@ struct qed_rdma_register_tid_in_params { bool pbl_two_level; u8 pbl_page_size_log; u8 page_size_log; - u32 fbo; u64 length; u64 vaddr; - bool zbva; bool phy_mr; bool dma_mr; diff --git a/include/linux/qed/qede_rdma.h b/include/linux/qed/qede_rdma.h index 072da2f6da37..0d5564a59a59 100644 --- a/include/linux/qed/qede_rdma.h +++ b/include/linux/qed/qede_rdma.h @@ -20,7 +20,8 @@ enum qede_rdma_event { QEDE_UP, QEDE_DOWN, QEDE_CHANGE_ADDR, - QEDE_CLOSE + QEDE_CLOSE, + QEDE_CHANGE_MTU, }; struct qede_rdma_event_work { @@ -54,6 +55,7 @@ void qede_rdma_dev_event_open(struct qede_dev *dev); void qede_rdma_dev_event_close(struct qede_dev *dev); void qede_rdma_dev_remove(struct qede_dev *dev, bool recovery); void qede_rdma_event_changeaddr(struct qede_dev *edr); +void qede_rdma_event_change_mtu(struct qede_dev *edev); #else static inline int qede_rdma_dev_add(struct qede_dev *dev, diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 45cf7b69d852..36c47e7e66a2 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -165,6 +165,22 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf, #define for_each_sgtable_dma_sg(sgt, sg, i) \ for_each_sg((sgt)->sgl, sg, (sgt)->nents, i) +static inline void __sg_chain(struct scatterlist *chain_sg, + struct scatterlist *sgl) +{ + /* + * offset and length are unused for chain entry. Clear them. + */ + chain_sg->offset = 0; + chain_sg->length = 0; + + /* + * Set lowest bit to indicate a link pointer, and make sure to clear + * the termination bit if it happens to be set. + */ + chain_sg->page_link = ((unsigned long) sgl | SG_CHAIN) & ~SG_END; +} + /** * sg_chain - Chain two sglists together * @prv: First scatterlist @@ -178,18 +194,7 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf, static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, struct scatterlist *sgl) { - /* - * offset and length are unused for chain entry. Clear them. - */ - prv[prv_nents - 1].offset = 0; - prv[prv_nents - 1].length = 0; - - /* - * Set lowest bit to indicate a link pointer, and make sure to clear - * the termination bit if it happens to be set. - */ - prv[prv_nents - 1].page_link = ((unsigned long) sgl | SG_CHAIN) - & ~SG_END; + __sg_chain(&prv[prv_nents - 1], sgl); } /** @@ -286,10 +291,11 @@ void sg_free_table(struct sg_table *); int __sg_alloc_table(struct sg_table *, unsigned int, unsigned int, struct scatterlist *, unsigned int, gfp_t, sg_alloc_fn *); int sg_alloc_table(struct sg_table *, unsigned int, gfp_t); -int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, - unsigned int n_pages, unsigned int offset, - unsigned long size, unsigned int max_segment, - gfp_t gfp_mask); +struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt, + struct page **pages, unsigned int n_pages, unsigned int offset, + unsigned long size, unsigned int max_segment, + struct scatterlist *prv, unsigned int left_pages, + gfp_t gfp_mask); int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, unsigned int n_pages, unsigned int offset, unsigned long size, gfp_t gfp_mask); diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 42ef807e5d84..d07ea27e72a9 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -55,6 +55,7 @@ enum thermal_notify_event { THERMAL_DEVICE_UP, /* Thermal device is up after a down event */ THERMAL_DEVICE_POWER_CAPABILITY_CHANGED, /* power capability changed */ THERMAL_TABLE_CHANGED, /* Thermal table(s) changed */ + THERMAL_EVENT_KEEP_ALIVE, /* Request for user space handler to respond */ }; struct thermal_zone_device_ops { @@ -84,12 +85,9 @@ struct thermal_cooling_device_ops { int (*get_max_state) (struct thermal_cooling_device *, unsigned long *); int (*get_cur_state) (struct thermal_cooling_device *, unsigned long *); int (*set_cur_state) (struct thermal_cooling_device *, unsigned long); - int (*get_requested_power)(struct thermal_cooling_device *, - struct thermal_zone_device *, u32 *); - int (*state2power)(struct thermal_cooling_device *, - struct thermal_zone_device *, unsigned long, u32 *); - int (*power2state)(struct thermal_cooling_device *, - struct thermal_zone_device *, u32, unsigned long *); + int (*get_requested_power)(struct thermal_cooling_device *, u32 *); + int (*state2power)(struct thermal_cooling_device *, unsigned long, u32 *); + int (*power2state)(struct thermal_cooling_device *, u32, unsigned long *); }; struct thermal_cooling_device { diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h index 66a8f369a2fa..bae29f50adff 100644 --- a/include/rdma/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -110,5 +110,8 @@ const struct ib_gid_attr *rdma_get_gid_attr(struct ib_device *device, u8 port_num, int index); void rdma_put_gid_attr(const struct ib_gid_attr *attr); void rdma_hold_gid_attr(const struct ib_gid_attr *attr); +ssize_t rdma_query_gid_table(struct ib_device *device, + struct ib_uverbs_gid_entry *entries, + size_t max_entries); #endif /* _IB_CACHE_H */ diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 382427add677..e23eb357b761 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -14,9 +14,6 @@ #include #include -/* ib_cm and ib_user_cm modules share /sys/class/infiniband_cm */ -extern struct class cm_class; - enum ib_cm_state { IB_CM_IDLE, IB_CM_LISTEN, diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 71f573a418bf..70597508c765 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -17,6 +17,7 @@ struct ib_umem_odp; struct ib_umem { struct ib_device *ibdev; struct mm_struct *owning_mm; + u64 iova; size_t length; unsigned long address; u32 writable : 1; @@ -33,19 +34,46 @@ static inline int ib_umem_offset(struct ib_umem *umem) return umem->address & ~PAGE_MASK; } +static inline size_t ib_umem_num_dma_blocks(struct ib_umem *umem, + unsigned long pgsz) +{ + return (size_t)((ALIGN(umem->iova + umem->length, pgsz) - + ALIGN_DOWN(umem->iova, pgsz))) / + pgsz; +} + static inline size_t ib_umem_num_pages(struct ib_umem *umem) { - return (ALIGN(umem->address + umem->length, PAGE_SIZE) - - ALIGN_DOWN(umem->address, PAGE_SIZE)) >> - PAGE_SHIFT; + return ib_umem_num_dma_blocks(umem, PAGE_SIZE); } +static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter, + struct ib_umem *umem, + unsigned long pgsz) +{ + __rdma_block_iter_start(biter, umem->sg_head.sgl, umem->nmap, pgsz); +} + +/** + * rdma_umem_for_each_dma_block - iterate over contiguous DMA blocks of the umem + * @umem: umem to iterate over + * @pgsz: Page size to split the list into + * + * pgsz must be <= PAGE_SIZE or computed by ib_umem_find_best_pgsz(). The + * returned DMA blocks will be aligned to pgsz and span the range: + * ALIGN_DOWN(umem->address, pgsz) to ALIGN(umem->address + umem->length, pgsz) + * + * Performs exactly ib_umem_num_dma_blocks() iterations. + */ +#define rdma_umem_for_each_dma_block(umem, biter, pgsz) \ + for (__rdma_umem_block_iter_start(biter, umem, pgsz); \ + __rdma_block_iter_next(biter);) + #ifdef CONFIG_INFINIBAND_USER_MEM struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, size_t size, int access); void ib_umem_release(struct ib_umem *umem); -int ib_umem_page_count(struct ib_umem *umem); int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, size_t length); unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, @@ -63,15 +91,15 @@ static inline struct ib_umem *ib_umem_get(struct ib_device *device, return ERR_PTR(-EINVAL); } static inline void ib_umem_release(struct ib_umem *umem) { } -static inline int ib_umem_page_count(struct ib_umem *umem) { return 0; } static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, size_t length) { return -EINVAL; } -static inline int ib_umem_find_best_pgsz(struct ib_umem *umem, - unsigned long pgsz_bitmap, - unsigned long virt) { - return -EINVAL; +static inline unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, + unsigned long pgsz_bitmap, + unsigned long virt) +{ + return 0; } #endif /* CONFIG_INFINIBAND_USER_MEM */ diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h index d16d2c17e733..0844c1d05ac6 100644 --- a/include/rdma/ib_umem_odp.h +++ b/include/rdma/ib_umem_odp.h @@ -14,17 +14,13 @@ struct ib_umem_odp { struct mmu_interval_notifier notifier; struct pid *tgid; + /* An array of the pfns included in the on-demand paging umem. */ + unsigned long *pfn_list; + /* - * An array of the pages included in the on-demand paging umem. - * Indices of pages that are currently not mapped into the device will - * contain NULL. - */ - struct page **page_list; - /* - * An array of the same size as page_list, with DMA addresses mapped - * for pages the pages in page_list. The lower two bits designate - * access permissions. See ODP_READ_ALLOWED_BIT and - * ODP_WRITE_ALLOWED_BIT. + * An array with DMA addresses mapped for pfns in pfn_list. + * The lower two bits designate access permissions. + * See ODP_READ_ALLOWED_BIT and ODP_WRITE_ALLOWED_BIT. */ dma_addr_t *dma_list; /* @@ -97,9 +93,8 @@ ib_umem_odp_alloc_child(struct ib_umem_odp *root_umem, unsigned long addr, const struct mmu_interval_notifier_ops *ops); void ib_umem_odp_release(struct ib_umem_odp *umem_odp); -int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset, - u64 bcnt, u64 access_mask, - unsigned long current_seq); +int ib_umem_odp_map_dma_and_lock(struct ib_umem_odp *umem_odp, u64 start_offset, + u64 bcnt, u64 access_mask, bool fault); void ib_umem_odp_unmap_dma_pages(struct ib_umem_odp *umem_odp, u64 start_offset, u64 bound); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c0b2fa7e9b95..9bf6c319a670 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -138,10 +138,9 @@ union ib_gid { extern union ib_gid zgid; enum ib_gid_type { - /* If link layer is Ethernet, this is RoCE V1 */ - IB_GID_TYPE_IB = 0, - IB_GID_TYPE_ROCE = 0, - IB_GID_TYPE_ROCE_UDP_ENCAP = 1, + IB_GID_TYPE_IB = IB_UVERBS_GID_TYPE_IB, + IB_GID_TYPE_ROCE = IB_UVERBS_GID_TYPE_ROCE_V1, + IB_GID_TYPE_ROCE_UDP_ENCAP = IB_UVERBS_GID_TYPE_ROCE_V2, IB_GID_TYPE_SIZE }; @@ -180,7 +179,7 @@ rdma_node_get_transport(unsigned int node_type); enum rdma_network_type { RDMA_NETWORK_IB, - RDMA_NETWORK_ROCE_V1 = RDMA_NETWORK_IB, + RDMA_NETWORK_ROCE_V1, RDMA_NETWORK_IPV4, RDMA_NETWORK_IPV6 }; @@ -190,9 +189,10 @@ static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type net if (network_type == RDMA_NETWORK_IPV4 || network_type == RDMA_NETWORK_IPV6) return IB_GID_TYPE_ROCE_UDP_ENCAP; - - /* IB_GID_TYPE_IB same as RDMA_NETWORK_ROCE_V1 */ - return IB_GID_TYPE_IB; + else if (network_type == RDMA_NETWORK_ROCE_V1) + return IB_GID_TYPE_ROCE; + else + return IB_GID_TYPE_IB; } static inline enum rdma_network_type @@ -201,6 +201,9 @@ rdma_gid_attr_network_type(const struct ib_gid_attr *attr) if (attr->gid_type == IB_GID_TYPE_IB) return RDMA_NETWORK_IB; + if (attr->gid_type == IB_GID_TYPE_ROCE) + return RDMA_NETWORK_ROCE_V1; + if (ipv6_addr_v4mapped((struct in6_addr *)&attr->gid)) return RDMA_NETWORK_IPV4; else @@ -535,7 +538,8 @@ enum ib_port_speed { IB_SPEED_FDR10 = 8, IB_SPEED_FDR = 16, IB_SPEED_EDR = 32, - IB_SPEED_HDR = 64 + IB_SPEED_HDR = 64, + IB_SPEED_NDR = 128, }; /** @@ -669,7 +673,7 @@ struct ib_port_attr { u8 subnet_timeout; u8 init_type_reply; u8 active_width; - u8 active_speed; + u16 active_speed; u8 phys_state; u16 port_cap_flags2; }; @@ -952,13 +956,14 @@ enum ib_wc_status { const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status); enum ib_wc_opcode { - IB_WC_SEND, - IB_WC_RDMA_WRITE, - IB_WC_RDMA_READ, - IB_WC_COMP_SWAP, - IB_WC_FETCH_ADD, - IB_WC_LSO, - IB_WC_LOCAL_INV, + IB_WC_SEND = IB_UVERBS_WC_SEND, + IB_WC_RDMA_WRITE = IB_UVERBS_WC_RDMA_WRITE, + IB_WC_RDMA_READ = IB_UVERBS_WC_RDMA_READ, + IB_WC_COMP_SWAP = IB_UVERBS_WC_COMP_SWAP, + IB_WC_FETCH_ADD = IB_UVERBS_WC_FETCH_ADD, + IB_WC_BIND_MW = IB_UVERBS_WC_BIND_MW, + IB_WC_LOCAL_INV = IB_UVERBS_WC_LOCAL_INV, + IB_WC_LSO = IB_UVERBS_WC_TSO, IB_WC_REG_MR, IB_WC_MASKED_COMP_SWAP, IB_WC_MASKED_FETCH_ADD, @@ -1291,6 +1296,7 @@ enum ib_wr_opcode { IB_WR_RDMA_READ = IB_UVERBS_WR_RDMA_READ, IB_WR_ATOMIC_CMP_AND_SWP = IB_UVERBS_WR_ATOMIC_CMP_AND_SWP, IB_WR_ATOMIC_FETCH_AND_ADD = IB_UVERBS_WR_ATOMIC_FETCH_AND_ADD, + IB_WR_BIND_MW = IB_UVERBS_WR_BIND_MW, IB_WR_LSO = IB_UVERBS_WR_TSO, IB_WR_SEND_WITH_INV = IB_UVERBS_WR_SEND_WITH_INV, IB_WR_RDMA_READ_WITH_INV = IB_UVERBS_WR_RDMA_READ_WITH_INV, @@ -1463,11 +1469,6 @@ enum rdma_remove_reason { RDMA_REMOVE_DRIVER_REMOVE, /* uobj is being cleaned-up before being committed */ RDMA_REMOVE_ABORT, - /* - * uobj has been fully created, with the uobj->object set, but is being - * cleaned up before being comitted - */ - RDMA_REMOVE_ABORT_HWOBJ, }; struct ib_rdmacg_object { @@ -1479,12 +1480,6 @@ struct ib_rdmacg_object { struct ib_ucontext { struct ib_device *device; struct ib_uverbs_file *ufile; - /* - * 'closing' can be read by the driver only during a destroy callback, - * it is set when we are closing the file descriptor and indicates - * that mm_sem may be locked. - */ - bool closing; bool cleanup_retryable; @@ -1863,17 +1858,6 @@ enum ib_flow_spec_type { #define IB_FLOW_SPEC_LAYER_MASK 0xF0 #define IB_FLOW_SPEC_SUPPORT_LAYERS 10 -/* Flow steering rule priority is set according to it's domain. - * Lower domain value means higher priority. - */ -enum ib_flow_domain { - IB_FLOW_DOMAIN_USER, - IB_FLOW_DOMAIN_ETHTOOL, - IB_FLOW_DOMAIN_RFS, - IB_FLOW_DOMAIN_NIC, - IB_FLOW_DOMAIN_NUM /* Must be last */ -}; - enum ib_flow_flags { IB_FLOW_ATTR_FLAGS_DONT_TRAP = 1UL << 1, /* Continue match, no steal */ IB_FLOW_ATTR_FLAGS_EGRESS = 1UL << 2, /* Egress flow */ @@ -2414,12 +2398,12 @@ struct ib_device_ops { void (*mmap_free)(struct rdma_user_mmap_entry *entry); void (*disassociate_ucontext)(struct ib_ucontext *ibcontext); int (*alloc_pd)(struct ib_pd *pd, struct ib_udata *udata); - void (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata); + int (*dealloc_pd)(struct ib_pd *pd, struct ib_udata *udata); int (*create_ah)(struct ib_ah *ah, struct rdma_ah_init_attr *attr, struct ib_udata *udata); int (*modify_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int (*query_ah)(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); - void (*destroy_ah)(struct ib_ah *ah, u32 flags); + int (*destroy_ah)(struct ib_ah *ah, u32 flags); int (*create_srq)(struct ib_srq *srq, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata); @@ -2427,7 +2411,7 @@ struct ib_device_ops { enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); int (*query_srq)(struct ib_srq *srq, struct ib_srq_attr *srq_attr); - void (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata); + int (*destroy_srq)(struct ib_srq *srq, struct ib_udata *udata); struct ib_qp *(*create_qp)(struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr, struct ib_udata *udata); @@ -2439,7 +2423,7 @@ struct ib_device_ops { int (*create_cq)(struct ib_cq *cq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); - void (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata); + int (*destroy_cq)(struct ib_cq *cq, struct ib_udata *udata); int (*resize_cq)(struct ib_cq *cq, int cqe, struct ib_udata *udata); struct ib_mr *(*get_dma_mr)(struct ib_pd *pd, int mr_access_flags); struct ib_mr *(*reg_user_mr)(struct ib_pd *pd, u64 start, u64 length, @@ -2462,16 +2446,15 @@ struct ib_device_ops { unsigned int *sg_offset); int (*check_mr_status)(struct ib_mr *mr, u32 check_mask, struct ib_mr_status *mr_status); - struct ib_mw *(*alloc_mw)(struct ib_pd *pd, enum ib_mw_type type, - struct ib_udata *udata); + int (*alloc_mw)(struct ib_mw *mw, struct ib_udata *udata); int (*dealloc_mw)(struct ib_mw *mw); int (*attach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); int (*detach_mcast)(struct ib_qp *qp, union ib_gid *gid, u16 lid); int (*alloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata); - void (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata); + int (*dealloc_xrcd)(struct ib_xrcd *xrcd, struct ib_udata *udata); struct ib_flow *(*create_flow)(struct ib_qp *qp, struct ib_flow_attr *flow_attr, - int domain, struct ib_udata *udata); + struct ib_udata *udata); int (*destroy_flow)(struct ib_flow *flow_id); struct ib_flow_action *(*create_flow_action_esp)( struct ib_device *device, @@ -2496,13 +2479,12 @@ struct ib_device_ops { struct ib_wq *(*create_wq)(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata); - void (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata); + int (*destroy_wq)(struct ib_wq *wq, struct ib_udata *udata); int (*modify_wq)(struct ib_wq *wq, struct ib_wq_attr *attr, u32 wq_attr_mask, struct ib_udata *udata); - struct ib_rwq_ind_table *(*create_rwq_ind_table)( - struct ib_device *device, - struct ib_rwq_ind_table_init_attr *init_attr, - struct ib_udata *udata); + int (*create_rwq_ind_table)(struct ib_rwq_ind_table *ib_rwq_ind_table, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata); int (*destroy_rwq_ind_table)(struct ib_rwq_ind_table *wq_ind_table); struct ib_dm *(*alloc_dm)(struct ib_device *device, struct ib_ucontext *context, @@ -2514,7 +2496,7 @@ struct ib_device_ops { struct uverbs_attr_bundle *attrs); int (*create_counters)(struct ib_counters *counters, struct uverbs_attr_bundle *attrs); - void (*destroy_counters)(struct ib_counters *counters); + int (*destroy_counters)(struct ib_counters *counters); int (*read_counters)(struct ib_counters *counters, struct ib_counters_read_attr *counters_read_attr, struct uverbs_attr_bundle *attrs); @@ -2624,7 +2606,9 @@ struct ib_device_ops { DECLARE_RDMA_OBJ_SIZE(ib_ah); DECLARE_RDMA_OBJ_SIZE(ib_counters); DECLARE_RDMA_OBJ_SIZE(ib_cq); + DECLARE_RDMA_OBJ_SIZE(ib_mw); DECLARE_RDMA_OBJ_SIZE(ib_pd); + DECLARE_RDMA_OBJ_SIZE(ib_rwq_ind_table); DECLARE_RDMA_OBJ_SIZE(ib_srq); DECLARE_RDMA_OBJ_SIZE(ib_ucontext); DECLARE_RDMA_OBJ_SIZE(ib_xrcd); @@ -2798,7 +2782,8 @@ void ib_dealloc_device(struct ib_device *device); void ib_get_device_fw_str(struct ib_device *device, char *str); -int ib_register_device(struct ib_device *device, const char *name); +int ib_register_device(struct ib_device *device, const char *name, + struct device *dma_device); void ib_unregister_device(struct ib_device *device); void ib_unregister_driver(enum rdma_driver_id driver_id); void ib_unregister_device_and_put(struct ib_device *device); @@ -3351,30 +3336,6 @@ static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num) return rdma_protocol_iwarp(dev, port_num); } -/** - * rdma_find_pg_bit - Find page bit given address and HW supported page sizes - * - * @addr: address - * @pgsz_bitmap: bitmap of HW supported page sizes - */ -static inline unsigned int rdma_find_pg_bit(unsigned long addr, - unsigned long pgsz_bitmap) -{ - unsigned long align; - unsigned long pgsz; - - align = addr & -addr; - - /* Find page bit such that addr is aligned to the highest supported - * HW page size - */ - pgsz = pgsz_bitmap & ~(-align << 1); - if (!pgsz) - return __ffs(pgsz_bitmap); - - return __fls(pgsz); -} - /** * rdma_core_cap_opa_port - Return whether the RDMA Port is OPA or not. * @device: Device @@ -3472,12 +3433,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, #define ib_alloc_pd(device, flags) \ __ib_alloc_pd((device), (flags), KBUILD_MODNAME) -/** - * ib_dealloc_pd_user - Deallocate kernel/user PD - * @pd: The protection domain - * @udata: Valid user data or NULL for kernel objects - */ -void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata); +int ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata); /** * ib_dealloc_pd - Deallocate kernel PD @@ -3487,7 +3443,9 @@ void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata); */ static inline void ib_dealloc_pd(struct ib_pd *pd) { - ib_dealloc_pd_user(pd, NULL); + int ret = ib_dealloc_pd_user(pd, NULL); + + WARN_ONCE(ret, "Destroy of kernel PD shouldn't fail"); } enum rdma_create_ah_flags { @@ -3615,9 +3573,11 @@ int rdma_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata); * * NOTE: for user ah use rdma_destroy_ah_user with valid udata! */ -static inline int rdma_destroy_ah(struct ib_ah *ah, u32 flags) +static inline void rdma_destroy_ah(struct ib_ah *ah, u32 flags) { - return rdma_destroy_ah_user(ah, flags, NULL); + int ret = rdma_destroy_ah_user(ah, flags, NULL); + + WARN_ONCE(ret, "Destroy of kernel AH shouldn't fail"); } struct ib_srq *ib_create_srq_user(struct ib_pd *pd, @@ -3671,9 +3631,11 @@ int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata); * * NOTE: for user srq use ib_destroy_srq_user with valid udata! */ -static inline int ib_destroy_srq(struct ib_srq *srq) +static inline void ib_destroy_srq(struct ib_srq *srq) { - return ib_destroy_srq_user(srq, NULL); + int ret = ib_destroy_srq_user(srq, NULL); + + WARN_ONCE(ret, "Destroy of kernel SRQ shouldn't fail"); } /** @@ -3817,46 +3779,15 @@ static inline int ib_post_recv(struct ib_qp *qp, return qp->device->ops.post_recv(qp, recv_wr, bad_recv_wr ? : &dummy); } -struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private, - int nr_cqe, int comp_vector, - enum ib_poll_context poll_ctx, - const char *caller, struct ib_udata *udata); - -/** - * ib_alloc_cq_user: Allocate kernel/user CQ - * @dev: The IB device - * @private: Private data attached to the CQE - * @nr_cqe: Number of CQEs in the CQ - * @comp_vector: Completion vector used for the IRQs - * @poll_ctx: Context used for polling the CQ - * @udata: Valid user data or NULL for kernel objects - */ -static inline struct ib_cq *ib_alloc_cq_user(struct ib_device *dev, - void *private, int nr_cqe, - int comp_vector, - enum ib_poll_context poll_ctx, - struct ib_udata *udata) -{ - return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx, - KBUILD_MODNAME, udata); -} - -/** - * ib_alloc_cq: Allocate kernel CQ - * @dev: The IB device - * @private: Private data attached to the CQE - * @nr_cqe: Number of CQEs in the CQ - * @comp_vector: Completion vector used for the IRQs - * @poll_ctx: Context used for polling the CQ - * - * NOTE: for user cq use ib_alloc_cq_user with valid udata! - */ +struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe, + int comp_vector, enum ib_poll_context poll_ctx, + const char *caller); static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private, int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx) { - return ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx, - NULL); + return __ib_alloc_cq(dev, private, nr_cqe, comp_vector, poll_ctx, + KBUILD_MODNAME); } struct ib_cq *__ib_alloc_cq_any(struct ib_device *dev, void *private, @@ -3878,26 +3809,7 @@ static inline struct ib_cq *ib_alloc_cq_any(struct ib_device *dev, KBUILD_MODNAME); } -/** - * ib_free_cq_user - Free kernel/user CQ - * @cq: The CQ to free - * @udata: Valid user data or NULL for kernel objects - * - * NOTE: This function shouldn't be called on shared CQs. - */ -void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata); - -/** - * ib_free_cq - Free kernel CQ - * @cq: The CQ to free - * - * NOTE: for user cq use ib_free_cq_user with valid udata! - */ -static inline void ib_free_cq(struct ib_cq *cq) -{ - ib_free_cq_user(cq, NULL); -} - +void ib_free_cq(struct ib_cq *cq); int ib_process_cq_direct(struct ib_cq *cq, int budget); /** @@ -3955,7 +3867,9 @@ int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata); */ static inline void ib_destroy_cq(struct ib_cq *cq) { - ib_destroy_cq_user(cq, NULL); + int ret = ib_destroy_cq_user(cq, NULL); + + WARN_ONCE(ret, "Destroy of kernel CQ shouldn't fail"); } /** @@ -4379,10 +4293,9 @@ struct net_device *ib_device_netdev(struct ib_device *dev, u8 port); struct ib_wq *ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr); -int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata); +int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata); int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *attr, u32 wq_attr_mask); -int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table); int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset, unsigned int page_size); @@ -4410,7 +4323,7 @@ void ib_drain_rq(struct ib_qp *qp); void ib_drain_sq(struct ib_qp *qp); void ib_drain_qp(struct ib_qp *qp); -int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width); +int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u16 *speed, u8 *width); static inline u8 *rdma_ah_retrieve_dmac(struct rdma_ah_attr *attr) { @@ -4717,6 +4630,7 @@ bool rdma_dev_access_netns(const struct ib_device *device, const struct net *net); #define IB_ROCE_UDP_ENCAP_VALID_PORT_MIN (0xC000) +#define IB_ROCE_UDP_ENCAP_VALID_PORT_MAX (0xFFFF) #define IB_GRH_FLOWLABEL_MASK (0x000FFFFF) /** diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index cf5da2ae49bf..c672ae1da26b 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -110,11 +110,14 @@ struct rdma_cm_id { u8 port_num; }; -struct rdma_cm_id *__rdma_create_id(struct net *net, - rdma_cm_event_handler event_handler, - void *context, enum rdma_ucm_port_space ps, - enum ib_qp_type qp_type, - const char *caller); +struct rdma_cm_id * +__rdma_create_kernel_id(struct net *net, rdma_cm_event_handler event_handler, + void *context, enum rdma_ucm_port_space ps, + enum ib_qp_type qp_type, const char *caller); +struct rdma_cm_id *rdma_create_user_id(rdma_cm_event_handler event_handler, + void *context, + enum rdma_ucm_port_space ps, + enum ib_qp_type qp_type); /** * rdma_create_id - Create an RDMA identifier. @@ -132,9 +135,9 @@ struct rdma_cm_id *__rdma_create_id(struct net *net, * The event handler callback serializes on the id's mutex and is * allowed to sleep. */ -#define rdma_create_id(net, event_handler, context, ps, qp_type) \ - __rdma_create_id((net), (event_handler), (context), (ps), (qp_type), \ - KBUILD_MODNAME) +#define rdma_create_id(net, event_handler, context, ps, qp_type) \ + __rdma_create_kernel_id(net, event_handler, context, ps, qp_type, \ + KBUILD_MODNAME) /** * rdma_destroy_id - Destroys an RDMA identifier. @@ -250,29 +253,12 @@ int rdma_connect_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, */ int rdma_listen(struct rdma_cm_id *id, int backlog); -int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, - const char *caller); +int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param); -int __rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, - const char *caller, struct rdma_ucm_ece *ece); - -/** - * rdma_accept - Called to accept a connection request or response. - * @id: Connection identifier associated with the request. - * @conn_param: Information needed to establish the connection. This must be - * provided if accepting a connection request. If accepting a connection - * response, this parameter must be NULL. - * - * Typically, this routine is only called by the listener to accept a connection - * request. It must also be called on the active side of a connection if the - * user is performing their own QP transitions. - * - * In the case of error, a reject message is sent to the remote side and the - * state of the qp associated with the id is modified to error, such that any - * previously posted receive buffers would be flushed. - */ -#define rdma_accept(id, conn_param) \ - __rdma_accept((id), (conn_param), KBUILD_MODNAME) +void rdma_lock_handler(struct rdma_cm_id *id); +void rdma_unlock_handler(struct rdma_cm_id *id); +int rdma_accept_ece(struct rdma_cm_id *id, struct rdma_conn_param *conn_param, + struct rdma_ucm_ece *ece); /** * rdma_notify - Notifies the RDMA CM of an asynchronous event that has diff --git a/include/rdma/restrack.h b/include/rdma/restrack.h index 7682d1bcf789..d3a1cc5be7bc 100644 --- a/include/rdma/restrack.h +++ b/include/rdma/restrack.h @@ -106,22 +106,11 @@ struct rdma_restrack_entry { int rdma_restrack_count(struct ib_device *dev, enum rdma_restrack_type type); - -void rdma_restrack_kadd(struct rdma_restrack_entry *res); -void rdma_restrack_uadd(struct rdma_restrack_entry *res); - -/** - * rdma_restrack_del() - delete object from the reource tracking database - * @res: resource entry - * @type: actual type of object to operate - */ -void rdma_restrack_del(struct rdma_restrack_entry *res); - /** * rdma_is_kernel_res() - check the owner of resource * @res: resource entry */ -static inline bool rdma_is_kernel_res(struct rdma_restrack_entry *res) +static inline bool rdma_is_kernel_res(const struct rdma_restrack_entry *res) { return !res->user; } @@ -138,14 +127,6 @@ int __must_check rdma_restrack_get(struct rdma_restrack_entry *res); */ int rdma_restrack_put(struct rdma_restrack_entry *res); -/** - * rdma_restrack_set_task() - set the task for this resource - * @res: resource entry - * @caller: kernel name, the current task will be used if the caller is NULL. - */ -void rdma_restrack_set_task(struct rdma_restrack_entry *res, - const char *caller); - /* * Helper functions for rdma drivers when filling out * nldev driver attributes. diff --git a/include/trace/events/rdma.h b/include/trace/events/rdma.h index aa19afc73a4e..81bb454fc288 100644 --- a/include/trace/events/rdma.h +++ b/include/trace/events/rdma.h @@ -6,7 +6,6 @@ /* * enum ib_event_type, from include/rdma/ib_verbs.h */ - #define IB_EVENT_LIST \ ib_event(CQ_ERR) \ ib_event(QP_FATAL) \ @@ -90,6 +89,46 @@ IB_WC_STATUS_LIST #define rdma_show_wc_status(x) \ __print_symbolic(x, IB_WC_STATUS_LIST) +/* + * enum ib_cm_event_type, from include/rdma/ib_cm.h + */ +#define IB_CM_EVENT_LIST \ + ib_cm_event(REQ_ERROR) \ + ib_cm_event(REQ_RECEIVED) \ + ib_cm_event(REP_ERROR) \ + ib_cm_event(REP_RECEIVED) \ + ib_cm_event(RTU_RECEIVED) \ + ib_cm_event(USER_ESTABLISHED) \ + ib_cm_event(DREQ_ERROR) \ + ib_cm_event(DREQ_RECEIVED) \ + ib_cm_event(DREP_RECEIVED) \ + ib_cm_event(TIMEWAIT_EXIT) \ + ib_cm_event(MRA_RECEIVED) \ + ib_cm_event(REJ_RECEIVED) \ + ib_cm_event(LAP_ERROR) \ + ib_cm_event(LAP_RECEIVED) \ + ib_cm_event(APR_RECEIVED) \ + ib_cm_event(SIDR_REQ_ERROR) \ + ib_cm_event(SIDR_REQ_RECEIVED) \ + ib_cm_event_end(SIDR_REP_RECEIVED) + +#undef ib_cm_event +#undef ib_cm_event_end + +#define ib_cm_event(x) TRACE_DEFINE_ENUM(IB_CM_##x); +#define ib_cm_event_end(x) TRACE_DEFINE_ENUM(IB_CM_##x); + +IB_CM_EVENT_LIST + +#undef ib_cm_event +#undef ib_cm_event_end + +#define ib_cm_event(x) { IB_CM_##x, #x }, +#define ib_cm_event_end(x) { IB_CM_##x, #x } + +#define rdma_show_ib_cm_event(x) \ + __print_symbolic(x, IB_CM_EVENT_LIST) + /* * enum rdma_cm_event_type, from include/rdma/rdma_cm.h */ diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index abe942225637..b6aad52beb62 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -13,6 +13,7 @@ #include #include #include +#include #include /** diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h index 507a2862bedb..f89fbb5b1e8d 100644 --- a/include/uapi/rdma/efa-abi.h +++ b/include/uapi/rdma/efa-abi.h @@ -105,6 +105,7 @@ struct efa_ibv_create_ah_resp { enum { EFA_QUERY_DEVICE_CAPS_RDMA_READ = 1 << 0, + EFA_QUERY_DEVICE_CAPS_RNR_RETRY = 1 << 1, }; struct efa_ibv_ex_query_device_resp { diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index eb76b38a00d4..9ec85f76e9ac 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -39,6 +39,8 @@ struct hns_roce_ib_create_cq { __aligned_u64 buf_addr; __aligned_u64 db_addr; + __u32 cqe_size; + __u32 reserved; }; struct hns_roce_ib_create_cq_resp { @@ -73,7 +75,7 @@ struct hns_roce_ib_create_qp_resp { struct hns_roce_ib_alloc_ucontext_resp { __u32 qp_tab_size; - __u32 reserved; + __u32 cqe_size; }; struct hns_roce_ib_alloc_pd_resp { diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h index 99dcabf61a71..7968a1845355 100644 --- a/include/uapi/rdma/ib_user_ioctl_cmds.h +++ b/include/uapi/rdma/ib_user_ioctl_cmds.h @@ -70,6 +70,8 @@ enum uverbs_methods_device { UVERBS_METHOD_QUERY_PORT, UVERBS_METHOD_GET_CONTEXT, UVERBS_METHOD_QUERY_CONTEXT, + UVERBS_METHOD_QUERY_GID_TABLE, + UVERBS_METHOD_QUERY_GID_ENTRY, }; enum uverbs_attrs_invoke_write_cmd_attr_ids { @@ -352,4 +354,18 @@ enum uverbs_attrs_async_event_create { UVERBS_ATTR_ASYNC_EVENT_ALLOC_FD_HANDLE, }; +enum uverbs_attrs_query_gid_table_cmd_attr_ids { + UVERBS_ATTR_QUERY_GID_TABLE_ENTRY_SIZE, + UVERBS_ATTR_QUERY_GID_TABLE_FLAGS, + UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES, + UVERBS_ATTR_QUERY_GID_TABLE_RESP_NUM_ENTRIES, +}; + +enum uverbs_attrs_query_gid_entry_cmd_attr_ids { + UVERBS_ATTR_QUERY_GID_ENTRY_PORT, + UVERBS_ATTR_QUERY_GID_ENTRY_GID_INDEX, + UVERBS_ATTR_QUERY_GID_ENTRY_FLAGS, + UVERBS_ATTR_QUERY_GID_ENTRY_RESP_ENTRY, +}; + #endif diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h index 5debab45ebcb..22483799cd07 100644 --- a/include/uapi/rdma/ib_user_ioctl_verbs.h +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h @@ -208,6 +208,7 @@ enum ib_uverbs_read_counters_flags { enum ib_uverbs_advise_mr_advice { IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH, IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE, + IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT, }; enum ib_uverbs_advise_mr_flag { @@ -250,4 +251,18 @@ enum rdma_driver_id { RDMA_DRIVER_SIW, }; +enum ib_uverbs_gid_type { + IB_UVERBS_GID_TYPE_IB, + IB_UVERBS_GID_TYPE_ROCE_V1, + IB_UVERBS_GID_TYPE_ROCE_V2, +}; + +struct ib_uverbs_gid_entry { + __aligned_u64 gid[2]; + __u32 gid_index; + __u32 port_num; + __u32 gid_type; + __u32 netdev_ifindex; /* It is 0 if there is no netdev associated with it */ +}; + #endif diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 0474c7400268..456438c18c2c 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -457,6 +457,17 @@ struct ib_uverbs_poll_cq { __u32 ne; }; +enum ib_uverbs_wc_opcode { + IB_UVERBS_WC_SEND = 0, + IB_UVERBS_WC_RDMA_WRITE = 1, + IB_UVERBS_WC_RDMA_READ = 2, + IB_UVERBS_WC_COMP_SWAP = 3, + IB_UVERBS_WC_FETCH_ADD = 4, + IB_UVERBS_WC_BIND_MW = 5, + IB_UVERBS_WC_LOCAL_INV = 6, + IB_UVERBS_WC_TSO = 7, +}; + struct ib_uverbs_wc { __aligned_u64 wr_id; __u32 status; diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h index aae2e696bb38..e591d8c1f3cf 100644 --- a/include/uapi/rdma/rdma_user_rxe.h +++ b/include/uapi/rdma/rdma_user_rxe.h @@ -39,6 +39,11 @@ #include #include +enum { + RXE_NETWORK_TYPE_IPV4 = 1, + RXE_NETWORK_TYPE_IPV6 = 2, +}; + union rxe_gid { __u8 raw[16]; struct { @@ -57,6 +62,7 @@ struct rxe_global_route { struct rxe_av { __u8 port_num; + /* From RXE_NETWORK_TYPE_* */ __u8 network_type; __u8 dmac[6]; struct rxe_global_route grh; @@ -99,8 +105,8 @@ struct rxe_send_wr { struct ib_mr *mr; __aligned_u64 reserved; }; - __u32 key; - __u32 access; + __u32 key; + __u32 access; } reg; } wr; }; @@ -112,7 +118,7 @@ struct rxe_sge { }; struct mminfo { - __aligned_u64 offset; + __aligned_u64 offset; __u32 size; __u32 pad; }; diff --git a/lib/scatterlist.c b/lib/scatterlist.c index d94628fa3349..9a4992dc8e8c 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -365,6 +365,37 @@ int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask) } EXPORT_SYMBOL(sg_alloc_table); +static struct scatterlist *get_next_sg(struct sg_table *table, + struct scatterlist *cur, + unsigned long needed_sges, + gfp_t gfp_mask) +{ + struct scatterlist *new_sg, *next_sg; + unsigned int alloc_size; + + if (cur) { + next_sg = sg_next(cur); + /* Check if last entry should be keeped for chainning */ + if (!sg_is_last(next_sg) || needed_sges == 1) + return next_sg; + } + + alloc_size = min_t(unsigned long, needed_sges, SG_MAX_SINGLE_ALLOC); + new_sg = sg_kmalloc(alloc_size, gfp_mask); + if (!new_sg) + return ERR_PTR(-ENOMEM); + sg_init_table(new_sg, alloc_size); + if (cur) { + __sg_chain(next_sg, new_sg); + table->orig_nents += alloc_size - 1; + } else { + table->sgl = new_sg; + table->orig_nents = alloc_size; + table->nents = 0; + } + return new_sg; +} + /** * __sg_alloc_table_from_pages - Allocate and initialize an sg table from * an array of pages @@ -373,30 +404,69 @@ EXPORT_SYMBOL(sg_alloc_table); * @n_pages: Number of pages in the pages array * @offset: Offset from start of the first page to the start of a buffer * @size: Number of valid bytes in the buffer (after offset) - * @max_segment: Maximum size of a scatterlist node in bytes (page aligned) + * @max_segment: Maximum size of a scatterlist element in bytes + * @prv: Last populated sge in sgt + * @left_pages: Left pages caller have to set after this call * @gfp_mask: GFP allocation mask * - * Description: - * Allocate and initialize an sg table from a list of pages. Contiguous - * ranges of the pages are squashed into a single scatterlist node up to the - * maximum size specified in @max_segment. An user may provide an offset at a - * start and a size of valid data in a buffer specified by the page array. - * The returned sg table is released by sg_free_table. + * Description: + * If @prv is NULL, allocate and initialize an sg table from a list of pages, + * else reuse the scatterlist passed in at @prv. + * Contiguous ranges of the pages are squashed into a single scatterlist + * entry up to the maximum size specified in @max_segment. A user may + * provide an offset at a start and a size of valid data in a buffer + * specified by the page array. * * Returns: - * 0 on success, negative error on failure + * Last SGE in sgt on success, PTR_ERR on otherwise. + * The allocation in @sgt must be released by sg_free_table. + * + * Notes: + * If this function returns non-0 (eg failure), the caller must call + * sg_free_table() to cleanup any leftover allocations. */ -int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, - unsigned int n_pages, unsigned int offset, - unsigned long size, unsigned int max_segment, - gfp_t gfp_mask) +struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt, + struct page **pages, unsigned int n_pages, unsigned int offset, + unsigned long size, unsigned int max_segment, + struct scatterlist *prv, unsigned int left_pages, + gfp_t gfp_mask) { - unsigned int chunks, cur_page, seg_len, i; - int ret; - struct scatterlist *s; + unsigned int chunks, cur_page, seg_len, i, prv_len = 0; + unsigned int added_nents = 0; + struct scatterlist *s = prv; - if (WARN_ON(!max_segment || offset_in_page(max_segment))) - return -EINVAL; + /* + * The algorithm below requires max_segment to be aligned to PAGE_SIZE + * otherwise it can overshoot. + */ + max_segment = ALIGN_DOWN(max_segment, PAGE_SIZE); + if (WARN_ON(max_segment < PAGE_SIZE)) + return ERR_PTR(-EINVAL); + + if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && prv) + return ERR_PTR(-EOPNOTSUPP); + + if (prv) { + unsigned long paddr = (page_to_pfn(sg_page(prv)) * PAGE_SIZE + + prv->offset + prv->length) / + PAGE_SIZE; + + if (WARN_ON(offset)) + return ERR_PTR(-EINVAL); + + /* Merge contiguous pages into the last SG */ + prv_len = prv->length; + while (n_pages && page_to_pfn(pages[0]) == paddr) { + if (prv->length + PAGE_SIZE > max_segment) + break; + prv->length += PAGE_SIZE; + paddr++; + pages++; + n_pages--; + } + if (!n_pages) + goto out; + } /* compute number of contiguous chunks */ chunks = 1; @@ -410,13 +480,9 @@ int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, } } - ret = sg_alloc_table(sgt, chunks, gfp_mask); - if (unlikely(ret)) - return ret; - /* merging chunks and putting them into the scatterlist */ cur_page = 0; - for_each_sg(sgt->sgl, s, sgt->orig_nents, i) { + for (i = 0; i < chunks; i++) { unsigned int j, chunk_size; /* look for the end of the current chunk */ @@ -429,15 +495,30 @@ int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, break; } + /* Pass how many chunks might be left */ + s = get_next_sg(sgt, s, chunks - i + left_pages, gfp_mask); + if (IS_ERR(s)) { + /* + * Adjust entry length to be as before function was + * called. + */ + if (prv) + prv->length = prv_len; + return s; + } chunk_size = ((j - cur_page) << PAGE_SHIFT) - offset; sg_set_page(s, pages[cur_page], min_t(unsigned long, size, chunk_size), offset); + added_nents++; size -= chunk_size; offset = 0; cur_page = j; } - - return 0; + sgt->nents += added_nents; +out: + if (!left_pages) + sg_mark_end(s); + return s; } EXPORT_SYMBOL(__sg_alloc_table_from_pages); @@ -465,8 +546,8 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, unsigned int n_pages, unsigned int offset, unsigned long size, gfp_t gfp_mask) { - return __sg_alloc_table_from_pages(sgt, pages, n_pages, offset, size, - SCATTERLIST_MAX_SEGMENT, gfp_mask); + return PTR_ERR_OR_ZERO(__sg_alloc_table_from_pages(sgt, pages, n_pages, + offset, size, UINT_MAX, NULL, 0, gfp_mask)); } EXPORT_SYMBOL(sg_alloc_table_from_pages); diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 38415d251075..97cbfb31b762 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -38,9 +38,8 @@ FEATURE_TESTS_BASIC := \ get_current_dir_name \ gettid \ glibc \ - gtk2 \ - gtk2-infobar \ libbfd \ + libbfd-buildid \ libcap \ libelf \ libelf-getphdrnum \ @@ -80,6 +79,8 @@ FEATURE_TESTS_EXTRA := \ compile-32 \ compile-x32 \ cplus-demangle \ + gtk2 \ + gtk2-infobar \ hello \ libbabeltrace \ libbfd-liberty \ @@ -110,8 +111,8 @@ FEATURE_DISPLAY ?= \ dwarf \ dwarf_getlocations \ glibc \ - gtk2 \ libbfd \ + libbfd-buildid \ libcap \ libelf \ libnuma \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index b2a2347c67ed..cdde783f3018 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -15,6 +15,7 @@ FILES= \ test-hello.bin \ test-libaudit.bin \ test-libbfd.bin \ + test-libbfd-buildid.bin \ test-disassembler-four-args.bin \ test-reallocarray.bin \ test-libbfd-liberty.bin \ @@ -89,7 +90,7 @@ __BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$( ############################### $(OUTPUT)test-all.bin: - $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma + $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -I/usr/include/slang -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd $(OUTPUT)test-hello.bin: $(BUILD) @@ -225,6 +226,9 @@ $(OUTPUT)test-libpython-version.bin: $(OUTPUT)test-libbfd.bin: $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl +$(OUTPUT)test-libbfd-buildid.bin: + $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl + $(OUTPUT)test-disassembler-four-args.bin: $(BUILD) -DPACKAGE='"perf"' -lbfd -lopcodes diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 5284e6e9c756..a04e81321c66 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -74,18 +74,14 @@ # include "test-libslang.c" #undef main -#define main main_test_gtk2 -# include "test-gtk2.c" -#undef main - -#define main main_test_gtk2_infobar -# include "test-gtk2-infobar.c" -#undef main - #define main main_test_libbfd # include "test-libbfd.c" #undef main +#define main main_test_libbfd_buildid +# include "test-libbfd-buildid.c" +#undef main + #define main main_test_backtrace # include "test-backtrace.c" #undef main @@ -201,9 +197,8 @@ int main(int argc, char *argv[]) main_test_libelf_getshdrstrndx(); main_test_libunwind(); main_test_libslang(); - main_test_gtk2(argc, argv); - main_test_gtk2_infobar(argc, argv); main_test_libbfd(); + main_test_libbfd_buildid(); main_test_backtrace(); main_test_libnuma(); main_test_numa_num_possible_cpus(); diff --git a/tools/build/feature/test-libbfd-buildid.c b/tools/build/feature/test-libbfd-buildid.c new file mode 100644 index 000000000000..157644b04c05 --- /dev/null +++ b/tools/build/feature/test-libbfd-buildid.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +int main(void) +{ + bfd *abfd = bfd_openr("Pedro", 0); + return abfd && (!abfd->build_id || abfd->build_id->size > 0x506564726f); +} diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 2208444ecb44..cfcdbd7be066 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -45,6 +45,9 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, if (!evsel->own_cpus || evlist->has_user_cpus) { perf_cpu_map__put(evsel->cpus); evsel->cpus = perf_cpu_map__get(evlist->cpus); + } else if (!evsel->system_wide && perf_cpu_map__empty(evlist->cpus)) { + perf_cpu_map__put(evsel->cpus); + evsel->cpus = perf_cpu_map__get(evlist->cpus); } else if (evsel->cpus != evsel->own_cpus) { perf_cpu_map__put(evsel->cpus); evsel->cpus = perf_cpu_map__get(evsel->own_cpus); diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index 842028858d66..988c539bedb6 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -201,10 +201,20 @@ struct perf_record_header_tracing_data { __u32 size; }; +#define PERF_RECORD_MISC_BUILD_ID_SIZE (1 << 15) + struct perf_record_header_build_id { struct perf_event_header header; pid_t pid; - __u8 build_id[24]; + union { + __u8 build_id[24]; + struct { + __u8 data[20]; + __u8 size; + __u8 reserved1__; + __u16 reserved2__; + }; + }; char filename[]; }; @@ -324,6 +334,10 @@ struct perf_record_time_conv { __u64 time_shift; __u64 time_mult; __u64 time_zero; + __u64 time_cycles; + __u64 time_mask; + bool cap_user_time_zero; + bool cap_user_time_short; }; struct perf_record_header_feature { diff --git a/tools/lib/traceevent/event-parse-api.c b/tools/lib/traceevent/event-parse-api.c index 4faf52a65791..f8361e45d446 100644 --- a/tools/lib/traceevent/event-parse-api.c +++ b/tools/lib/traceevent/event-parse-api.c @@ -92,7 +92,7 @@ bool tep_test_flag(struct tep_handle *tep, enum tep_flag flag) return false; } -unsigned short tep_data2host2(struct tep_handle *tep, unsigned short data) +__hidden unsigned short data2host2(struct tep_handle *tep, unsigned short data) { unsigned short swap; @@ -105,7 +105,7 @@ unsigned short tep_data2host2(struct tep_handle *tep, unsigned short data) return swap; } -unsigned int tep_data2host4(struct tep_handle *tep, unsigned int data) +__hidden unsigned int data2host4(struct tep_handle *tep, unsigned int data) { unsigned int swap; @@ -120,8 +120,8 @@ unsigned int tep_data2host4(struct tep_handle *tep, unsigned int data) return swap; } -unsigned long long -tep_data2host8(struct tep_handle *tep, unsigned long long data) +__hidden unsigned long long +data2host8(struct tep_handle *tep, unsigned long long data) { unsigned long long swap; diff --git a/tools/lib/traceevent/event-parse-local.h b/tools/lib/traceevent/event-parse-local.h index d805a920af6f..fd4bbcfbb849 100644 --- a/tools/lib/traceevent/event-parse-local.h +++ b/tools/lib/traceevent/event-parse-local.h @@ -15,6 +15,8 @@ struct event_handler; struct func_resolver; struct tep_plugins_dir; +#define __hidden __attribute__((visibility ("hidden"))) + struct tep_handle { int ref_count; @@ -102,12 +104,20 @@ struct tep_print_parse { struct tep_print_arg *len_as_arg; }; -void tep_free_event(struct tep_event *event); -void tep_free_format_field(struct tep_format_field *field); -void tep_free_plugin_paths(struct tep_handle *tep); +void free_tep_event(struct tep_event *event); +void free_tep_format_field(struct tep_format_field *field); +void free_tep_plugin_paths(struct tep_handle *tep); -unsigned short tep_data2host2(struct tep_handle *tep, unsigned short data); -unsigned int tep_data2host4(struct tep_handle *tep, unsigned int data); -unsigned long long tep_data2host8(struct tep_handle *tep, unsigned long long data); +unsigned short data2host2(struct tep_handle *tep, unsigned short data); +unsigned int data2host4(struct tep_handle *tep, unsigned int data); +unsigned long long data2host8(struct tep_handle *tep, unsigned long long data); + +/* access to the internal parser */ +int peek_char(void); +void init_input_buf(const char *buf, unsigned long long size); +unsigned long long get_input_buf_ptr(void); +const char *get_input_buf(void); +enum tep_event_type read_token(char **tok); +void free_token(char *tok); #endif /* _PARSE_EVENTS_INT_H */ diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 5acc18b32606..fe58843d047c 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -54,19 +54,26 @@ static int show_warning = 1; warning(fmt, ##__VA_ARGS__); \ } while (0) -static void init_input_buf(const char *buf, unsigned long long size) +/** + * init_input_buf - init buffer for parsing + * @buf: buffer to parse + * @size: the size of the buffer + * + * Initializes the internal buffer that tep_read_token() will parse. + */ +__hidden void init_input_buf(const char *buf, unsigned long long size) { input_buf = buf; input_buf_siz = size; input_buf_ptr = 0; } -const char *tep_get_input_buf(void) +__hidden const char *get_input_buf(void) { return input_buf; } -unsigned long long tep_get_input_buf_ptr(void) +__hidden unsigned long long get_input_buf_ptr(void) { return input_buf_ptr; } @@ -100,26 +107,13 @@ process_defined_func(struct trace_seq *s, void *data, int size, static void free_func_handle(struct tep_function_handler *func); -/** - * tep_buffer_init - init buffer for parsing - * @buf: buffer to parse - * @size: the size of the buffer - * - * For use with tep_read_token(), this initializes the internal - * buffer that tep_read_token() will parse. - */ -void tep_buffer_init(const char *buf, unsigned long long size) -{ - init_input_buf(buf, size); -} - void breakpoint(void) { static int x; x++; } -struct tep_print_arg *alloc_arg(void) +static struct tep_print_arg *alloc_arg(void) { return calloc(1, sizeof(struct tep_print_arg)); } @@ -962,7 +956,12 @@ static int __read_char(void) return input_buf[input_buf_ptr++]; } -static int __peek_char(void) +/** + * peek_char - peek at the next character that will be read + * + * Returns the next character read, or -1 if end of buffer. + */ +__hidden int peek_char(void) { if (input_buf_ptr >= input_buf_siz) return -1; @@ -970,16 +969,6 @@ static int __peek_char(void) return input_buf[input_buf_ptr]; } -/** - * tep_peek_char - peek at the next character that will be read - * - * Returns the next character read, or -1 if end of buffer. - */ -int tep_peek_char(void) -{ - return __peek_char(); -} - static int extend_token(char **tok, char *buf, int size) { char *newtok = realloc(*tok, size); @@ -1033,7 +1022,7 @@ static enum tep_event_type __read_token(char **tok) case TEP_EVENT_OP: switch (ch) { case '-': - next_ch = __peek_char(); + next_ch = peek_char(); if (next_ch == '>') { buf[i++] = __read_char(); break; @@ -1045,7 +1034,7 @@ static enum tep_event_type __read_token(char **tok) case '>': case '<': last_ch = ch; - ch = __peek_char(); + ch = peek_char(); if (ch != last_ch) goto test_equal; buf[i++] = __read_char(); @@ -1068,7 +1057,7 @@ static enum tep_event_type __read_token(char **tok) return type; test_equal: - ch = __peek_char(); + ch = peek_char(); if (ch == '=') buf[i++] = __read_char(); goto out; @@ -1122,7 +1111,7 @@ static enum tep_event_type __read_token(char **tok) break; } - while (get_type(__peek_char()) == type) { + while (get_type(peek_char()) == type) { if (i == (BUFSIZ - 1)) { buf[i] = 0; tok_size += BUFSIZ; @@ -1191,13 +1180,26 @@ static enum tep_event_type force_token(const char *str, char **tok) return type; } -static void free_token(char *tok) +/** + * free_token - free a token returned by tep_read_token + * @token: the token to free + */ +__hidden void free_token(char *tok) { if (tok) free(tok); } -static enum tep_event_type read_token(char **tok) +/** + * read_token - access to utilities to use the tep parser + * @tok: The token to return + * + * This will parse tokens from the string given by + * tep_init_data(). + * + * Returns the token type. + */ +__hidden enum tep_event_type read_token(char **tok) { enum tep_event_type type; @@ -1214,29 +1216,6 @@ static enum tep_event_type read_token(char **tok) return TEP_EVENT_NONE; } -/** - * tep_read_token - access to utilities to use the tep parser - * @tok: The token to return - * - * This will parse tokens from the string given by - * tep_init_data(). - * - * Returns the token type. - */ -enum tep_event_type tep_read_token(char **tok) -{ - return read_token(tok); -} - -/** - * tep_free_token - free a token returned by tep_read_token - * @token: the token to free - */ -void tep_free_token(char *token) -{ - free_token(token); -} - /* no newline */ static enum tep_event_type read_token_item(char **tok) { @@ -3459,12 +3438,12 @@ unsigned long long tep_read_number(struct tep_handle *tep, case 1: return *(unsigned char *)ptr; case 2: - return tep_data2host2(tep, *(unsigned short *)ptr); + return data2host2(tep, *(unsigned short *)ptr); case 4: - return tep_data2host4(tep, *(unsigned int *)ptr); + return data2host4(tep, *(unsigned int *)ptr); case 8: memcpy(&val, (ptr), sizeof(unsigned long long)); - return tep_data2host8(tep, val); + return data2host8(tep, val); default: /* BUG! */ return 0; @@ -4190,7 +4169,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, f = tep_find_any_field(event, arg->string.string); arg->string.offset = f->offset; } - str_offset = tep_data2host4(tep, *(unsigned int *)(data + arg->string.offset)); + str_offset = data2host4(tep, *(unsigned int *)(data + arg->string.offset)); str_offset &= 0xffff; print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset); break; @@ -4208,7 +4187,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, f = tep_find_any_field(event, arg->bitmask.bitmask); arg->bitmask.offset = f->offset; } - bitmask_offset = tep_data2host4(tep, *(unsigned int *)(data + arg->bitmask.offset)); + bitmask_offset = data2host4(tep, *(unsigned int *)(data + arg->bitmask.offset)); bitmask_size = bitmask_offset >> 16; bitmask_offset &= 0xffff; print_bitmask_to_seq(tep, s, format, len_arg, @@ -6750,7 +6729,7 @@ static int find_event_handle(struct tep_handle *tep, struct tep_event *event) } /** - * __tep_parse_format - parse the event format + * parse_format - parse the event format * @buf: the buffer storing the event format string * @size: the size of @buf * @sys: the system the event belongs to @@ -6762,9 +6741,9 @@ static int find_event_handle(struct tep_handle *tep, struct tep_event *event) * * /sys/kernel/debug/tracing/events/.../.../format */ -enum tep_errno __tep_parse_format(struct tep_event **eventp, - struct tep_handle *tep, const char *buf, - unsigned long size, const char *sys) +static enum tep_errno parse_format(struct tep_event **eventp, + struct tep_handle *tep, const char *buf, + unsigned long size, const char *sys) { struct tep_event *event; int ret; @@ -6879,7 +6858,7 @@ __parse_event(struct tep_handle *tep, const char *buf, unsigned long size, const char *sys) { - int ret = __tep_parse_format(eventp, tep, buf, size, sys); + int ret = parse_format(eventp, tep, buf, size, sys); struct tep_event *event = *eventp; if (event == NULL) @@ -6897,7 +6876,7 @@ __parse_event(struct tep_handle *tep, return 0; event_add_failed: - tep_free_event(event); + free_tep_event(event); return ret; } @@ -7490,7 +7469,7 @@ int tep_get_ref(struct tep_handle *tep) return 0; } -void tep_free_format_field(struct tep_format_field *field) +__hidden void free_tep_format_field(struct tep_format_field *field) { free(field->type); if (field->alias != field->name) @@ -7505,7 +7484,7 @@ static void free_format_fields(struct tep_format_field *field) while (field) { next = field->next; - tep_free_format_field(field); + free_tep_format_field(field); field = next; } } @@ -7516,7 +7495,7 @@ static void free_formats(struct tep_format *format) free_format_fields(format->fields); } -void tep_free_event(struct tep_event *event) +__hidden void free_tep_event(struct tep_event *event) { free(event->name); free(event->system); @@ -7602,7 +7581,7 @@ void tep_free(struct tep_handle *tep) } for (i = 0; i < tep->nr_events; i++) - tep_free_event(tep->events[i]); + free_tep_event(tep->events[i]); while (tep->handlers) { handle = tep->handlers; @@ -7613,7 +7592,7 @@ void tep_free(struct tep_handle *tep) free(tep->events); free(tep->sort_events); free(tep->func_resolver); - tep_free_plugin_paths(tep); + free_tep_plugin_paths(tep); free(tep); } diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index c29b693e31ee..a67ad9a5b835 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -578,14 +578,6 @@ void tep_ref(struct tep_handle *tep); void tep_unref(struct tep_handle *tep); int tep_get_ref(struct tep_handle *tep); -/* access to the internal parser */ -void tep_buffer_init(const char *buf, unsigned long long size); -enum tep_event_type tep_read_token(char **tok); -void tep_free_token(char *token); -int tep_peek_char(void); -const char *tep_get_input_buf(void); -unsigned long long tep_get_input_buf_ptr(void); - /* for debugging */ void tep_print_funcs(struct tep_handle *tep); void tep_print_printk(struct tep_handle *tep); diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c index e7c2acb8680f..e7f93d5fe4fd 100644 --- a/tools/lib/traceevent/event-plugin.c +++ b/tools/lib/traceevent/event-plugin.c @@ -676,7 +676,7 @@ int tep_add_plugin_path(struct tep_handle *tep, char *path, return 0; } -void tep_free_plugin_paths(struct tep_handle *tep) +__hidden void free_tep_plugin_paths(struct tep_handle *tep) { struct tep_plugins_dir *dir; diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index c271aeeb227d..368826bb5a57 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -38,8 +38,8 @@ static void show_error(char *error_buf, const char *fmt, ...) int len; int i; - input = tep_get_input_buf(); - index = tep_get_input_buf_ptr(); + input = get_input_buf(); + index = get_input_buf_ptr(); len = input ? strlen(input) : 0; if (len) { @@ -57,25 +57,20 @@ static void show_error(char *error_buf, const char *fmt, ...) va_end(ap); } -static void free_token(char *token) -{ - tep_free_token(token); -} - -static enum tep_event_type read_token(char **tok) +static enum tep_event_type filter_read_token(char **tok) { enum tep_event_type type; char *token = NULL; do { free_token(token); - type = tep_read_token(&token); + type = read_token(&token); } while (type == TEP_EVENT_NEWLINE || type == TEP_EVENT_SPACE); /* If token is = or ! check to see if the next char is ~ */ if (token && (strcmp(token, "=") == 0 || strcmp(token, "!") == 0) && - tep_peek_char() == '~') { + peek_char() == '~') { /* append it */ *tok = malloc(3); if (*tok == NULL) { @@ -85,7 +80,7 @@ static enum tep_event_type read_token(char **tok) sprintf(*tok, "%c%c", *token, '~'); free_token(token); /* Now remove the '~' from the buffer */ - tep_read_token(&token); + read_token(&token); free_token(token); } else *tok = token; @@ -959,7 +954,7 @@ process_filter(struct tep_event *event, struct tep_filter_arg **parg, do { free(token); - type = read_token(&token); + type = filter_read_token(&token); switch (type) { case TEP_EVENT_SQUOTE: case TEP_EVENT_DQUOTE: @@ -1185,7 +1180,7 @@ process_event(struct tep_event *event, const char *filter_str, { int ret; - tep_buffer_init(filter_str, strlen(filter_str)); + init_input_buf(filter_str, strlen(filter_str)); ret = process_filter(event, parg, error_str, 0); if (ret < 0) @@ -1243,7 +1238,7 @@ filter_event(struct tep_event_filter *filter, struct tep_event *event, static void filter_init_error_buf(struct tep_event_filter *filter) { /* clear buffer to reset show error */ - tep_buffer_init("", 0); + init_input_buf("", 0); filter->error_buffer[0] = '\0'; } diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt index 98efdab5fbd4..c81d72e3eecf 100644 --- a/tools/perf/Documentation/perf-c2c.txt +++ b/tools/perf/Documentation/perf-c2c.txt @@ -174,34 +174,36 @@ For each cacheline in the 1) list we display following data: Cacheline - cacheline address (hex number) - Total records - - sum of all cachelines accesses - Rmt/Lcl Hitm - cacheline percentage of all Remote/Local HITM accesses - LLC Load Hitm - Total, Lcl, Rmt + LLC Load Hitm - Total, LclHitm, RmtHitm - count of Total/Local/Remote load HITMs - Store Reference - Total, L1Hit, L1Miss - Total - all store accesses - L1Hit - store accesses that hit L1 - L1Hit - store accesses that missed L1 + Total records + - sum of all cachelines accesses - Load Dram - - count of local and remote DRAM accesses - - LLC Ld Miss - - count of all accesses that missed LLC - - Total Loads + Total loads - sum of all load accesses + Total stores + - sum of all store accesses + + Store Reference - L1Hit, L1Miss + L1Hit - store accesses that hit L1 + L1Miss - store accesses that missed L1 + Core Load Hit - FB, L1, L2 - count of load hits in FB (Fill Buffer), L1 and L2 cache - LLC Load Hit - Llc, Rmt - - count of LLC and Remote load hits + LLC Load Hit - LlcHit, LclHitm + - count of LLC load accesses, includes LLC hits and LLC HITMs + + RMT Load Hit - RmtHit, RmtHitm + - count of remote load accesses, includes remote hits and remote HITMs + + Load Dram - Lcl, Rmt + - count of local and remote DRAM accesses For each offset in the 2) list we display following data: diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 76408d986aed..31069d8a5304 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -242,6 +242,11 @@ annotate.*:: These are in control of addresses, jump function, source code in lines of assembly code from a specific program. + annotate.disassembler_style: + Use this to change the default disassembler style to some other value + supported by binutils, such as "intel", see the '-M' option help in the + 'objdump' man page. + annotate.hide_src_code:: If a program which is analyzed has source code, this option lets 'annotate' print a list of assembly code with the source code. diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index f50ca0fef0a4..be65bd55ab2a 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -182,6 +182,10 @@ OPTIONS --tid=:: Only diff samples for given thread ID (comma separated list). +--stream:: + Enable hot streams comparison. Stream can be a callchain which is + aggregated by the branch records from samples. + COMPARISON ---------- The comparison is governed by the baseline file. The baseline perf.data diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index 78358af9a1c4..1e91121bac0f 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -33,7 +33,8 @@ OPTIONS -F:: --funcs:: - List all available functions to trace. + List available functions to trace. It accepts a pattern to + only list interested functions. -p:: --pid=:: diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt index 70969ea73e01..a8eccff21281 100644 --- a/tools/perf/Documentation/perf-inject.txt +++ b/tools/perf/Documentation/perf-inject.txt @@ -24,8 +24,12 @@ information could make use of this facility. OPTIONS ------- -b:: ---build-ids=:: +--build-ids:: Inject build-ids into the output stream + +--buildid-all: + Inject build-ids of all DSOs into the output stream + -v:: --verbose:: Be more verbose. diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index d5a266d7f15b..cd362dc2af07 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -112,6 +112,32 @@ The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, system, asynchronous, interrupt, transaction abort, trace begin, trace end, and in transaction, respectively. +perf script also supports higher level ways to dump instruction traces: + + perf script --insn-trace --xed + +Dump all instructions. This requires installing the xed tool (see XED below) +Dumping all instructions in a long trace can be fairly slow. It is usually better +to start with higher level decoding, like + + perf script --call-trace + +or + + perf script --call-ret-trace + +and then select a time range of interest. The time range can then be examined +in detail with + + perf script --time starttime,stoptime --insn-trace --xed + +While examining the trace it's also useful to filter on specific CPUs using +the -C option + + perf script --time starttime,stoptime --insn-trace --xed -C 1 + +Dump all instructions in time range on CPU 1. + Another interesting field that is not printed by default is 'ipc' which can be displayed as follows: @@ -558,7 +584,7 @@ The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g Intel PT modes of operation ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Intel PT can be used in 2 modes: +Intel PT can be used in 3 modes: full-trace mode sample mode snapshot mode @@ -571,7 +597,8 @@ Sample mode attaches a Intel PT sample to other events e.g. perf record --aux-sample -e intel_pt//u -e branch-misses:u -Snapshot mode captures the available data when a signal is sent e.g. +Snapshot mode captures the available data when a signal is sent or "snapshot" +control command is issued. e.g. using a signal perf record -v -e intel_pt//u -S ./loopy 1000000000 & [1] 11435 @@ -582,7 +609,23 @@ Note that the signal sent is SIGUSR2. Note that "Recording AUX area tracing snapshot" is displayed because the -v option is used. -The 2 modes cannot be used together. +The advantage of using "snapshot" control command is that the access is +controlled by access to a FIFO e.g. + + $ mkfifo perf.control + $ mkfifo perf.ack + $ cat perf.ack & + [1] 15235 + $ sudo ~/bin/perf record --control fifo:perf.control,perf.ack -S -e intel_pt//u -- sleep 60 & + [2] 15243 + $ ps -e | grep perf + 15244 pts/1 00:00:00 perf + $ kill -USR2 15244 + bash: kill: (15244) - Operation not permitted + $ echo snapshot > perf.control + ack + +The 3 Intel PT modes of operation cannot be used together. Buffer handling @@ -1093,6 +1136,10 @@ To display PEBS events from the Intel PT trace, use the itrace 'o' option e.g. perf script --itrace=oe +XED +--- + +include::build-xed.txt[] SEE ALSO -------- diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 10ed539a8859..4c7db1da8fcc 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -58,6 +58,7 @@ counted. The following modifiers exist: S - read sample value (PERF_SAMPLE_READ) D - pin the event to the PMU W - group is weak and will fallback to non-group if not schedulable, + e - group or event are exclusive and do not share the PMU The 'p' modifier can be used for specifying how precise the instruction address should be. The 'p' modifier can be specified multiple times: diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index bd50cdff08a8..768888b9326a 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -631,43 +631,45 @@ option. The -e option and this one can be mixed and matched. Events can be grouped using the {} notation. endif::HAVE_LIBPFM[] ---control fd:ctl-fd[,ack-fd] +--control=fifo:ctl-fifo[,ack-fifo]:: +--control=fd:ctl-fd[,ack-fd]:: +ctl-fifo / ack-fifo are opened and used as ctl-fd / ack-fd as follows. Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, -'disable': disable events). Measurements can be started with events disabled using ---delay=-1 option. Optionally send control command completion ('ack\n') to ack-fd descriptor -to synchronize with the controlling process. Example of bash shell script to enable and -disable events during measurements: +'disable': disable events, 'snapshot': AUX area tracing snapshot). Measurements can be +started with events disabled using --delay=-1 option. Optionally send control command +completion ('ack\n') to ack-fd descriptor to synchronize with the controlling process. +Example of bash shell script to enable and disable events during measurements: -#!/bin/bash + #!/bin/bash -ctl_dir=/tmp/ + ctl_dir=/tmp/ -ctl_fifo=${ctl_dir}perf_ctl.fifo -test -p ${ctl_fifo} && unlink ${ctl_fifo} -mkfifo ${ctl_fifo} -exec {ctl_fd}<>${ctl_fifo} + ctl_fifo=${ctl_dir}perf_ctl.fifo + test -p ${ctl_fifo} && unlink ${ctl_fifo} + mkfifo ${ctl_fifo} + exec {ctl_fd}<>${ctl_fifo} -ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo -test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo} -mkfifo ${ctl_ack_fifo} -exec {ctl_fd_ack}<>${ctl_ack_fifo} + ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo + test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo} + mkfifo ${ctl_ack_fifo} + exec {ctl_fd_ack}<>${ctl_ack_fifo} -perf record -D -1 -e cpu-cycles -a \ - --control fd:${ctl_fd},${ctl_fd_ack} \ - -- sleep 30 & -perf_pid=$! + perf record -D -1 -e cpu-cycles -a \ + --control fd:${ctl_fd},${ctl_fd_ack} \ + -- sleep 30 & + perf_pid=$! -sleep 5 && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})" -sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})" + sleep 5 && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})" + sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})" -exec {ctl_fd_ack}>&- -unlink ${ctl_ack_fifo} + exec {ctl_fd_ack}>&- + unlink ${ctl_ack_fifo} -exec {ctl_fd}>&- -unlink ${ctl_fifo} + exec {ctl_fd}>&- + unlink ${ctl_fifo} -wait -n ${perf_pid} -exit $? + wait -n ${perf_pid} + exit $? SEE ALSO diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index db420dd75e43..9f9f29025e49 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -166,6 +166,11 @@ use '-e e1 -e e2 -G foo,foo' or just use '-e e1 -e e2 -G foo'. If wanting to monitor, say, 'cycles' for a cgroup and also for system wide, this command line can be used: 'perf stat -e cycles -G cgroup_name -a -e cycles'. +--for-each-cgroup name:: +Expand event list for each cgroup in "name" (allow multiple cgroups separated +by comma). This has same effect that repeating -e option and -G option for +each event x name. This option cannot be used with -G/--cgroup option. + -o file:: --output file:: Print the output into the designated file. @@ -180,43 +185,45 @@ with it. --append may be used here. Examples: 3>results perf stat --log-fd 3 -- $cmd 3>>results perf stat --log-fd 3 --append -- $cmd ---control fd:ctl-fd[,ack-fd] +--control=fifo:ctl-fifo[,ack-fifo]:: +--control=fd:ctl-fd[,ack-fd]:: +ctl-fifo / ack-fifo are opened and used as ctl-fd / ack-fd as follows. Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events). Measurements can be started with events disabled using --delay=-1 option. Optionally send control command completion ('ack\n') to ack-fd descriptor to synchronize with the controlling process. Example of bash shell script to enable and disable events during measurements: -#!/bin/bash + #!/bin/bash -ctl_dir=/tmp/ + ctl_dir=/tmp/ -ctl_fifo=${ctl_dir}perf_ctl.fifo -test -p ${ctl_fifo} && unlink ${ctl_fifo} -mkfifo ${ctl_fifo} -exec {ctl_fd}<>${ctl_fifo} + ctl_fifo=${ctl_dir}perf_ctl.fifo + test -p ${ctl_fifo} && unlink ${ctl_fifo} + mkfifo ${ctl_fifo} + exec {ctl_fd}<>${ctl_fifo} -ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo -test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo} -mkfifo ${ctl_ack_fifo} -exec {ctl_fd_ack}<>${ctl_ack_fifo} + ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo + test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo} + mkfifo ${ctl_ack_fifo} + exec {ctl_fd_ack}<>${ctl_ack_fifo} -perf stat -D -1 -e cpu-cycles -a -I 1000 \ - --control fd:${ctl_fd},${ctl_fd_ack} \ - -- sleep 30 & -perf_pid=$! + perf stat -D -1 -e cpu-cycles -a -I 1000 \ + --control fd:${ctl_fd},${ctl_fd_ack} \ + -- sleep 30 & + perf_pid=$! -sleep 5 && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})" -sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})" + sleep 5 && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})" + sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})" -exec {ctl_fd_ack}>&- -unlink ${ctl_ack_fifo} + exec {ctl_fd_ack}>&- + unlink ${ctl_ack_fifo} -exec {ctl_fd}>&- -unlink ${ctl_fifo} + exec {ctl_fd}>&- + unlink ${ctl_fifo} -wait -n ${perf_pid} -exit $? + wait -n ${perf_pid} + exit $? --pre:: @@ -361,6 +368,11 @@ if the workload is actually bound by the CPU and not by something else. For best results it is usually a good idea to use it with interval mode like -I 1000, as the bottleneck of workloads can change often. +This enables --metric-only, unless overridden with --no-metric-only. + +The following restrictions only apply to older Intel CPUs and Atom, +on newer CPUs (IceLake and later) TopDown can be collected for any thread: + The top down metrics are collected per core instead of per CPU thread. Per core mode is automatically enabled and -a (global monitoring) is needed, requiring root rights or @@ -372,8 +384,6 @@ echo 0 > /proc/sys/kernel/nmi_watchdog for best results. Otherwise the bottlenecks may be inconsistent on workload with changing phases. -This enables --metric-only, unless overridden with --no-metric-only. - To interpret the results it is usually needed to know on which CPUs the workload runs on. If needed the CPUs can be forced using taskset. diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt index 3f37ded13f8c..c130a3c46a90 100644 --- a/tools/perf/Documentation/perf.txt +++ b/tools/perf/Documentation/perf.txt @@ -12,32 +12,57 @@ SYNOPSIS OPTIONS ------- ---debug:: - Setup debug variable (see list below) in value - range (0, 10). Use like: - --debug verbose # sets verbose = 1 - --debug verbose=2 # sets verbose = 2 - - List of debug variables allowed to set: - verbose - general debug messages - ordered-events - ordered events object debug messages - data-convert - data convert command debug messages - stderr - write debug output (option -v) to stderr - in browser mode - perf-event-open - Print perf_event_open() arguments and - return value - ---buildid-dir:: - Setup buildid cache directory. It has higher priority than - buildid.dir config file option. +-h:: +--help:: + Run perf help command. -v:: --version:: - Display perf version. + Display perf version. --h:: ---help:: - Run perf help command. +-vv:: + Print the compiled-in status of libraries. + +--exec-path:: + Display or set exec path. + +--html-path:: + Display html documentation path. + +-p:: +--paginate:: + Set up pager. + +--no-pager:: + Do not set pager. + +--buildid-dir:: + Setup buildid cache directory. It has higher priority + than buildid.dir config file option. + +--list-cmds:: + List the most commonly used perf commands. + +--list-opts:: + List available perf options. + +--debugfs-dir:: + Set debugfs directory or set environment variable PERF_DEBUGFS_DIR. + +--debug:: + Setup debug variable (see list below) in value + range (0, 10). Use like: + --debug verbose # sets verbose = 1 + --debug verbose=2 # sets verbose = 2 + + List of debug variables allowed to set: + verbose - general debug messages + ordered-events - ordered events object debug messages + data-convert - data convert command debug messages + stderr - write debug output (option -v) to stderr + in browser mode + perf-event-open - Print perf_event_open() arguments and + return value DESCRIPTION ----------- diff --git a/tools/perf/Documentation/topdown.txt b/tools/perf/Documentation/topdown.txt new file mode 100644 index 000000000000..3c39bb3dc5fa --- /dev/null +++ b/tools/perf/Documentation/topdown.txt @@ -0,0 +1,256 @@ +Using TopDown metrics in user space +----------------------------------- + +Intel CPUs (since Sandy Bridge and Silvermont) support a TopDown +methology to break down CPU pipeline execution into 4 bottlenecks: +frontend bound, backend bound, bad speculation, retiring. + +For more details on Topdown see [1][5] + +Traditionally this was implemented by events in generic counters +and specific formulas to compute the bottlenecks. + +perf stat --topdown implements this. + +Full Top Down includes more levels that can break down the +bottlenecks further. This is not directly implemented in perf, +but available in other tools that can run on top of perf, +such as toplev[2] or vtune[3] + +New Topdown features in Ice Lake +=============================== + +With Ice Lake CPUs the TopDown metrics are directly available as +fixed counters and do not require generic counters. This allows +to collect TopDown always in addition to other events. + +% perf stat -a --topdown -I1000 +# time retiring bad speculation frontend bound backend bound + 1.001281330 23.0% 15.3% 29.6% 32.1% + 2.003009005 5.0% 6.8% 46.6% 41.6% + 3.004646182 6.7% 6.7% 46.0% 40.6% + 4.006326375 5.0% 6.4% 47.6% 41.0% + 5.007991804 5.1% 6.3% 46.3% 42.3% + 6.009626773 6.2% 7.1% 47.3% 39.3% + 7.011296356 4.7% 6.7% 46.2% 42.4% + 8.012951831 4.7% 6.7% 47.5% 41.1% +... + +This also enables measuring TopDown per thread/process instead +of only per core. + +Using TopDown through RDPMC in applications on Ice Lake +====================================================== + +For more fine grained measurements it can be useful to +access the new directly from user space. This is more complicated, +but drastically lowers overhead. + +On Ice Lake, there is a new fixed counter 3: SLOTS, which reports +"pipeline SLOTS" (cycles multiplied by core issue width) and a +metric register that reports slots ratios for the different bottleneck +categories. + +The metrics counter is CPU model specific and is not available on older +CPUs. + +Example code +============ + +Library functions to do the functionality described below +is also available in libjevents [4] + +The application opens a group with fixed counter 3 (SLOTS) and any +metric event, and allow user programs to read the performance counters. + +Fixed counter 3 is mapped to a pseudo event event=0x00, umask=04, +so the perf_event_attr structure should be initialized with +{ .config = 0x0400, .type = PERF_TYPE_RAW } +The metric events are mapped to the pseudo event event=0x00, umask=0x8X. +For example, the perf_event_attr structure can be initialized with +{ .config = 0x8000, .type = PERF_TYPE_RAW } for Retiring metric event +The Fixed counter 3 must be the leader of the group. + +#include +#include +#include + +/* Provide own perf_event_open stub because glibc doesn't */ +__attribute__((weak)) +int perf_event_open(struct perf_event_attr *attr, pid_t pid, + int cpu, int group_fd, unsigned long flags) +{ + return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); +} + +/* Open slots counter file descriptor for current task. */ +struct perf_event_attr slots = { + .type = PERF_TYPE_RAW, + .size = sizeof(struct perf_event_attr), + .config = 0x400, + .exclude_kernel = 1, +}; + +int slots_fd = perf_event_open(&slots, 0, -1, -1, 0); +if (slots_fd < 0) + ... error ... + +/* + * Open metrics event file descriptor for current task. + * Set slots event as the leader of the group. + */ +struct perf_event_attr metrics = { + .type = PERF_TYPE_RAW, + .size = sizeof(struct perf_event_attr), + .config = 0x8000, + .exclude_kernel = 1, +}; + +int metrics_fd = perf_event_open(&metrics, 0, -1, slots_fd, 0); +if (metrics_fd < 0) + ... error ... + + +The RDPMC instruction (or _rdpmc compiler intrinsic) can now be used +to read slots and the topdown metrics at different points of the program: + +#include +#include + +#define RDPMC_FIXED (1 << 30) /* return fixed counters */ +#define RDPMC_METRIC (1 << 29) /* return metric counters */ + +#define FIXED_COUNTER_SLOTS 3 +#define METRIC_COUNTER_TOPDOWN_L1 0 + +static inline uint64_t read_slots(void) +{ + return _rdpmc(RDPMC_FIXED | FIXED_COUNTER_SLOTS); +} + +static inline uint64_t read_metrics(void) +{ + return _rdpmc(RDPMC_METRIC | METRIC_COUNTER_TOPDOWN_L1); +} + +Then the program can be instrumented to read these metrics at different +points. + +It's not a good idea to do this with too short code regions, +as the parallelism and overlap in the CPU program execution will +cause too much measurement inaccuracy. For example instrumenting +individual basic blocks is definitely too fine grained. + +Decoding metrics values +======================= + +The value reported by read_metrics() contains four 8 bit fields +that represent a scaled ratio that represent the Level 1 bottleneck. +All four fields add up to 0xff (= 100%) + +The binary ratios in the metric value can be converted to float ratios: + +#define GET_METRIC(m, i) (((m) >> (i*8)) & 0xff) + +#define TOPDOWN_RETIRING(val) ((float)GET_METRIC(val, 0) / 0xff) +#define TOPDOWN_BAD_SPEC(val) ((float)GET_METRIC(val, 1) / 0xff) +#define TOPDOWN_FE_BOUND(val) ((float)GET_METRIC(val, 2) / 0xff) +#define TOPDOWN_BE_BOUND(val) ((float)GET_METRIC(val, 3) / 0xff) + +and then converted to percent for printing. + +The ratios in the metric accumulate for the time when the counter +is enabled. For measuring programs it is often useful to measure +specific sections. For this it is needed to deltas on metrics. + +This can be done by scaling the metrics with the slots counter +read at the same time. + +Then it's possible to take deltas of these slots counts +measured at different points, and determine the metrics +for that time period. + + slots_a = read_slots(); + metric_a = read_metrics(); + + ... larger code region ... + + slots_b = read_slots() + metric_b = read_metrics() + + # compute scaled metrics for measurement a + retiring_slots_a = GET_METRIC(metric_a, 0) * slots_a + bad_spec_slots_a = GET_METRIC(metric_a, 1) * slots_a + fe_bound_slots_a = GET_METRIC(metric_a, 2) * slots_a + be_bound_slots_a = GET_METRIC(metric_a, 3) * slots_a + + # compute delta scaled metrics between b and a + retiring_slots = GET_METRIC(metric_b, 0) * slots_b - retiring_slots_a + bad_spec_slots = GET_METRIC(metric_b, 1) * slots_b - bad_spec_slots_a + fe_bound_slots = GET_METRIC(metric_b, 2) * slots_b - fe_bound_slots_a + be_bound_slots = GET_METRIC(metric_b, 3) * slots_b - be_bound_slots_a + +Later the individual ratios for the measurement period can be recreated +from these counts. + + slots_delta = slots_b - slots_a + retiring_ratio = (float)retiring_slots / slots_delta + bad_spec_ratio = (float)bad_spec_slots / slots_delta + fe_bound_ratio = (float)fe_bound_slots / slots_delta + be_bound_ratio = (float)be_bound_slots / slota_delta + + printf("Retiring %.2f%% Bad Speculation %.2f%% FE Bound %.2f%% BE Bound %.2f%%\n", + retiring_ratio * 100., + bad_spec_ratio * 100., + fe_bound_ratio * 100., + be_bound_ratio * 100.); + +Resetting metrics counters +========================== + +Since the individual metrics are only 8bit they lose precision for +short regions over time because the number of cycles covered by each +fraction bit shrinks. So the counters need to be reset regularly. + +When using the kernel perf API the kernel resets on every read. +So as long as the reading is at reasonable intervals (every few +seconds) the precision is good. + +When using perf stat it is recommended to always use the -I option, +with no longer interval than a few seconds + + perf stat -I 1000 --topdown ... + +For user programs using RDPMC directly the counter can +be reset explicitly using ioctl: + + ioctl(perf_fd, PERF_EVENT_IOC_RESET, 0); + +This "opens" a new measurement period. + +A program using RDPMC for TopDown should schedule such a reset +regularly, as in every few seconds. + +Limits on Ice Lake +================== + +Four pseudo TopDown metric events are exposed for the end-users, +topdown-retiring, topdown-bad-spec, topdown-fe-bound and topdown-be-bound. +They can be used to collect the TopDown value under the following +rules: +- All the TopDown metric events must be in a group with the SLOTS event. +- The SLOTS event must be the leader of the group. +- The PERF_FORMAT_GROUP flag must be applied for each TopDown metric + events + +The SLOTS event and the TopDown metric events can be counting members of +a sampling read group. Since the SLOTS event must be the leader of a TopDown +group, the second event of the group is the sampling event. +For example, perf record -e '{slots, $sampling_event, topdown-retiring}:S' + + +[1] https://software.intel.com/en-us/top-down-microarchitecture-analysis-method-win +[2] https://github.com/andikleen/pmu-tools/wiki/toplev-manual +[3] https://software.intel.com/en-us/intel-vtune-amplifier-xe +[4] https://github.com/andikleen/pmu-tools/tree/master/jevents +[5] https://sites.google.com/site/analysismethods/yasin-pubs diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index d1b5fb89992f..6890fc4b063a 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -223,14 +223,17 @@ endif # Try different combinations to accommodate systems that only have # python[2][-config] in weird combinations but always preferring -# python2 and python2-config as per pep-0394. If we catch a -# python[-config] in version 3, the version check will kill it. -PYTHON2 := $(if $(call get-executable,python2),python2,python) -override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON2)) -PYTHON2_CONFIG := \ +# python2 and python2-config as per pep-0394. If python2 or python +# aren't found, then python3 is used. +PYTHON_AUTO := python +PYTHON_AUTO := $(if $(call get-executable,python3),python3,$(PYTHON_AUTO)) +PYTHON_AUTO := $(if $(call get-executable,python),python,$(PYTHON_AUTO)) +PYTHON_AUTO := $(if $(call get-executable,python2),python2,$(PYTHON_AUTO)) +override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON_AUTO)) +PYTHON_AUTO_CONFIG := \ $(if $(call get-executable,$(PYTHON)-config),$(PYTHON)-config,python-config) override PYTHON_CONFIG := \ - $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG)) + $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON_AUTO_CONFIG)) grep-libs = $(filter -l%,$(1)) strip-libs = $(filter-out -l%,$(1)) @@ -720,12 +723,14 @@ ifndef NO_SLANG endif endif -ifndef NO_GTK2 +ifdef GTK2 FLAGS_GTK2=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) + $(call feature_check,gtk2) ifneq ($(feature-gtk2), 1) msg := $(warning GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev); NO_GTK2 := 1 else + $(call feature_check,gtk2-infobar) ifeq ($(feature-gtk2-infobar), 1) GTK_CFLAGS := -DHAVE_GTK_INFO_BAR_SUPPORT endif @@ -821,6 +826,12 @@ else $(call feature_check,disassembler-four-args) endif +ifeq ($(feature-libbfd-buildid), 1) + CFLAGS += -DHAVE_LIBBFD_BUILDID_SUPPORT +else + msg := $(warning Old version of libbfd/binutils things like PE executable profiling will not be available); +endif + ifdef NO_DEMANGLE CFLAGS += -DNO_DEMANGLE else diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 6031167939ae..7ce3f2e8b9c7 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -48,7 +48,7 @@ include ../scripts/utilities.mak # # Define NO_SLANG if you do not want TUI support. # -# Define NO_GTK2 if you do not want GTK+ GUI support. +# Define GTK2 if you want GTK+ GUI support. # # Define NO_DEMANGLE if you do not want C++ symbol demangling. # @@ -386,7 +386,7 @@ ifneq ($(OUTPUT),) CFLAGS += -I$(OUTPUT) endif -ifndef NO_GTK2 +ifdef GTK2 ALL_PROGRAMS += $(OUTPUT)libperf-gtk.so GTK_IN := $(OUTPUT)gtk-in.o endif @@ -537,6 +537,12 @@ mmap_flags_tbl := $(srctree)/tools/perf/trace/beauty/mmap_flags.sh $(mmap_flags_array): $(linux_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(mmap_flags_tbl) $(Q)$(SHELL) '$(mmap_flags_tbl)' $(linux_uapi_dir) $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@ +mremap_flags_array := $(beauty_outdir)/mremap_flags_array.c +mremap_flags_tbl := $(srctree)/tools/perf/trace/beauty/mremap_flags.sh + +$(mremap_flags_array): $(linux_uapi_dir)/mman.h $(mremap_flags_tbl) + $(Q)$(SHELL) '$(mremap_flags_tbl)' $(linux_uapi_dir) > $@ + mount_flags_array := $(beauty_outdir)/mount_flags_array.c mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/mount_flags.sh @@ -549,6 +555,13 @@ move_mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/move_mount_flags.sh $(move_mount_flags_array): $(linux_uapi_dir)/fs.h $(move_mount_flags_tbl) $(Q)$(SHELL) '$(move_mount_flags_tbl)' $(linux_uapi_dir) > $@ + +mmap_prot_array := $(beauty_outdir)/mmap_prot_array.c +mmap_prot_tbl := $(srctree)/tools/perf/trace/beauty/mmap_prot.sh + +$(mmap_prot_array): $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(mmap_prot_tbl) + $(Q)$(SHELL) '$(mmap_prot_tbl)' $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@ + prctl_option_array := $(beauty_outdir)/prctl_option_array.c prctl_hdr_dir := $(srctree)/tools/include/uapi/linux/ prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh @@ -710,6 +723,8 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(vhost_virtio_ioctl_array) \ $(madvise_behavior_array) \ $(mmap_flags_array) \ + $(mmap_prot_array) \ + $(mremap_flags_array) \ $(mount_flags_array) \ $(move_mount_flags_array) \ $(perf_ioctl_array) \ @@ -886,7 +901,7 @@ check: $(OUTPUT)common-cmds.h ### Installation rules -ifndef NO_GTK2 +ifdef GTK2 install-gtk: $(OUTPUT)libperf-gtk.so $(call QUIET_INSTALL, 'GTK UI') \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(libdir_SQ)'; \ @@ -961,6 +976,7 @@ install-tests: all install-gtk $(call QUIET_INSTALL, tests) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ $(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ + $(INSTALL) tests/pe-file.exe* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \ $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \ @@ -1007,6 +1023,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)$(fspick_arrays) \ $(OUTPUT)$(madvise_behavior_array) \ $(OUTPUT)$(mmap_flags_array) \ + $(OUTPUT)$(mmap_prot_array) \ + $(OUTPUT)$(mremap_flags_array) \ $(OUTPUT)$(mount_flags_array) \ $(OUTPUT)$(move_mount_flags_array) \ $(OUTPUT)$(drm_ioctl_array) \ diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index 5c13438c7bd4..b53294d74b01 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -1,6 +1,7 @@ perf-y += header.o perf-y += machine.o perf-y += perf_regs.o +perf-y += tsc.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/arm64/util/tsc.c b/tools/perf/arch/arm64/util/tsc.c new file mode 100644 index 000000000000..cc85bd9e73f1 --- /dev/null +++ b/tools/perf/arch/arm64/util/tsc.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +#include "../../../util/tsc.h" + +u64 rdtsc(void) +{ + u64 val; + + /* + * According to ARM DDI 0487F.c, from Armv8.0 to Armv8.5 inclusive, the + * system counter is at least 56 bits wide; from Armv8.6, the counter + * must be 64 bits wide. So the system counter could be less than 64 + * bits wide and it is attributed with the flag 'cap_user_time_short' + * is true. + */ + asm volatile("mrs %0, cntvct_el0" : "=r" (val)); + + return val; +} diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index 1a950171a66f..58b2d610aadb 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -40,8 +40,11 @@ get_cpuid_str(struct perf_pmu *pmu __maybe_unused) return bufp; } -int arch_get_runtimeparam(void) +int arch_get_runtimeparam(struct pmu_event *pe) { int count; - return sysfs__read_int("/devices/hv_24x7/interface/sockets", &count) < 0 ? 1 : count; + char path[PATH_MAX] = "/devices/hv_24x7/interface/"; + + atoi(pe->aggr_mode) == PerChip ? strcat(path, "sockets") : strcat(path, "coresperchip"); + return sysfs__read_int(path, &count) < 0 ? 1 : count; } diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c index 7eb5621c021d..24ea12ec7e02 100644 --- a/tools/perf/arch/x86/annotate/instructions.c +++ b/tools/perf/arch/x86/annotate/instructions.c @@ -110,6 +110,7 @@ static struct ins x86__instructions[] = { { .name = "por", .ops = &mov_ops, }, { .name = "rclb", .ops = &mov_ops, }, { .name = "rcll", .ops = &mov_ops, }, + { .name = "ret", .ops = &ret_ops, }, { .name = "retq", .ops = &ret_ops, }, { .name = "sbb", .ops = &mov_ops, }, { .name = "sbbl", .ops = &mov_ops, }, diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index 47f9c56e744f..347c39b960eb 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -3,7 +3,7 @@ perf-y += tsc.o perf-y += pmu.o perf-y += kvm-stat.o perf-y += perf_regs.o -perf-y += group.o +perf-y += topdown.o perf-y += machine.o perf-y += event.o diff --git a/tools/perf/arch/x86/util/group.c b/tools/perf/arch/x86/util/group.c deleted file mode 100644 index e2f8034b8973..000000000000 --- a/tools/perf/arch/x86/util/group.c +++ /dev/null @@ -1,28 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include "api/fs/fs.h" -#include "util/group.h" - -/* - * Check whether we can use a group for top down. - * Without a group may get bad results due to multiplexing. - */ -bool arch_topdown_check_group(bool *warn) -{ - int n; - - if (sysctl__read_int("kernel/nmi_watchdog", &n) < 0) - return false; - if (n > 0) { - *warn = true; - return false; - } - return true; -} - -void arch_topdown_group_warn(void) -{ - fprintf(stderr, - "nmi_watchdog enabled with topdown. May give wrong results.\n" - "Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n"); -} diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c new file mode 100644 index 000000000000..2f3d96aa92a5 --- /dev/null +++ b/tools/perf/arch/x86/util/topdown.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "api/fs/fs.h" +#include "util/pmu.h" +#include "util/topdown.h" + +/* + * Check whether we can use a group for top down. + * Without a group may get bad results due to multiplexing. + */ +bool arch_topdown_check_group(bool *warn) +{ + int n; + + if (sysctl__read_int("kernel/nmi_watchdog", &n) < 0) + return false; + if (n > 0) { + *warn = true; + return false; + } + return true; +} + +void arch_topdown_group_warn(void) +{ + fprintf(stderr, + "nmi_watchdog enabled with topdown. May give wrong results.\n" + "Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n"); +} + +#define TOPDOWN_SLOTS 0x0400 + +static bool is_topdown_slots_event(struct evsel *counter) +{ + if (!counter->pmu_name) + return false; + + if (strcmp(counter->pmu_name, "cpu")) + return false; + + if (counter->core.attr.config == TOPDOWN_SLOTS) + return true; + + return false; +} + +/* + * Check whether a topdown group supports sample-read. + * + * Only Topdown metic supports sample-read. The slots + * event must be the leader of the topdown group. + */ + +bool arch_topdown_sample_read(struct evsel *leader) +{ + if (!pmu_have_event("cpu", "slots")) + return false; + + if (is_topdown_slots_event(leader)) + return true; + + return false; +} diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c index 2f55afb14e1f..559365f8fe52 100644 --- a/tools/perf/arch/x86/util/tsc.c +++ b/tools/perf/arch/x86/util/tsc.c @@ -1,46 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 -#include -#include - -#include -#include - #include -#include -#include "../../../util/debug.h" -#include "../../../util/event.h" -#include "../../../util/synthetic-events.h" + #include "../../../util/tsc.h" -int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, - struct perf_tsc_conversion *tc) -{ - bool cap_user_time_zero; - u32 seq; - int i = 0; - - while (1) { - seq = pc->lock; - rmb(); - tc->time_mult = pc->time_mult; - tc->time_shift = pc->time_shift; - tc->time_zero = pc->time_zero; - cap_user_time_zero = pc->cap_user_time_zero; - rmb(); - if (pc->lock == seq && !(seq & 1)) - break; - if (++i > 10000) { - pr_debug("failed to get perf_event_mmap_page lock\n"); - return -EINVAL; - } - } - - if (!cap_user_time_zero) - return -EOPNOTSUPP; - - return 0; -} - u64 rdtsc(void) { unsigned int low, high; @@ -49,36 +11,3 @@ u64 rdtsc(void) return low | ((u64)high) << 32; } - -int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, - struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine) -{ - union perf_event event = { - .time_conv = { - .header = { - .type = PERF_RECORD_TIME_CONV, - .size = sizeof(struct perf_record_time_conv), - }, - }, - }; - struct perf_tsc_conversion tc; - int err; - - if (!pc) - return 0; - err = perf_read_tsc_conversion(pc, &tc); - if (err == -EOPNOTSUPP) - return 0; - if (err) - return err; - - pr_debug2("Synthesizing TSC conversion information\n"); - - event.time_conv.time_mult = tc.time_mult; - event.time_conv.time_shift = tc.time_shift; - event.time_conv.time_zero = tc.time_zero; - - return process(tool, &event, NULL, machine); -} diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build index 878db6a59a41..e43f46931b41 100644 --- a/tools/perf/bench/Build +++ b/tools/perf/bench/Build @@ -12,6 +12,7 @@ perf-y += epoll-ctl.o perf-y += synthesize.o perf-y += kallsyms-parse.o perf-y += find-bit-bench.o +perf-y += inject-buildid.o perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 2804812d4154..eac36afab2b3 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -47,6 +47,7 @@ int bench_epoll_wait(int argc, const char **argv); int bench_epoll_ctl(int argc, const char **argv); int bench_synthesize(int argc, const char **argv); int bench_kallsyms_parse(int argc, const char **argv); +int bench_inject_build_id(int argc, const char **argv); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c new file mode 100644 index 000000000000..280227e3ffd7 --- /dev/null +++ b/tools/perf/bench/inject-buildid.c @@ -0,0 +1,476 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bench.h" +#include "util/data.h" +#include "util/stat.h" +#include "util/debug.h" +#include "util/event.h" +#include "util/symbol.h" +#include "util/session.h" +#include "util/build-id.h" +#include "util/synthetic-events.h" + +#define MMAP_DEV_MAJOR 8 +#define DSO_MMAP_RATIO 4 + +static unsigned int iterations = 100; +static unsigned int nr_mmaps = 100; +static unsigned int nr_samples = 100; /* samples per mmap */ + +static u64 bench_sample_type; +static u16 bench_id_hdr_size; + +struct bench_data { + int pid; + int input_pipe[2]; + int output_pipe[2]; + pthread_t th; +}; + +struct bench_dso { + struct list_head list; + char *name; + int ino; +}; + +static int nr_dsos; +static struct bench_dso *dsos; + +extern int cmd_inject(int argc, const char *argv[]); + +static const struct option options[] = { + OPT_UINTEGER('i', "iterations", &iterations, + "Number of iterations used to compute average (default: 100)"), + OPT_UINTEGER('m', "nr-mmaps", &nr_mmaps, + "Number of mmap events for each iteration (default: 100)"), + OPT_UINTEGER('n', "nr-samples", &nr_samples, + "Number of sample events per mmap event (default: 100)"), + OPT_INCR('v', "verbose", &verbose, + "be more verbose (show iteration count, DSO name, etc)"), + OPT_END() +}; + +static const char *const bench_usage[] = { + "perf bench internals inject-build-id ", + NULL +}; + +/* + * Helper for collect_dso that adds the given file as a dso to dso_list + * if it contains a build-id. Stops after collecting 4 times more than + * we need (for MMAP2 events). + */ +static int add_dso(const char *fpath, const struct stat *sb __maybe_unused, + int typeflag, struct FTW *ftwbuf __maybe_unused) +{ + struct bench_dso *dso = &dsos[nr_dsos]; + struct build_id bid; + + if (typeflag == FTW_D || typeflag == FTW_SL) + return 0; + + if (filename__read_build_id(fpath, &bid) < 0) + return 0; + + dso->name = realpath(fpath, NULL); + if (dso->name == NULL) + return -1; + + dso->ino = nr_dsos++; + pr_debug2(" Adding DSO: %s\n", fpath); + + /* stop if we collected enough DSOs */ + if ((unsigned int)nr_dsos == DSO_MMAP_RATIO * nr_mmaps) + return 1; + + return 0; +} + +static void collect_dso(void) +{ + dsos = calloc(nr_mmaps * DSO_MMAP_RATIO, sizeof(*dsos)); + if (dsos == NULL) { + printf(" Memory allocation failed\n"); + exit(1); + } + + if (nftw("/usr/lib/", add_dso, 10, FTW_PHYS) < 0) + return; + + pr_debug(" Collected %d DSOs\n", nr_dsos); +} + +static void release_dso(void) +{ + int i; + + for (i = 0; i < nr_dsos; i++) { + struct bench_dso *dso = &dsos[i]; + + free(dso->name); + } + free(dsos); +} + +/* Fake address used by mmap and sample events */ +static u64 dso_map_addr(struct bench_dso *dso) +{ + return 0x400000ULL + dso->ino * 8192ULL; +} + +static u32 synthesize_attr(struct bench_data *data) +{ + union perf_event event; + + memset(&event, 0, sizeof(event.attr) + sizeof(u64)); + + event.header.type = PERF_RECORD_HEADER_ATTR; + event.header.size = sizeof(event.attr) + sizeof(u64); + + event.attr.attr.type = PERF_TYPE_SOFTWARE; + event.attr.attr.config = PERF_COUNT_SW_TASK_CLOCK; + event.attr.attr.exclude_kernel = 1; + event.attr.attr.sample_id_all = 1; + event.attr.attr.sample_type = bench_sample_type; + + return writen(data->input_pipe[1], &event, event.header.size); +} + +static u32 synthesize_fork(struct bench_data *data) +{ + union perf_event event; + + memset(&event, 0, sizeof(event.fork) + bench_id_hdr_size); + + event.header.type = PERF_RECORD_FORK; + event.header.misc = PERF_RECORD_MISC_FORK_EXEC; + event.header.size = sizeof(event.fork) + bench_id_hdr_size; + + event.fork.ppid = 1; + event.fork.ptid = 1; + event.fork.pid = data->pid; + event.fork.tid = data->pid; + + return writen(data->input_pipe[1], &event, event.header.size); +} + +static u32 synthesize_mmap(struct bench_data *data, struct bench_dso *dso, + u64 timestamp) +{ + union perf_event event; + size_t len = offsetof(struct perf_record_mmap2, filename); + u64 *id_hdr_ptr = (void *)&event; + int ts_idx; + + len += roundup(strlen(dso->name) + 1, 8) + bench_id_hdr_size; + + memset(&event, 0, min(len, sizeof(event.mmap2))); + + event.header.type = PERF_RECORD_MMAP2; + event.header.misc = PERF_RECORD_MISC_USER; + event.header.size = len; + + event.mmap2.pid = data->pid; + event.mmap2.tid = data->pid; + event.mmap2.maj = MMAP_DEV_MAJOR; + event.mmap2.ino = dso->ino; + + strcpy(event.mmap2.filename, dso->name); + + event.mmap2.start = dso_map_addr(dso); + event.mmap2.len = 4096; + event.mmap2.prot = PROT_EXEC; + + if (len > sizeof(event.mmap2)) { + /* write mmap2 event first */ + writen(data->input_pipe[1], &event, len - bench_id_hdr_size); + /* zero-fill sample id header */ + memset(id_hdr_ptr, 0, bench_id_hdr_size); + /* put timestamp in the right position */ + ts_idx = (bench_id_hdr_size / sizeof(u64)) - 2; + id_hdr_ptr[ts_idx] = timestamp; + writen(data->input_pipe[1], id_hdr_ptr, bench_id_hdr_size); + } else { + ts_idx = (len / sizeof(u64)) - 2; + id_hdr_ptr[ts_idx] = timestamp; + writen(data->input_pipe[1], &event, len); + } + return len; +} + +static u32 synthesize_sample(struct bench_data *data, struct bench_dso *dso, + u64 timestamp) +{ + union perf_event event; + struct perf_sample sample = { + .tid = data->pid, + .pid = data->pid, + .ip = dso_map_addr(dso), + .time = timestamp, + }; + + event.header.type = PERF_RECORD_SAMPLE; + event.header.misc = PERF_RECORD_MISC_USER; + event.header.size = perf_event__sample_event_size(&sample, bench_sample_type, 0); + + perf_event__synthesize_sample(&event, bench_sample_type, 0, &sample); + + return writen(data->input_pipe[1], &event, event.header.size); +} + +static u32 synthesize_flush(struct bench_data *data) +{ + struct perf_event_header header = { + .size = sizeof(header), + .type = PERF_RECORD_FINISHED_ROUND, + }; + + return writen(data->input_pipe[1], &header, header.size); +} + +static void *data_reader(void *arg) +{ + struct bench_data *data = arg; + char buf[8192]; + int flag; + int n; + + flag = fcntl(data->output_pipe[0], F_GETFL); + fcntl(data->output_pipe[0], F_SETFL, flag | O_NONBLOCK); + + /* read out data from child */ + while (true) { + n = read(data->output_pipe[0], buf, sizeof(buf)); + if (n > 0) + continue; + if (n == 0) + break; + + if (errno != EINTR && errno != EAGAIN) + break; + + usleep(100); + } + + close(data->output_pipe[0]); + return NULL; +} + +static int setup_injection(struct bench_data *data, bool build_id_all) +{ + int ready_pipe[2]; + int dev_null_fd; + char buf; + + if (pipe(ready_pipe) < 0) + return -1; + + if (pipe(data->input_pipe) < 0) + return -1; + + if (pipe(data->output_pipe) < 0) + return -1; + + data->pid = fork(); + if (data->pid < 0) + return -1; + + if (data->pid == 0) { + const char **inject_argv; + int inject_argc = 2; + + close(data->input_pipe[1]); + close(data->output_pipe[0]); + close(ready_pipe[0]); + + dup2(data->input_pipe[0], STDIN_FILENO); + close(data->input_pipe[0]); + dup2(data->output_pipe[1], STDOUT_FILENO); + close(data->output_pipe[1]); + + dev_null_fd = open("/dev/null", O_WRONLY); + if (dev_null_fd < 0) + exit(1); + + dup2(dev_null_fd, STDERR_FILENO); + + if (build_id_all) + inject_argc++; + + inject_argv = calloc(inject_argc + 1, sizeof(*inject_argv)); + if (inject_argv == NULL) + exit(1); + + inject_argv[0] = strdup("inject"); + inject_argv[1] = strdup("-b"); + if (build_id_all) + inject_argv[2] = strdup("--buildid-all"); + + /* signal that we're ready to go */ + close(ready_pipe[1]); + + cmd_inject(inject_argc, inject_argv); + + exit(0); + } + + pthread_create(&data->th, NULL, data_reader, data); + + close(ready_pipe[1]); + close(data->input_pipe[0]); + close(data->output_pipe[1]); + + /* wait for child ready */ + if (read(ready_pipe[0], &buf, 1) < 0) + return -1; + close(ready_pipe[0]); + + return 0; +} + +static int inject_build_id(struct bench_data *data, u64 *max_rss) +{ + int status; + unsigned int i, k; + struct rusage rusage; + u64 len = 0; + + /* this makes the child to run */ + if (perf_header__write_pipe(data->input_pipe[1]) < 0) + return -1; + + len += synthesize_attr(data); + len += synthesize_fork(data); + + for (i = 0; i < nr_mmaps; i++) { + int idx = rand() % (nr_dsos - 1); + struct bench_dso *dso = &dsos[idx]; + u64 timestamp = rand() % 1000000; + + pr_debug2(" [%d] injecting: %s\n", i+1, dso->name); + len += synthesize_mmap(data, dso, timestamp); + + for (k = 0; k < nr_samples; k++) + len += synthesize_sample(data, dso, timestamp + k * 1000); + + if ((i + 1) % 10 == 0) + len += synthesize_flush(data); + } + + /* tihs makes the child to finish */ + close(data->input_pipe[1]); + + wait4(data->pid, &status, 0, &rusage); + *max_rss = rusage.ru_maxrss; + + pr_debug(" Child %d exited with %d\n", data->pid, status); + + return 0; +} + +static void do_inject_loop(struct bench_data *data, bool build_id_all) +{ + unsigned int i; + struct stats time_stats, mem_stats; + double time_average, time_stddev; + double mem_average, mem_stddev; + + init_stats(&time_stats); + init_stats(&mem_stats); + + pr_debug(" Build-id%s injection benchmark\n", build_id_all ? "-all" : ""); + + for (i = 0; i < iterations; i++) { + struct timeval start, end, diff; + u64 runtime_us, max_rss; + + pr_debug(" Iteration #%d\n", i+1); + + if (setup_injection(data, build_id_all) < 0) { + printf(" Build-id injection setup failed\n"); + break; + } + + gettimeofday(&start, NULL); + if (inject_build_id(data, &max_rss) < 0) { + printf(" Build-id injection failed\n"); + break; + } + + gettimeofday(&end, NULL); + timersub(&end, &start, &diff); + runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; + update_stats(&time_stats, runtime_us); + update_stats(&mem_stats, max_rss); + + pthread_join(data->th, NULL); + } + + time_average = avg_stats(&time_stats) / USEC_PER_MSEC; + time_stddev = stddev_stats(&time_stats) / USEC_PER_MSEC; + printf(" Average build-id%s injection took: %.3f msec (+- %.3f msec)\n", + build_id_all ? "-all" : "", time_average, time_stddev); + + /* each iteration, it processes MMAP2 + BUILD_ID + nr_samples * SAMPLE */ + time_average = avg_stats(&time_stats) / (nr_mmaps * (nr_samples + 2)); + time_stddev = stddev_stats(&time_stats) / (nr_mmaps * (nr_samples + 2)); + printf(" Average time per event: %.3f usec (+- %.3f usec)\n", + time_average, time_stddev); + + mem_average = avg_stats(&mem_stats); + mem_stddev = stddev_stats(&mem_stats); + printf(" Average memory usage: %.0f KB (+- %.0f KB)\n", + mem_average, mem_stddev); +} + +static int do_inject_loops(struct bench_data *data) +{ + + srand(time(NULL)); + symbol__init(NULL); + + bench_sample_type = PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP; + bench_sample_type |= PERF_SAMPLE_TID | PERF_SAMPLE_TIME; + bench_id_hdr_size = 32; + + collect_dso(); + if (nr_dsos == 0) { + printf(" Cannot collect DSOs for injection\n"); + return -1; + } + + do_inject_loop(data, false); + do_inject_loop(data, true); + + release_dso(); + return 0; +} + +int bench_inject_build_id(int argc, const char **argv) +{ + struct bench_data data; + + argc = parse_options(argc, argv, options, bench_usage, 0); + if (argc) { + usage_with_options(bench_usage, options); + exit(EXIT_FAILURE); + } + + return do_inject_loops(&data); +} + diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index f85bceccc459..11726ec6285f 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -137,12 +137,13 @@ struct global_info { u8 *data; pthread_mutex_t startup_mutex; + pthread_cond_t startup_cond; int nr_tasks_started; - pthread_mutex_t startup_done_mutex; - pthread_mutex_t start_work_mutex; + pthread_cond_t start_work_cond; int nr_tasks_working; + bool start_work; pthread_mutex_t stop_work_mutex; u64 bytes_done; @@ -483,6 +484,18 @@ static void init_global_mutex(pthread_mutex_t *mutex) pthread_mutex_init(mutex, &attr); } +/* + * Return a process-shared (global) condition variable: + */ +static void init_global_cond(pthread_cond_t *cond) +{ + pthread_condattr_t attr; + + pthread_condattr_init(&attr); + pthread_condattr_setpshared(&attr, PTHREAD_PROCESS_SHARED); + pthread_cond_init(cond, &attr); +} + static int parse_cpu_list(const char *arg) { p0.cpu_list_str = strdup(arg); @@ -1136,15 +1149,18 @@ static void *worker_thread(void *__tdata) if (g->p.serialize_startup) { pthread_mutex_lock(&g->startup_mutex); g->nr_tasks_started++; + /* The last thread wakes the main process. */ + if (g->nr_tasks_started == g->p.nr_tasks) + pthread_cond_signal(&g->startup_cond); + pthread_mutex_unlock(&g->startup_mutex); /* Here we will wait for the main process to start us all at once: */ pthread_mutex_lock(&g->start_work_mutex); + g->start_work = false; g->nr_tasks_working++; - - /* Last one wake the main process: */ - if (g->nr_tasks_working == g->p.nr_tasks) - pthread_mutex_unlock(&g->startup_done_mutex); + while (!g->start_work) + pthread_cond_wait(&g->start_work_cond, &g->start_work_mutex); pthread_mutex_unlock(&g->start_work_mutex); } @@ -1441,8 +1457,9 @@ static int init(void) /* Startup serialization: */ init_global_mutex(&g->start_work_mutex); + init_global_cond(&g->start_work_cond); init_global_mutex(&g->startup_mutex); - init_global_mutex(&g->startup_done_mutex); + init_global_cond(&g->startup_cond); init_global_mutex(&g->stop_work_mutex); init_thread_data(); @@ -1502,9 +1519,6 @@ static int __bench_numa(const char *name) pids = zalloc(g->p.nr_proc * sizeof(*pids)); pid = -1; - /* All threads try to acquire it, this way we can wait for them to start up: */ - pthread_mutex_lock(&g->start_work_mutex); - if (g->p.serialize_startup) { tprintf(" #\n"); tprintf(" # Startup synchronization: ..."); fflush(stdout); @@ -1526,22 +1540,29 @@ static int __bench_numa(const char *name) pids[i] = pid; } - /* Wait for all the threads to start up: */ - while (g->nr_tasks_started != g->p.nr_tasks) - usleep(USEC_PER_MSEC); - - BUG_ON(g->nr_tasks_started != g->p.nr_tasks); if (g->p.serialize_startup) { + bool threads_ready = false; double startup_sec; - pthread_mutex_lock(&g->startup_done_mutex); + /* + * Wait for all the threads to start up. The last thread will + * signal this process. + */ + pthread_mutex_lock(&g->startup_mutex); + while (g->nr_tasks_started != g->p.nr_tasks) + pthread_cond_wait(&g->startup_cond, &g->startup_mutex); - /* This will start all threads: */ - pthread_mutex_unlock(&g->start_work_mutex); + pthread_mutex_unlock(&g->startup_mutex); - /* This mutex is locked - the last started thread will wake us: */ - pthread_mutex_lock(&g->startup_done_mutex); + /* Wait for all threads to be at the start_work_cond. */ + while (!threads_ready) { + pthread_mutex_lock(&g->start_work_mutex); + threads_ready = (g->nr_tasks_working == g->p.nr_tasks); + pthread_mutex_unlock(&g->start_work_mutex); + if (!threads_ready) + usleep(1); + } gettimeofday(&stop, NULL); @@ -1555,7 +1576,11 @@ static int __bench_numa(const char *name) tprintf(" #\n"); start = stop; - pthread_mutex_unlock(&g->startup_done_mutex); + /* Start all threads running. */ + pthread_mutex_lock(&g->start_work_mutex); + g->start_work = true; + pthread_mutex_unlock(&g->start_work_mutex); + pthread_cond_broadcast(&g->start_work_cond); } else { gettimeofday(&start, NULL); } diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index 4f176039fc8f..62a7b7420a44 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -87,6 +87,7 @@ static struct bench epoll_benchmarks[] = { static struct bench internals_benchmarks[] = { { "synthesize", "Benchmark perf event synthesis", bench_synthesize }, { "kallsyms-parse", "Benchmark kallsyms parsing", bench_kallsyms_parse }, + { "inject-build-id", "Benchmark build-id injection", bench_inject_build_id }, { NULL, NULL, NULL } }; diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 39efa51d7fb3..a25411926e48 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -174,19 +174,19 @@ static int build_id_cache__add_kcore(const char *filename, bool force) static int build_id_cache__add_file(const char *filename, struct nsinfo *nsi) { char sbuild_id[SBUILD_ID_SIZE]; - u8 build_id[BUILD_ID_SIZE]; + struct build_id bid; int err; struct nscookie nsc; nsinfo__mountns_enter(nsi, &nsc); - err = filename__read_build_id(filename, &build_id, sizeof(build_id)); + err = filename__read_build_id(filename, &bid); nsinfo__mountns_exit(&nsc); if (err < 0) { pr_debug("Couldn't read a build-id in %s\n", filename); return -1; } - build_id__sprintf(build_id, sizeof(build_id), sbuild_id); + build_id__sprintf(&bid, sbuild_id); err = build_id_cache__add_s(sbuild_id, filename, nsi, false, false); pr_debug("Adding %s %s: %s\n", sbuild_id, filename, @@ -196,21 +196,21 @@ static int build_id_cache__add_file(const char *filename, struct nsinfo *nsi) static int build_id_cache__remove_file(const char *filename, struct nsinfo *nsi) { - u8 build_id[BUILD_ID_SIZE]; char sbuild_id[SBUILD_ID_SIZE]; + struct build_id bid; struct nscookie nsc; int err; nsinfo__mountns_enter(nsi, &nsc); - err = filename__read_build_id(filename, &build_id, sizeof(build_id)); + err = filename__read_build_id(filename, &bid); nsinfo__mountns_exit(&nsc); if (err < 0) { pr_debug("Couldn't read a build-id in %s\n", filename); return -1; } - build_id__sprintf(build_id, sizeof(build_id), sbuild_id); + build_id__sprintf(&bid, sbuild_id); err = build_id_cache__remove_s(sbuild_id); pr_debug("Removing %s %s: %s\n", sbuild_id, filename, err ? "FAIL" : "Ok"); @@ -274,17 +274,16 @@ static int build_id_cache__purge_all(void) static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused) { char filename[PATH_MAX]; - u8 build_id[BUILD_ID_SIZE]; + struct build_id bid; if (dso__build_id_filename(dso, filename, sizeof(filename), false) && - filename__read_build_id(filename, build_id, - sizeof(build_id)) != sizeof(build_id)) { + filename__read_build_id(filename, &bid) == -1) { if (errno == ENOENT) return false; pr_warning("Problems with %s file, consider removing it from the cache\n", filename); - } else if (memcmp(dso->build_id, build_id, sizeof(dso->build_id))) { + } else if (memcmp(dso->bid.data, bid.data, bid.size)) { pr_warning("Problems with %s file, consider removing it from the cache\n", filename); } @@ -300,14 +299,14 @@ static int build_id_cache__fprintf_missing(struct perf_session *session, FILE *f static int build_id_cache__update_file(const char *filename, struct nsinfo *nsi) { - u8 build_id[BUILD_ID_SIZE]; char sbuild_id[SBUILD_ID_SIZE]; + struct build_id bid; struct nscookie nsc; int err; nsinfo__mountns_enter(nsi, &nsc); - err = filename__read_build_id(filename, &build_id, sizeof(build_id)); + err = filename__read_build_id(filename, &bid); nsinfo__mountns_exit(&nsc); if (err < 0) { pr_debug("Couldn't read a build-id in %s\n", filename); @@ -315,7 +314,7 @@ static int build_id_cache__update_file(const char *filename, struct nsinfo *nsi) } err = 0; - build_id__sprintf(build_id, sizeof(build_id), sbuild_id); + build_id__sprintf(&bid, sbuild_id); if (build_id_cache__cached(sbuild_id)) err = build_id_cache__remove_s(sbuild_id); diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 5938b100eaf4..d5bea5d3cd51 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -652,45 +652,6 @@ STAT_FN(ld_l2hit) STAT_FN(ld_llchit) STAT_FN(rmt_hit) -static uint64_t llc_miss(struct c2c_stats *stats) -{ - uint64_t llcmiss; - - llcmiss = stats->lcl_dram + - stats->rmt_dram + - stats->rmt_hitm + - stats->rmt_hit; - - return llcmiss; -} - -static int -ld_llcmiss_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, - struct hist_entry *he) -{ - struct c2c_hist_entry *c2c_he; - int width = c2c_width(fmt, hpp, he->hists); - - c2c_he = container_of(he, struct c2c_hist_entry, he); - - return scnprintf(hpp->buf, hpp->size, "%*lu", width, - llc_miss(&c2c_he->stats)); -} - -static int64_t -ld_llcmiss_cmp(struct perf_hpp_fmt *fmt __maybe_unused, - struct hist_entry *left, struct hist_entry *right) -{ - struct c2c_hist_entry *c2c_left; - struct c2c_hist_entry *c2c_right; - - c2c_left = container_of(left, struct c2c_hist_entry, he); - c2c_right = container_of(right, struct c2c_hist_entry, he); - - return (uint64_t) llc_miss(&c2c_left->stats) - - (uint64_t) llc_miss(&c2c_right->stats); -} - static uint64_t total_records(struct c2c_stats *stats) { uint64_t lclmiss, ldcnt, total; @@ -1328,7 +1289,7 @@ static struct c2c_dimension dim_iaddr = { }; static struct c2c_dimension dim_tot_hitm = { - .header = HEADER_SPAN("----- LLC Load Hitm -----", "Total", 2), + .header = HEADER_SPAN("------- Load Hitm -------", "Total", 2), .name = "tot_hitm", .cmp = tot_hitm_cmp, .entry = tot_hitm_entry, @@ -1336,7 +1297,7 @@ static struct c2c_dimension dim_tot_hitm = { }; static struct c2c_dimension dim_lcl_hitm = { - .header = HEADER_SPAN_LOW("Lcl"), + .header = HEADER_SPAN_LOW("LclHitm"), .name = "lcl_hitm", .cmp = lcl_hitm_cmp, .entry = lcl_hitm_entry, @@ -1344,7 +1305,7 @@ static struct c2c_dimension dim_lcl_hitm = { }; static struct c2c_dimension dim_rmt_hitm = { - .header = HEADER_SPAN_LOW("Rmt"), + .header = HEADER_SPAN_LOW("RmtHitm"), .name = "rmt_hitm", .cmp = rmt_hitm_cmp, .entry = rmt_hitm_entry, @@ -1367,16 +1328,16 @@ static struct c2c_dimension dim_cl_lcl_hitm = { .width = 7, }; -static struct c2c_dimension dim_stores = { - .header = HEADER_SPAN("---- Store Reference ----", "Total", 2), - .name = "stores", +static struct c2c_dimension dim_tot_stores = { + .header = HEADER_BOTH("Total", "Stores"), + .name = "tot_stores", .cmp = store_cmp, .entry = store_entry, .width = 7, }; static struct c2c_dimension dim_stores_l1hit = { - .header = HEADER_SPAN_LOW("L1Hit"), + .header = HEADER_SPAN("---- Stores ----", "L1Hit", 1), .name = "stores_l1hit", .cmp = st_l1hit_cmp, .entry = st_l1hit_entry, @@ -1432,7 +1393,7 @@ static struct c2c_dimension dim_ld_l2hit = { }; static struct c2c_dimension dim_ld_llchit = { - .header = HEADER_SPAN("-- LLC Load Hit --", "Llc", 1), + .header = HEADER_SPAN("- LLC Load Hit --", "LclHit", 1), .name = "ld_lclhit", .cmp = ld_llchit_cmp, .entry = ld_llchit_entry, @@ -1440,21 +1401,13 @@ static struct c2c_dimension dim_ld_llchit = { }; static struct c2c_dimension dim_ld_rmthit = { - .header = HEADER_SPAN_LOW("Rmt"), + .header = HEADER_SPAN("- RMT Load Hit --", "RmtHit", 1), .name = "ld_rmthit", .cmp = rmt_hit_cmp, .entry = rmt_hit_entry, .width = 8, }; -static struct c2c_dimension dim_ld_llcmiss = { - .header = HEADER_BOTH("LLC", "Ld Miss"), - .name = "ld_llcmiss", - .cmp = ld_llcmiss_cmp, - .entry = ld_llcmiss_entry, - .width = 7, -}; - static struct c2c_dimension dim_tot_recs = { .header = HEADER_BOTH("Total", "records"), .name = "tot_recs", @@ -1486,7 +1439,7 @@ static struct c2c_dimension dim_percent_hitm = { }; static struct c2c_dimension dim_percent_rmt_hitm = { - .header = HEADER_SPAN("----- HITM -----", "Rmt", 1), + .header = HEADER_SPAN("----- HITM -----", "RmtHitm", 1), .name = "percent_rmt_hitm", .cmp = percent_rmt_hitm_cmp, .entry = percent_rmt_hitm_entry, @@ -1495,7 +1448,7 @@ static struct c2c_dimension dim_percent_rmt_hitm = { }; static struct c2c_dimension dim_percent_lcl_hitm = { - .header = HEADER_SPAN_LOW("Lcl"), + .header = HEADER_SPAN_LOW("LclHitm"), .name = "percent_lcl_hitm", .cmp = percent_lcl_hitm_cmp, .entry = percent_lcl_hitm_entry, @@ -1648,7 +1601,7 @@ static struct c2c_dimension *dimensions[] = { &dim_rmt_hitm, &dim_cl_lcl_hitm, &dim_cl_rmt_hitm, - &dim_stores, + &dim_tot_stores, &dim_stores_l1hit, &dim_stores_l1miss, &dim_cl_stores_l1hit, @@ -1658,7 +1611,6 @@ static struct c2c_dimension *dimensions[] = { &dim_ld_l2hit, &dim_ld_llchit, &dim_ld_rmthit, - &dim_ld_llcmiss, &dim_tot_recs, &dim_tot_loads, &dim_percent_hitm, @@ -2846,15 +2798,16 @@ static int perf_c2c__report(int argc, const char **argv) "dcacheline," "dcacheline_node," "dcacheline_count," - "tot_recs," "percent_hitm," "tot_hitm,lcl_hitm,rmt_hitm," - "stores,stores_l1hit,stores_l1miss," - "dram_lcl,dram_rmt," - "ld_llcmiss," + "tot_recs," "tot_loads," + "tot_stores," + "stores_l1hit,stores_l1miss," "ld_fbhit,ld_l1hit,ld_l2hit," - "ld_lclhit,ld_rmthit", + "ld_lclhit,lcl_hitm," + "ld_rmthit,rmt_hitm," + "dram_lcl,dram_rmt", c2c.display == DISPLAY_TOT ? "tot_hitm" : c2c.display == DISPLAY_LCL ? "lcl_hitm" : "rmt_hitm" ); @@ -2916,7 +2869,7 @@ static int perf_c2c__record(int argc, const char **argv) bool event_set = false; struct option options[] = { OPT_CALLBACK('e', "event", &event_set, "event", - "event selector. Use 'perf mem record -e list' to list available events", + "event selector. Use 'perf c2c record -e list' to list available events", parse_record_events), OPT_BOOLEAN('u', "all-user", &all_user, "collect only user level data"), OPT_BOOLEAN('k', "all-kernel", &all_kernel, "collect only kernel level data"), diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index f8c9bdd8269a..584e2e1a3793 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -25,6 +25,7 @@ #include "util/map.h" #include "util/spark.h" #include "util/block-info.h" +#include "util/stream.h" #include #include #include @@ -42,6 +43,7 @@ struct perf_diff { int range_size; int range_num; bool has_br_stack; + bool stream; }; /* Diff command specific HPP columns. */ @@ -72,6 +74,7 @@ struct data__file { struct perf_data data; int idx; struct hists *hists; + struct evlist_streams *evlist_streams; struct diff_hpp_fmt fmt[PERF_HPP_DIFF__MAX_INDEX]; }; @@ -106,6 +109,7 @@ enum { COMPUTE_DELTA_ABS, COMPUTE_CYCLES, COMPUTE_MAX, + COMPUTE_STREAM, /* After COMPUTE_MAX to avoid use current compute arrays */ }; const char *compute_names[COMPUTE_MAX] = { @@ -393,6 +397,11 @@ static int diff__process_sample_event(struct perf_tool *tool, struct perf_diff *pdiff = container_of(tool, struct perf_diff, tool); struct addr_location al; struct hists *hists = evsel__hists(evsel); + struct hist_entry_iter iter = { + .evsel = evsel, + .sample = sample, + .ops = &hist_iter_normal, + }; int ret = -1; if (perf_time__ranges_skip_sample(pdiff->ptime_range, pdiff->range_num, @@ -411,14 +420,8 @@ static int diff__process_sample_event(struct perf_tool *tool, goto out_put; } - if (compute != COMPUTE_CYCLES) { - if (!hists__add_entry(hists, &al, NULL, NULL, NULL, sample, - true)) { - pr_warning("problem incrementing symbol period, " - "skipping event\n"); - goto out_put; - } - } else { + switch (compute) { + case COMPUTE_CYCLES: if (!hists__add_entry_ops(hists, &block_hist_ops, &al, NULL, NULL, NULL, sample, true)) { pr_warning("problem incrementing symbol period, " @@ -428,6 +431,23 @@ static int diff__process_sample_event(struct perf_tool *tool, hist__account_cycles(sample->branch_stack, &al, sample, false, NULL); + break; + + case COMPUTE_STREAM: + if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH, + NULL)) { + pr_debug("problem adding hist entry, skipping event\n"); + goto out_put; + } + break; + + default: + if (!hists__add_entry(hists, &al, NULL, NULL, NULL, sample, + true)) { + pr_warning("problem incrementing symbol period, " + "skipping event\n"); + goto out_put; + } } /* @@ -996,10 +1016,55 @@ static void data_process(void) } } +static int process_base_stream(struct data__file *data_base, + struct data__file *data_pair, + const char *title __maybe_unused) +{ + struct evlist *evlist_base = data_base->session->evlist; + struct evlist *evlist_pair = data_pair->session->evlist; + struct evsel *evsel_base, *evsel_pair; + struct evsel_streams *es_base, *es_pair; + + evlist__for_each_entry(evlist_base, evsel_base) { + evsel_pair = evsel_match(evsel_base, evlist_pair); + if (!evsel_pair) + continue; + + es_base = evsel_streams__entry(data_base->evlist_streams, + evsel_base->idx); + if (!es_base) + return -1; + + es_pair = evsel_streams__entry(data_pair->evlist_streams, + evsel_pair->idx); + if (!es_pair) + return -1; + + evsel_streams__match(es_base, es_pair); + evsel_streams__report(es_base, es_pair); + } + + return 0; +} + +static void stream_process(void) +{ + /* + * Stream comparison only supports two data files. + * perf.data.old and perf.data. data__files[0] is perf.data.old, + * data__files[1] is perf.data. + */ + process_base_stream(&data__files[0], &data__files[1], + "# Output based on old perf data:\n#\n"); +} + static void data__free(struct data__file *d) { int col; + if (d->evlist_streams) + evlist_streams__delete(d->evlist_streams); + for (col = 0; col < PERF_HPP_DIFF__MAX_INDEX; col++) { struct diff_hpp_fmt *fmt = &d->fmt[col]; @@ -1153,9 +1218,19 @@ static int __cmd_diff(void) if (pdiff.ptime_range) zfree(&pdiff.ptime_range); + + if (compute == COMPUTE_STREAM) { + d->evlist_streams = evlist__create_streams( + d->session->evlist, 5); + if (!d->evlist_streams) + goto out_delete; + } } - data_process(); + if (compute == COMPUTE_STREAM) + stream_process(); + else + data_process(); out_delete: data__for_each_file(i, d) { @@ -1228,6 +1303,8 @@ static const struct option options[] = { "only consider symbols in these pids"), OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]", "only consider symbols in these tids"), + OPT_BOOLEAN(0, "stream", &pdiff.stream, + "Enable hot streams comparison."), OPT_END() }; @@ -1887,6 +1964,9 @@ int cmd_diff(int argc, const char **argv) if (cycles_hist && (compute != COMPUTE_CYCLES)) usage_with_options(diff_usage, options); + if (pdiff.stream) + compute = COMPUTE_STREAM; + symbol__annotation_init(); if (symbol__init(NULL) < 0) @@ -1898,13 +1978,26 @@ int cmd_diff(int argc, const char **argv) if (check_file_brstack() < 0) return -1; - if (compute == COMPUTE_CYCLES && !pdiff.has_br_stack) + if ((compute == COMPUTE_CYCLES || compute == COMPUTE_STREAM) + && !pdiff.has_br_stack) { return -1; + } - if (ui_init() < 0) - return -1; + if (compute == COMPUTE_STREAM) { + symbol_conf.show_branchflag_count = true; + symbol_conf.disable_add2line_warn = true; + callchain_param.mode = CHAIN_FLAT; + callchain_param.key = CCKEY_SRCLINE; + callchain_param.branch_callstack = 1; + symbol_conf.use_callchain = true; + callchain_register_param(&callchain_param); + sort_order = "srcline,symbol,dso"; + } else { + if (ui_init() < 0) + return -1; - sort__mode = SORT_MODE__DIFF; + sort__mode = SORT_MODE__DIFF; + } if (setup_sorting(NULL) < 0) usage_with_options(diff_usage, options); diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 1d44bc2f63d8..9366fad591dc 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -25,6 +25,7 @@ #include "target.h" #include "cpumap.h" #include "thread_map.h" +#include "strfilter.h" #include "util/cap.h" #include "util/config.h" #include "util/units.h" @@ -36,7 +37,6 @@ struct perf_ftrace { struct evlist *evlist; struct target target; const char *tracer; - bool list_avail_functions; struct list_head filters; struct list_head notrace; struct list_head graph_funcs; @@ -181,6 +181,40 @@ out: return ret; } +static int read_tracing_file_by_line(const char *name, + void (*cb)(char *str, void *arg), + void *cb_arg) +{ + char *line = NULL; + size_t len = 0; + char *file; + FILE *fp; + + file = get_tracing_file(name); + if (!file) { + pr_debug("cannot get tracing file: %s\n", name); + return -1; + } + + fp = fopen(file, "r"); + if (fp == NULL) { + pr_debug("cannot open tracing file: %s\n", name); + put_tracing_file(file); + return -1; + } + + while (getline(&line, &len, fp) != -1) { + cb(line, cb_arg); + } + + if (line) + free(line); + + fclose(fp); + put_tracing_file(file); + return 0; +} + static int write_tracing_file_int(const char *name, int value) { char buf[16]; @@ -557,9 +591,6 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv) signal(SIGCHLD, sig_handler); signal(SIGPIPE, sig_handler); - if (ftrace->list_avail_functions) - return read_tracing_file_to_stdout("available_filter_functions"); - if (reset_tracing_files(ftrace) < 0) { pr_err("failed to reset ftrace\n"); goto out; @@ -683,6 +714,46 @@ static int perf_ftrace_config(const char *var, const char *value, void *cb) return -1; } +static void list_function_cb(char *str, void *arg) +{ + struct strfilter *filter = (struct strfilter *)arg; + + if (strfilter__compare(filter, str)) + printf("%s", str); +} + +static int opt_list_avail_functions(const struct option *opt __maybe_unused, + const char *str, int unset) +{ + struct strfilter *filter; + const char *err = NULL; + int ret; + + if (unset || !str) + return -1; + + filter = strfilter__new(str, &err); + if (!filter) + return err ? -EINVAL : -ENOMEM; + + ret = strfilter__or(filter, str, &err); + if (ret == -EINVAL) { + pr_err("Filter parse error at %td.\n", err - str + 1); + pr_err("Source: \"%s\"\n", str); + pr_err(" %*c\n", (int)(err - str + 1), '^'); + strfilter__delete(filter); + return ret; + } + + ret = read_tracing_file_by_line("available_filter_functions", + list_function_cb, filter); + strfilter__delete(filter); + if (ret < 0) + return ret; + + exit(0); +} + static int parse_filter_func(const struct option *opt, const char *str, int unset __maybe_unused) { @@ -817,8 +888,9 @@ int cmd_ftrace(int argc, const char **argv) const struct option ftrace_options[] = { OPT_STRING('t', "tracer", &ftrace.tracer, "tracer", "Tracer to use: function_graph(default) or function"), - OPT_BOOLEAN('F', "funcs", &ftrace.list_avail_functions, - "Show available functions to filter"), + OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]", + "Show available functions to filter", + opt_list_avail_functions, "*"), OPT_STRING('p', "pid", &ftrace.target.pid, "pid", "Trace on existing process id"), /* TODO: Add short option -t after -t/--tracer can be removed. */ diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 6d2f410d773a..452a75fe68e5 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -10,6 +10,7 @@ #include "util/color.h" #include "util/dso.h" +#include "util/vdso.h" #include "util/evlist.h" #include "util/evsel.h" #include "util/map.h" @@ -23,9 +24,11 @@ #include "util/symbol.h" #include "util/synthetic-events.h" #include "util/thread.h" -#include +#include "util/namespaces.h" +#include #include +#include /* To get things like MAP_HUGETLB even on older libc headers */ #include #include @@ -35,6 +38,7 @@ struct perf_inject { struct perf_tool tool; struct perf_session *session; bool build_ids; + bool build_id_all; bool sched_stat; bool have_auxtrace; bool strip; @@ -54,6 +58,9 @@ struct event_entry { union perf_event event[]; }; +static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool, + struct machine *machine, u8 cpumode, u32 flags); + static int output_bytes(struct perf_inject *inject, void *buf, size_t sz) { ssize_t size; @@ -97,6 +104,13 @@ static int perf_event__repipe_op2_synth(struct perf_session *session, return perf_event__repipe_synth(session->tool, event); } +static int perf_event__repipe_op4_synth(struct perf_session *session, + union perf_event *event, + u64 data __maybe_unused) +{ + return perf_event__repipe_synth(session->tool, event); +} + static int perf_event__repipe_attr(struct perf_tool *tool, union perf_event *event, struct evlist **pevlist) @@ -115,6 +129,13 @@ static int perf_event__repipe_attr(struct perf_tool *tool, return perf_event__repipe_synth(tool, event); } +static int perf_event__repipe_event_update(struct perf_tool *tool, + union perf_event *event, + struct evlist **pevlist __maybe_unused) +{ + return perf_event__repipe_synth(tool, event); +} + #ifdef HAVE_AUXTRACE_SUPPORT static int copy_bytes(struct perf_inject *inject, int fd, off_t size) @@ -303,6 +324,68 @@ static int perf_event__jit_repipe_mmap(struct perf_tool *tool, } #endif +static struct dso *findnew_dso(int pid, int tid, const char *filename, + struct dso_id *id, struct machine *machine) +{ + struct thread *thread; + struct nsinfo *nsi = NULL; + struct nsinfo *nnsi; + struct dso *dso; + bool vdso; + + thread = machine__findnew_thread(machine, pid, tid); + if (thread == NULL) { + pr_err("cannot find or create a task %d/%d.\n", tid, pid); + return NULL; + } + + vdso = is_vdso_map(filename); + nsi = nsinfo__get(thread->nsinfo); + + if (vdso) { + /* The vdso maps are always on the host and not the + * container. Ensure that we don't use setns to look + * them up. + */ + nnsi = nsinfo__copy(nsi); + if (nnsi) { + nsinfo__put(nsi); + nnsi->need_setns = false; + nsi = nnsi; + } + dso = machine__findnew_vdso(machine, thread); + } else { + dso = machine__findnew_dso_id(machine, filename, id); + } + + if (dso) + dso->nsinfo = nsi; + else + nsinfo__put(nsi); + + thread__put(thread); + return dso; +} + +static int perf_event__repipe_buildid_mmap(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct dso *dso; + + dso = findnew_dso(event->mmap.pid, event->mmap.tid, + event->mmap.filename, NULL, machine); + + if (dso && !dso->hit) { + dso->hit = 1; + dso__inject_build_id(dso, tool, machine, sample->cpumode, 0); + dso__put(dso); + } + + return perf_event__repipe(tool, event, sample, machine); +} + static int perf_event__repipe_mmap2(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -341,6 +424,34 @@ static int perf_event__jit_repipe_mmap2(struct perf_tool *tool, } #endif +static int perf_event__repipe_buildid_mmap2(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct dso_id dso_id = { + .maj = event->mmap2.maj, + .min = event->mmap2.min, + .ino = event->mmap2.ino, + .ino_generation = event->mmap2.ino_generation, + }; + struct dso *dso; + + dso = findnew_dso(event->mmap2.pid, event->mmap2.tid, + event->mmap2.filename, &dso_id, machine); + + if (dso && !dso->hit) { + dso->hit = 1; + dso__inject_build_id(dso, tool, machine, sample->cpumode, + event->mmap2.flags); + dso__put(dso); + } + + perf_event__repipe(tool, event, sample, machine); + + return 0; +} + static int perf_event__repipe_fork(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -405,34 +516,36 @@ static int perf_event__repipe_tracing_data(struct perf_session *session, static int dso__read_build_id(struct dso *dso) { + struct nscookie nsc; + if (dso->has_build_id) return 0; - if (filename__read_build_id(dso->long_name, dso->build_id, - sizeof(dso->build_id)) > 0) { + nsinfo__mountns_enter(dso->nsinfo, &nsc); + if (filename__read_build_id(dso->long_name, &dso->bid) > 0) dso->has_build_id = true; - return 0; - } + nsinfo__mountns_exit(&nsc); - return -1; + return dso->has_build_id ? 0 : -1; } static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool, - struct machine *machine) + struct machine *machine, u8 cpumode, u32 flags) { - u16 misc = PERF_RECORD_MISC_USER; int err; + if (is_anon_memory(dso->long_name) || flags & MAP_HUGETLB) + return 0; + if (is_no_dso_memory(dso->long_name)) + return 0; + if (dso__read_build_id(dso) < 0) { pr_debug("no build_id found for %s\n", dso->long_name); return -1; } - if (dso->kernel) - misc = PERF_RECORD_MISC_KERNEL; - - err = perf_event__synthesize_build_id(tool, dso, misc, perf_event__repipe, - machine); + err = perf_event__synthesize_build_id(tool, dso, cpumode, + perf_event__repipe, machine); if (err) { pr_err("Can't synthesize build_id event for %s\n", dso->long_name); return -1; @@ -441,11 +554,10 @@ static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool, return 0; } -static int perf_event__inject_buildid(struct perf_tool *tool, - union perf_event *event, - struct perf_sample *sample, - struct evsel *evsel __maybe_unused, - struct machine *machine) +int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event, + struct perf_sample *sample, + struct evsel *evsel __maybe_unused, + struct machine *machine) { struct addr_location al; struct thread *thread; @@ -460,19 +572,8 @@ static int perf_event__inject_buildid(struct perf_tool *tool, if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) { if (!al.map->dso->hit) { al.map->dso->hit = 1; - if (map__load(al.map) >= 0) { - dso__inject_build_id(al.map->dso, tool, machine); - /* - * If this fails, too bad, let the other side - * account this as unresolved. - */ - } else { -#ifdef HAVE_LIBELF_SUPPORT - pr_warning("no symbols found in %s, maybe " - "install a debug package?\n", - al.map->dso->long_name); -#endif - } + dso__inject_build_id(al.map->dso, tool, machine, + sample->cpumode, al.map->flags); } } @@ -606,7 +707,7 @@ static int __cmd_inject(struct perf_inject *inject) signal(SIGINT, sig_handler); if (inject->build_ids || inject->sched_stat || - inject->itrace_synth_opts.set) { + inject->itrace_synth_opts.set || inject->build_id_all) { inject->tool.mmap = perf_event__repipe_mmap; inject->tool.mmap2 = perf_event__repipe_mmap2; inject->tool.fork = perf_event__repipe_fork; @@ -615,7 +716,10 @@ static int __cmd_inject(struct perf_inject *inject) output_data_offset = session->header.data_offset; - if (inject->build_ids) { + if (inject->build_id_all) { + inject->tool.mmap = perf_event__repipe_buildid_mmap; + inject->tool.mmap2 = perf_event__repipe_buildid_mmap2; + } else if (inject->build_ids) { inject->tool.sample = perf_event__inject_buildid; } else if (inject->sched_stat) { struct evsel *evsel; @@ -708,9 +812,12 @@ int cmd_inject(int argc, const char **argv) struct perf_inject inject = { .tool = { .sample = perf_event__repipe_sample, + .read = perf_event__repipe_sample, .mmap = perf_event__repipe, .mmap2 = perf_event__repipe, .comm = perf_event__repipe, + .namespaces = perf_event__repipe, + .cgroup = perf_event__repipe, .fork = perf_event__repipe, .exit = perf_event__repipe, .lost = perf_event__repipe, @@ -718,19 +825,28 @@ int cmd_inject(int argc, const char **argv) .aux = perf_event__repipe, .itrace_start = perf_event__repipe, .context_switch = perf_event__repipe, - .read = perf_event__repipe_sample, .throttle = perf_event__repipe, .unthrottle = perf_event__repipe, + .ksymbol = perf_event__repipe, + .bpf = perf_event__repipe, + .text_poke = perf_event__repipe, .attr = perf_event__repipe_attr, + .event_update = perf_event__repipe_event_update, .tracing_data = perf_event__repipe_op2_synth, - .auxtrace_info = perf_event__repipe_op2_synth, - .auxtrace = perf_event__repipe_auxtrace, - .auxtrace_error = perf_event__repipe_op2_synth, - .time_conv = perf_event__repipe_op2_synth, .finished_round = perf_event__repipe_oe_synth, .build_id = perf_event__repipe_op2_synth, .id_index = perf_event__repipe_op2_synth, + .auxtrace_info = perf_event__repipe_op2_synth, + .auxtrace_error = perf_event__repipe_op2_synth, + .time_conv = perf_event__repipe_op2_synth, + .thread_map = perf_event__repipe_op2_synth, + .cpu_map = perf_event__repipe_op2_synth, + .stat_config = perf_event__repipe_op2_synth, + .stat = perf_event__repipe_op2_synth, + .stat_round = perf_event__repipe_op2_synth, .feature = perf_event__repipe_op2_synth, + .compressed = perf_event__repipe_op4_synth, + .auxtrace = perf_event__repipe_auxtrace, }, .input_name = "-", .samples = LIST_HEAD_INIT(inject.samples), @@ -747,6 +863,8 @@ int cmd_inject(int argc, const char **argv) struct option options[] = { OPT_BOOLEAN('b', "build-ids", &inject.build_ids, "Inject build-ids into the output stream"), + OPT_BOOLEAN(0, "buildid-all", &inject.build_id_all, + "Inject build-ids of all DSOs into the output stream"), OPT_STRING('i', "input", &inject.input_name, "file", "input file name"), OPT_STRING('o', "output", &inject.output.path, "file", @@ -795,8 +913,6 @@ int cmd_inject(int argc, const char **argv) return -1; } - inject.tool.ordered_events = inject.sched_stat; - data.path = inject.input_name; inject.session = perf_session__new(&data, true, &inject.tool); if (IS_ERR(inject.session)) @@ -805,7 +921,7 @@ int cmd_inject(int argc, const char **argv) if (zstd_init(&(inject.session->zstd_data), 0) < 0) pr_warning("Decompression initialization failed.\n"); - if (inject.build_ids) { + if (inject.build_ids && !inject.build_id_all) { /* * to make sure the mmap records are ordered correctly * and so that the correct especially due to jitted code @@ -815,6 +931,11 @@ int cmd_inject(int argc, const char **argv) inject.tool.ordered_events = true; inject.tool.ordering_requires_timestamps = true; } + + if (inject.sched_stat) { + inject.tool.ordered_events = true; + } + #ifdef HAVE_JITDUMP if (inject.jit_mode) { inject.tool.mmap2 = perf_event__jit_repipe_mmap2; diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 0a7fe4cb5555..10ab5e40a34f 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -92,13 +92,6 @@ int cmd_list(int argc, const char **argv) else if ((sep = strchr(argv[i], ':')) != NULL) { int sep_idx; - if (sep == NULL) { - print_events(argv[i], raw_dump, !desc_flag, - long_desc_flag, - details_flag, - deprecated); - continue; - } sep_idx = sep - argv[i]; s = strdup(argv[i]); if (s == NULL) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 772f1057647f..adf311d15d3d 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1593,6 +1593,16 @@ static int record__init_clock(struct record *rec) return 0; } +static void hit_auxtrace_snapshot_trigger(struct record *rec) +{ + if (trigger_is_ready(&auxtrace_snapshot_trigger)) { + trigger_hit(&auxtrace_snapshot_trigger); + auxtrace_record__snapshot_started = 1; + if (auxtrace_record__snapshot_start(rec->itr)) + trigger_error(&auxtrace_snapshot_trigger); + } +} + static int __cmd_record(struct record *rec, int argc, const char **argv) { int err; @@ -1937,6 +1947,10 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) case EVLIST_CTL_CMD_DISABLE: pr_info(EVLIST_DISABLED_MSG); break; + case EVLIST_CTL_CMD_SNAPSHOT: + hit_auxtrace_snapshot_trigger(rec); + evlist__ctlfd_ack(rec->evlist); + break; case EVLIST_CTL_CMD_ACK: case EVLIST_CTL_CMD_UNSUPPORTED: default: @@ -2234,27 +2248,9 @@ static int parse_control_option(const struct option *opt, const char *str, int unset __maybe_unused) { - char *comma = NULL, *endptr = NULL; - struct record_opts *config = (struct record_opts *)opt->value; + struct record_opts *opts = opt->value; - if (strncmp(str, "fd:", 3)) - return -EINVAL; - - config->ctl_fd = strtoul(&str[3], &endptr, 0); - if (endptr == &str[3]) - return -EINVAL; - - comma = strchr(str, ','); - if (comma) { - if (endptr != comma) - return -EINVAL; - - config->ctl_fd_ack = strtoul(comma + 1, &endptr, 0); - if (endptr == comma + 1 || *endptr != '\0') - return -EINVAL; - } - - return 0; + return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close); } static void switch_output_size_warn(struct record *rec) @@ -2596,9 +2592,11 @@ static struct option __record_options[] = { "libpfm4 event selector. use 'perf list' to list available events", parse_libpfm_events_option), #endif - OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd]", - "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n" - "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.", + OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]", + "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n" + "\t\t\t 'snapshot': AUX area tracing snapshot).\n" + "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" + "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", parse_control_option), OPT_END() }; @@ -2671,12 +2669,14 @@ int cmd_record(int argc, const char **argv) !perf_can_record_switch_events()) { ui__error("kernel does not support recording context switch events\n"); parse_options_usage(record_usage, record_options, "switch-events", 0); - return -EINVAL; + err = -EINVAL; + goto out_opts; } if (switch_output_setup(rec)) { parse_options_usage(record_usage, record_options, "switch-output", 0); - return -EINVAL; + err = -EINVAL; + goto out_opts; } if (rec->switch_output.time) { @@ -2687,8 +2687,10 @@ int cmd_record(int argc, const char **argv) if (rec->switch_output.num_files) { rec->switch_output.filenames = calloc(sizeof(char *), rec->switch_output.num_files); - if (!rec->switch_output.filenames) - return -EINVAL; + if (!rec->switch_output.filenames) { + err = -EINVAL; + goto out_opts; + } } /* @@ -2704,7 +2706,8 @@ int cmd_record(int argc, const char **argv) rec->affinity_mask.bits = bitmap_alloc(rec->affinity_mask.nbits); if (!rec->affinity_mask.bits) { pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits); - return -ENOMEM; + err = -ENOMEM; + goto out_opts; } pr_debug2("thread mask[%zd]: empty\n", rec->affinity_mask.nbits); } @@ -2835,6 +2838,8 @@ out: evlist__delete(rec->evlist); symbol__exit(); auxtrace_record__free(rec->itr); +out_opts: + evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close); return err; } @@ -2842,12 +2847,7 @@ static void snapshot_sig_handler(int sig __maybe_unused) { struct record *rec = &record; - if (trigger_is_ready(&auxtrace_snapshot_trigger)) { - trigger_hit(&auxtrace_snapshot_trigger); - auxtrace_record__snapshot_started = 1; - if (auxtrace_record__snapshot_start(record.itr)) - trigger_error(&auxtrace_snapshot_trigger); - } + hit_auxtrace_snapshot_trigger(rec); if (switch_output_signal(rec)) trigger_hit(&switch_output_trigger); diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index e6fc297cee91..0e16f9d5a947 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -130,7 +130,8 @@ struct work_atoms { struct thread *thread; struct rb_node node; u64 max_lat; - u64 max_lat_at; + u64 max_lat_start; + u64 max_lat_end; u64 total_lat; u64 nb_atoms; u64 total_runtime; @@ -1096,7 +1097,8 @@ add_sched_in_event(struct work_atoms *atoms, u64 timestamp) atoms->total_lat += delta; if (delta > atoms->max_lat) { atoms->max_lat = delta; - atoms->max_lat_at = timestamp; + atoms->max_lat_start = atom->wake_up_time; + atoms->max_lat_end = timestamp; } atoms->nb_atoms++; } @@ -1322,7 +1324,7 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_ int i; int ret; u64 avg; - char max_lat_at[32]; + char max_lat_start[32], max_lat_end[32]; if (!work_list->nb_atoms) return; @@ -1344,13 +1346,14 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_ printf(" "); avg = work_list->total_lat / work_list->nb_atoms; - timestamp__scnprintf_usec(work_list->max_lat_at, max_lat_at, sizeof(max_lat_at)); + timestamp__scnprintf_usec(work_list->max_lat_start, max_lat_start, sizeof(max_lat_start)); + timestamp__scnprintf_usec(work_list->max_lat_end, max_lat_end, sizeof(max_lat_end)); - printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13s s\n", + printf("|%11.3f ms |%9" PRIu64 " | avg:%8.3f ms | max:%8.3f ms | max start: %12s s | max end: %12s s\n", (double)work_list->total_runtime / NSEC_PER_MSEC, work_list->nb_atoms, (double)avg / NSEC_PER_MSEC, (double)work_list->max_lat / NSEC_PER_MSEC, - max_lat_at); + max_lat_start, max_lat_end); } static int pid_cmp(struct work_atoms *l, struct work_atoms *r) @@ -3137,7 +3140,8 @@ static void __merge_work_atoms(struct rb_root_cached *root, struct work_atoms *d list_splice(&data->work_list, &this->work_list); if (this->max_lat < data->max_lat) { this->max_lat = data->max_lat; - this->max_lat_at = data->max_lat_at; + this->max_lat_start = data->max_lat_start; + this->max_lat_end = data->max_lat_end; } zfree(&data); return; @@ -3176,9 +3180,9 @@ static int perf_sched__lat(struct perf_sched *sched) perf_sched__merge_lat(sched); perf_sched__sort_lat(sched); - printf("\n -----------------------------------------------------------------------------------------------------------------\n"); - printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n"); - printf(" -----------------------------------------------------------------------------------------------------------------\n"); + printf("\n -------------------------------------------------------------------------------------------------------------------------------------------\n"); + printf(" Task | Runtime ms | Switches | Avg delay ms | Max delay ms | Max delay start | Max delay end |\n"); + printf(" -------------------------------------------------------------------------------------------------------------------------------------------\n"); next = rb_first_cached(&sched->sorted_atom_root); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 484ce6067d23..48588ccf902e 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -702,12 +702,14 @@ static int perf_sample__fprintf_start(struct perf_script *script, char tstr[128]; if (PRINT_FIELD(COMM)) { + const char *comm = thread ? thread__comm_str(thread) : ":-1"; + if (latency_format) - printed += fprintf(fp, "%8.8s ", thread__comm_str(thread)); + printed += fprintf(fp, "%8.8s ", comm); else if (PRINT_FIELD(IP) && evsel__has_callchain(evsel) && symbol_conf.use_callchain) - printed += fprintf(fp, "%s ", thread__comm_str(thread)); + printed += fprintf(fp, "%s ", comm); else - printed += fprintf(fp, "%16s ", thread__comm_str(thread)); + printed += fprintf(fp, "%16s ", comm); } if (PRINT_FIELD(PID) && PRINT_FIELD(TID)) @@ -2238,7 +2240,7 @@ static int print_event_with_time(struct perf_tool *tool, if (tid != -1) thread = machine__findnew_thread(machine, pid, tid); - if (thread && evsel) { + if (evsel) { perf_sample__fprintf_start(script, sample, thread, evsel, event->header.type, stdout); } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index fddc97cac984..b01af171d94f 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -56,7 +56,7 @@ #include "util/cpumap.h" #include "util/thread_map.h" #include "util/counts.h" -#include "util/group.h" +#include "util/topdown.h" #include "util/session.h" #include "util/tool.h" #include "util/string2.h" @@ -128,6 +128,15 @@ static const char * topdown_attrs[] = { NULL, }; +static const char *topdown_metric_attrs[] = { + "slots", + "topdown-retiring", + "topdown-bad-spec", + "topdown-fe-bound", + "topdown-be-bound", + NULL, +}; + static const char *smi_cost_attrs = { "{" "msr/aperf/," @@ -578,6 +587,7 @@ static void process_evlist(struct evlist *evlist, unsigned int interval) process_interval(); pr_info(EVLIST_DISABLED_MSG); break; + case EVLIST_CTL_CMD_SNAPSHOT: case EVLIST_CTL_CMD_ACK: case EVLIST_CTL_CMD_UNSUPPORTED: default: @@ -1045,27 +1055,20 @@ static int parse_control_option(const struct option *opt, const char *str, int unset __maybe_unused) { - char *comma = NULL, *endptr = NULL; - struct perf_stat_config *config = (struct perf_stat_config *)opt->value; + struct perf_stat_config *config = opt->value; - if (strncmp(str, "fd:", 3)) - return -EINVAL; + return evlist__parse_control(str, &config->ctl_fd, &config->ctl_fd_ack, &config->ctl_fd_close); +} - config->ctl_fd = strtoul(&str[3], &endptr, 0); - if (endptr == &str[3]) - return -EINVAL; - - comma = strchr(str, ','); - if (comma) { - if (endptr != comma) - return -EINVAL; - - config->ctl_fd_ack = strtoul(comma + 1, &endptr, 0); - if (endptr == comma + 1 || *endptr != '\0') - return -EINVAL; +static int parse_stat_cgroups(const struct option *opt, + const char *str, int unset) +{ + if (stat_config.cgroup_list) { + pr_err("--cgroup and --for-each-cgroup cannot be used together\n"); + return -1; } - return 0; + return parse_cgroups(opt, str, unset); } static struct option stat_options[] = { @@ -1111,7 +1114,9 @@ static struct option stat_options[] = { OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator", "print counts with custom separator"), OPT_CALLBACK('G', "cgroup", &evsel_list, "name", - "monitor event in cgroup name only", parse_cgroups), + "monitor event in cgroup name only", parse_stat_cgroups), + OPT_STRING(0, "for-each-cgroup", &stat_config.cgroup_list, "name", + "expand events for each cgroup"), OPT_STRING('o', "output", &output_name, "file", "output file name"), OPT_BOOLEAN(0, "append", &append_file, "append to the output file"), OPT_INTEGER(0, "log-fd", &output_fd, @@ -1171,9 +1176,10 @@ static struct option stat_options[] = { "libpfm4 event selector. use 'perf list' to list available events", parse_libpfm_events_option), #endif - OPT_CALLBACK(0, "control", &stat_config, "fd:ctl-fd[,ack-fd]", + OPT_CALLBACK(0, "control", &stat_config, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]", "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n" - "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.", + "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n" + "\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.", parse_control_option), OPT_END() }; @@ -1497,55 +1503,6 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) return 0; } -static int topdown_filter_events(const char **attr, char **str, bool use_group) -{ - int off = 0; - int i; - int len = 0; - char *s; - - for (i = 0; attr[i]; i++) { - if (pmu_have_event("cpu", attr[i])) { - len += strlen(attr[i]) + 1; - attr[i - off] = attr[i]; - } else - off++; - } - attr[i - off] = NULL; - - *str = malloc(len + 1 + 2); - if (!*str) - return -1; - s = *str; - if (i - off == 0) { - *s = 0; - return 0; - } - if (use_group) - *s++ = '{'; - for (i = 0; attr[i]; i++) { - strcpy(s, attr[i]); - s += strlen(s); - *s++ = ','; - } - if (use_group) { - s[-1] = '}'; - *s = 0; - } else - s[-1] = 0; - return 0; -} - -__weak bool arch_topdown_check_group(bool *warn) -{ - *warn = false; - return false; -} - -__weak void arch_topdown_group_warn(void) -{ -} - /* * Add default attributes, if there were no attributes specified or * if -d/--detailed, -d -d or -d -d -d is used: @@ -1742,6 +1699,24 @@ static int add_default_attributes(void) char *str = NULL; bool warn = false; + if (!force_metric_only) + stat_config.metric_only = true; + + if (topdown_filter_events(topdown_metric_attrs, &str, 1) < 0) { + pr_err("Out of memory\n"); + return -1; + } + if (topdown_metric_attrs[0] && str) { + if (!stat_config.interval && !stat_config.metric_only) { + fprintf(stat_config.output, + "Topdown accuracy may decrease when measuring long periods.\n" + "Please print the result regularly, e.g. -I1000\n"); + } + goto setup_metrics; + } + + zfree(&str); + if (stat_config.aggr_mode != AGGR_GLOBAL && stat_config.aggr_mode != AGGR_CORE) { pr_err("top down event configuration requires --per-core mode\n"); @@ -1753,8 +1728,6 @@ static int add_default_attributes(void) return -1; } - if (!force_metric_only) - stat_config.metric_only = true; if (topdown_filter_events(topdown_attrs, &str, arch_topdown_check_group(&warn)) < 0) { pr_err("Out of memory\n"); @@ -1763,6 +1736,7 @@ static int add_default_attributes(void) if (topdown_attrs[0] && str) { if (warn) arch_topdown_group_warn(); +setup_metrics: err = parse_events(evsel_list, str, &errinfo); if (err) { fprintf(stderr, @@ -2063,8 +2037,10 @@ static void setup_system_wide(int forks) struct evsel *counter; evlist__for_each_entry(evsel_list, counter) { - if (!counter->core.system_wide) + if (!counter->core.system_wide && + strcmp(counter->name, "duration_time")) { return; + } } if (evsel_list->core.nr_entries) @@ -2250,6 +2226,19 @@ int cmd_stat(int argc, const char **argv) if (add_default_attributes()) goto out; + if (stat_config.cgroup_list) { + if (nr_cgroups > 0) { + pr_err("--cgroup and --for-each-cgroup cannot be used together\n"); + parse_options_usage(stat_usage, stat_options, "G", 1); + parse_options_usage(NULL, stat_options, "for-each-cgroup", 0); + goto out; + } + + if (evlist__expand_cgroup(evsel_list, stat_config.cgroup_list, + &stat_config.metric_events, true) < 0) + goto out; + } + target__validate(&target); if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide)) @@ -2416,6 +2405,7 @@ out: metricgroup__rblist_exit(&stat_config.metric_events); runtime_stat_delete(&stat_config); + evlist__close_control(stat_config.ctl_fd, stat_config.ctl_fd_ack, &stat_config.ctl_fd_close); return status; } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index bea461b6f937..44a75f234db1 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1762,7 +1762,11 @@ static int trace__read_syscall_info(struct trace *trace, int id) if (table == NULL) return -ENOMEM; - memset(table + trace->sctbl->syscalls.max_id, 0, (id - trace->sctbl->syscalls.max_id) * sizeof(*sc)); + // Need to memset from offset 0 and +1 members if brand new + if (trace->syscalls.table == NULL) + memset(table, 0, (id + 1) * sizeof(*sc)); + else + memset(table + trace->sctbl->syscalls.max_id + 1, 0, (id - trace->sctbl->syscalls.max_id) * sizeof(*sc)); trace->syscalls.table = table; trace->sctbl->syscalls.max_id = id; diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c index 05cf2af9e2c2..d09ec2f03071 100644 --- a/tools/perf/builtin-version.c +++ b/tools/perf/builtin-version.c @@ -60,7 +60,6 @@ static void library_status(void) STATUS(HAVE_DWARF_SUPPORT, dwarf); STATUS(HAVE_DWARF_GETLOCATIONS_SUPPORT, dwarf_getlocations); STATUS(HAVE_GLIBC_SUPPORT, glibc); - STATUS(HAVE_GTK2_SUPPORT, gtk2); #ifndef HAVE_SYSCALL_TABLE_SUPPORT STATUS(HAVE_LIBAUDIT_SUPPORT, libaudit); #endif diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 0b4d6431b072..15ecb1803fb9 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -75,6 +75,15 @@ include/uapi/asm-generic/mman-common.h include/uapi/asm-generic/unistd.h ' +# These copies are under tools/perf/trace/beauty/ as they are not used to in +# building object files only by scripts in tools/perf/trace/beauty/ to generate +# tables that then gets included in .c files for things like id->string syscall +# tables (and the reverse lookup as well: string -> id) + +BEAUTY_FILES=' +include/linux/socket.h +' + check_2 () { file1=$1 file2=$2 @@ -100,6 +109,14 @@ check () { check_2 tools/$file $file $* } +beauty_check () { + file=$1 + + shift + + check_2 tools/perf/trace/beauty/$file $file $* +} + # Check if we have the kernel headers (tools/perf/../../include), else # we're probably on a detached tarball, so no point in trying to check # differences. @@ -128,8 +145,9 @@ check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in # diff non-symmetric files check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl -# These will require a beauty_check when we get some more like that -check_2 tools/perf/trace/beauty/include/linux/socket.h include/linux/socket.h +for i in $BEAUTY_FILES; do + beauty_check $i -B +done # check duplicated library files check_2 tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h index 15e458e150bd..7a2264e1e4e1 100644 --- a/tools/perf/perf-sys.h +++ b/tools/perf/perf-sys.h @@ -9,31 +9,13 @@ struct perf_event_attr; -extern bool test_attr__enabled; -void test_attr__ready(void); -void test_attr__init(void); -void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu, - int fd, int group_fd, unsigned long flags); - -#ifndef HAVE_ATTR_TEST -#define HAVE_ATTR_TEST 1 -#endif - static inline int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags) { - int fd; - - fd = syscall(__NR_perf_event_open, attr, pid, cpu, - group_fd, flags); - -#if HAVE_ATTR_TEST - if (unlikely(test_attr__enabled)) - test_attr__open(attr, pid, cpu, fd, group_fd, flags); -#endif - return fd; + return syscall(__NR_perf_event_open, attr, pid, cpu, + group_fd, flags); } #endif /* _PERF_SYS_H */ diff --git a/tools/perf/pmu-events/arch/powerpc/power8/cache.json b/tools/perf/pmu-events/arch/powerpc/power8/cache.json index 6b792b2c87e2..05a17084d939 100644 --- a/tools/perf/pmu-events/arch/powerpc/power8/cache.json +++ b/tools/perf/pmu-events/arch/powerpc/power8/cache.json @@ -32,8 +32,8 @@ { "EventCode": "0x1c04e", "EventName": "PM_DATA_FROM_L2MISS_MOD", - "BriefDescription": "The processor's data cache was reloaded from a localtion other than the local core's L2 due to a demand load", - "PublicDescription": "The processor's data cache was reloaded from a localtion other than the local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1" + "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to a demand load", + "PublicDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1" }, { "EventCode": "0x3c040", @@ -74,8 +74,8 @@ { "EventCode": "0x4c04e", "EventName": "PM_DATA_FROM_L3MISS_MOD", - "BriefDescription": "The processor's data cache was reloaded from a localtion other than the local core's L3 due to a demand load", - "PublicDescription": "The processor's data cache was reloaded from a localtion other than the local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1" + "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to a demand load", + "PublicDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1" }, { "EventCode": "0x3c042", @@ -134,7 +134,7 @@ { "EventCode": "0x4e04e", "EventName": "PM_DPTEG_FROM_L3MISS", - "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L3 due to a data side request", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a data side request", "PublicDescription": "" }, { diff --git a/tools/perf/pmu-events/arch/powerpc/power8/frontend.json b/tools/perf/pmu-events/arch/powerpc/power8/frontend.json index 1ddc30655d43..1c902a8263b6 100644 --- a/tools/perf/pmu-events/arch/powerpc/power8/frontend.json +++ b/tools/perf/pmu-events/arch/powerpc/power8/frontend.json @@ -116,8 +116,8 @@ { "EventCode": "0x1404e", "EventName": "PM_INST_FROM_L2MISS", - "BriefDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L2 due to an instruction fetch (not prefetch)", - "PublicDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1" + "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to an instruction fetch (not prefetch)", + "PublicDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1" }, { "EventCode": "0x34040", @@ -158,8 +158,8 @@ { "EventCode": "0x4404e", "EventName": "PM_INST_FROM_L3MISS_MOD", - "BriefDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L3 due to a instruction fetch", - "PublicDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1" + "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to a instruction fetch", + "PublicDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1" }, { "EventCode": "0x34042", @@ -320,7 +320,7 @@ { "EventCode": "0x1504e", "EventName": "PM_IPTEG_FROM_L2MISS", - "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L2 due to a instruction side request", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a instruction side request", "PublicDescription": "" }, { @@ -344,7 +344,7 @@ { "EventCode": "0x4504e", "EventName": "PM_IPTEG_FROM_L3MISS", - "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L3 due to a instruction side request", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a instruction side request", "PublicDescription": "" }, { diff --git a/tools/perf/pmu-events/arch/powerpc/power8/marked.json b/tools/perf/pmu-events/arch/powerpc/power8/marked.json index 94dc58b83b7e..6de61a797bbd 100644 --- a/tools/perf/pmu-events/arch/powerpc/power8/marked.json +++ b/tools/perf/pmu-events/arch/powerpc/power8/marked.json @@ -92,7 +92,7 @@ { "EventCode": "0x4c12e", "EventName": "PM_MRK_DATA_FROM_L2MISS_CYC", - "BriefDescription": "Duration in cycles to reload from a localtion other than the local core's L2 due to a marked load", + "BriefDescription": "Duration in cycles to reload from a location other than the local core's L2 due to a marked load", "PublicDescription": "" }, { @@ -158,13 +158,13 @@ { "EventCode": "0x201e4", "EventName": "PM_MRK_DATA_FROM_L3MISS", - "BriefDescription": "The processor's data cache was reloaded from a localtion other than the local core's L3 due to a marked load", + "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to a marked load", "PublicDescription": "" }, { "EventCode": "0x2d12e", "EventName": "PM_MRK_DATA_FROM_L3MISS_CYC", - "BriefDescription": "Duration in cycles to reload from a localtion other than the local core's L3 due to a marked load", + "BriefDescription": "Duration in cycles to reload from a location other than the local core's L3 due to a marked load", "PublicDescription": "" }, { @@ -392,7 +392,7 @@ { "EventCode": "0x1f14e", "EventName": "PM_MRK_DPTEG_FROM_L2MISS", - "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L2 due to a marked data side request", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a marked data side request", "PublicDescription": "" }, { @@ -416,7 +416,7 @@ { "EventCode": "0x4f14e", "EventName": "PM_MRK_DPTEG_FROM_L3MISS", - "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L3 due to a marked data side request", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a marked data side request", "PublicDescription": "" }, { diff --git a/tools/perf/pmu-events/arch/powerpc/power8/other.json b/tools/perf/pmu-events/arch/powerpc/power8/other.json index f4e760cab111..84a0cedf1fd9 100644 --- a/tools/perf/pmu-events/arch/powerpc/power8/other.json +++ b/tools/perf/pmu-events/arch/powerpc/power8/other.json @@ -410,8 +410,8 @@ { "EventCode": "0x61c04e", "EventName": "PM_DATA_ALL_FROM_L2MISS_MOD", - "BriefDescription": "The processor's data cache was reloaded from a localtion other than the local core's L2 due to either demand loads or data prefetch", - "PublicDescription": "The processor's data cache was reloaded from a localtion other than the local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1" + "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to either demand loads or data prefetch", + "PublicDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1" }, { "EventCode": "0x63c040", @@ -470,8 +470,8 @@ { "EventCode": "0x64c04e", "EventName": "PM_DATA_ALL_FROM_L3MISS_MOD", - "BriefDescription": "The processor's data cache was reloaded from a localtion other than the local core's L3 due to either demand loads or data prefetch", - "PublicDescription": "The processor's data cache was reloaded from a localtion other than the local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1" + "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to either demand loads or data prefetch", + "PublicDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1" }, { "EventCode": "0x63c042", @@ -1280,8 +1280,8 @@ { "EventCode": "0x51404e", "EventName": "PM_INST_ALL_FROM_L2MISS", - "BriefDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L2 due to instruction fetches and prefetches", - "PublicDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1" + "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to instruction fetches and prefetches", + "PublicDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1" }, { "EventCode": "0x534040", @@ -1340,8 +1340,8 @@ { "EventCode": "0x54404e", "EventName": "PM_INST_ALL_FROM_L3MISS_MOD", - "BriefDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L3 due to a instruction fetch", - "PublicDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1" + "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to a instruction fetch", + "PublicDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1" }, { "EventCode": "0x534042", diff --git a/tools/perf/pmu-events/arch/powerpc/power8/translation.json b/tools/perf/pmu-events/arch/powerpc/power8/translation.json index 623e7475b010..a1657f5fdc6b 100644 --- a/tools/perf/pmu-events/arch/powerpc/power8/translation.json +++ b/tools/perf/pmu-events/arch/powerpc/power8/translation.json @@ -44,7 +44,7 @@ { "EventCode": "0x1e04e", "EventName": "PM_DPTEG_FROM_L2MISS", - "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L2 due to a data side request", + "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a data side request", "PublicDescription": "" }, { diff --git a/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json index 8383a37647ad..7a5d1bf543f8 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json @@ -1,37 +1,46 @@ [ { - "MetricExpr": "(hv_24x7@PM_MCS01_128B_RD_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS01_128B_RD_DISP_PORT23\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_RD_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_RD_DISP_PORT23\\,chip\\=?@)", - "MetricName": "Memory_RD_BW_Chip", - "MetricGroup": "Memory_BW", - "ScaleUnit": "1.6e-2MB" + "MetricExpr": "(hv_24x7@PM_MCS01_128B_RD_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS01_128B_RD_DISP_PORT23\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_RD_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_RD_DISP_PORT23\\,chip\\=?@)", + "MetricName": "Memory_RD_BW_Chip", + "MetricGroup": "Memory_BW", + "ScaleUnit": "1.6e-2MB", + "AggregationMode": "PerChip" }, { "MetricExpr": "(hv_24x7@PM_MCS01_128B_WR_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS01_128B_WR_DISP_PORT23\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_WR_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_WR_DISP_PORT23\\,chip\\=?@ )", - "MetricName": "Memory_WR_BW_Chip", - "MetricGroup": "Memory_BW", - "ScaleUnit": "1.6e-2MB" + "MetricName": "Memory_WR_BW_Chip", + "MetricGroup": "Memory_BW", + "ScaleUnit": "1.6e-2MB", + "AggregationMode": "PerChip" }, { "MetricExpr": "(hv_24x7@PM_PB_CYC\\,chip\\=?@ )", - "MetricName": "PowerBUS_Frequency", - "ScaleUnit": "2.5e-7GHz" + "MetricName": "PowerBUS_Frequency", + "ScaleUnit": "2.5e-7GHz", + "AggregationMode": "PerChip" + }, + { + "MetricExpr": "(hv_24x7@CPM_CS_32MHZ_CYC\\,domain\\=3\\,core\\=?@ )", + "MetricName": "CPM_CS_32MHZ_CYC", + "ScaleUnit": "1MHz", + "AggregationMode": "PerCore" }, { "MetricExpr" : "nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT23@", "MetricName" : "mcs01-read", - "MetricGroup" : "memory_bw", + "MetricGroup" : "memory-bandwidth", "ScaleUnit": "6.1e-5MB" }, { "MetricExpr" : "nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT23@", "MetricName" : "mcs23-read", - "MetricGroup" : "memory_bw", + "MetricGroup" : "memory-bandwidth", "ScaleUnit": "6.1e-5MB" }, { "MetricExpr" : "nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT23@", "MetricName" : "mcs01-write", - "MetricGroup" : "memory_bw", + "MetricGroup" : "memory-bandwidth", "ScaleUnit": "6.1e-5MB" }, { @@ -48,7 +57,7 @@ { "MetricExpr" : "(nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT23@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT23@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT23@ + nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT23@)", "MetricName" : "Memory-bandwidth-MCS", - "MetricGroup" : "memory_bw", + "MetricGroup" : "memory-bandwidth", "ScaleUnit": "6.1e-5MB" } ] diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/branch.json b/tools/perf/pmu-events/arch/x86/amdzen1/branch.json index a9943eeb8d6b..4ceb67a0db21 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/branch.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/branch.json @@ -19,5 +19,10 @@ "EventName": "bp_de_redirect", "EventCode": "0x91", "BriefDescription": "Decoder Overrides Existing Branch Prediction (speculative)." + }, + { + "EventName": "bp_l1_tlb_fetch_hit", + "EventCode": "0x94", + "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB." } ] diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json index 404d4c569c01..4ea7ec4f496e 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json +++ b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json @@ -117,6 +117,11 @@ "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g2 (PMCx061).", "UMask": "0x1" }, + { + "EventName": "l2_request_g1.all_no_prefetch", + "EventCode": "0x60", + "UMask": "0xf9" + }, { "EventName": "l2_request_g2.group1", "EventCode": "0x61", @@ -243,12 +248,48 @@ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2.", "UMask": "0x1" }, + { + "EventName": "l2_cache_req_stat.ic_access_in_l2", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache requests in L2.", + "UMask": "0x7" + }, + { + "EventName": "l2_cache_req_stat.ic_dc_miss_in_l2", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2 and Data cache request miss in L2 (all types).", + "UMask": "0x9" + }, + { + "EventName": "l2_cache_req_stat.ic_dc_hit_in_l2", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request hit in L2 and Data cache request hit in L2 (all types).", + "UMask": "0xf6" + }, { "EventName": "l2_fill_pending.l2_fill_busy", "EventCode": "0x6d", "BriefDescription": "Cycles with fill pending from L2. Total cycles spent with one or more fill requests in flight from L2.", "UMask": "0x1" }, + { + "EventName": "l2_pf_hit_l2", + "EventCode": "0x70", + "BriefDescription": "L2 prefetch hit in L2.", + "UMask": "0xff" + }, + { + "EventName": "l2_pf_miss_l2_hit_l3", + "EventCode": "0x71", + "BriefDescription": "L2 prefetcher hits in L3. Counts all L2 prefetches accepted by the L2 pipeline which miss the L2 cache and hit the L3.", + "UMask": "0xff" + }, + { + "EventName": "l2_pf_miss_l2_l3", + "EventCode": "0x72", + "BriefDescription": "L2 prefetcher misses in L3. All L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3 caches.", + "UMask": "0xff" + }, { "EventName": "l3_request_g1.caching_l3_cache_accesses", "EventCode": "0x01", diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/data-fabric.json b/tools/perf/pmu-events/arch/x86/amdzen1/data-fabric.json new file mode 100644 index 000000000000..40271df40015 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen1/data-fabric.json @@ -0,0 +1,98 @@ +[ + { + "EventName": "remote_outbound_data_controller_0", + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 0", + "EventCode": "0x7c7", + "UMask": "0x02", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_outbound_data_controller_1", + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 1", + "EventCode": "0x807", + "UMask": "0x02", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_outbound_data_controller_2", + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 2", + "EventCode": "0x847", + "UMask": "0x02", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_outbound_data_controller_3", + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 3", + "EventCode": "0x887", + "UMask": "0x02", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_0", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x07", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_1", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x47", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_2", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x87", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_3", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0xc7", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_4", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x107", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_5", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x147", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_6", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x187", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_7", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x1c7", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json new file mode 100644 index 000000000000..2cfe2d2f3bfd --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json @@ -0,0 +1,178 @@ +[ + { + "MetricName": "branch_misprediction_ratio", + "BriefDescription": "Execution-Time Branch Misprediction Ratio (Non-Speculative)", + "MetricExpr": "d_ratio(ex_ret_brn_misp, ex_ret_brn)", + "MetricGroup": "branch_prediction", + "ScaleUnit": "100%" + }, + { + "EventName": "all_dc_accesses", + "EventCode": "0x29", + "BriefDescription": "All L1 Data Cache Accesses", + "UMask": "0x7" + }, + { + "MetricName": "all_l2_cache_accesses", + "BriefDescription": "All L2 Cache Accesses", + "MetricExpr": "l2_request_g1.all_no_prefetch + l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "EventName": "l2_cache_accesses_from_ic_misses", + "EventCode": "0x60", + "BriefDescription": "L2 Cache Accesses from L1 Instruction Cache Misses (including prefetch)", + "UMask": "0x10" + }, + { + "EventName": "l2_cache_accesses_from_dc_misses", + "EventCode": "0x60", + "BriefDescription": "L2 Cache Accesses from L1 Data Cache Misses (including prefetch)", + "UMask": "0xc8" + }, + { + "MetricName": "l2_cache_accesses_from_l2_hwpf", + "BriefDescription": "L2 Cache Accesses from L2 HWPF", + "MetricExpr": "l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "MetricName": "all_l2_cache_misses", + "BriefDescription": "All L2 Cache Misses", + "MetricExpr": "l2_cache_req_stat.ic_dc_miss_in_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "EventName": "l2_cache_misses_from_ic_miss", + "EventCode": "0x64", + "BriefDescription": "L2 Cache Misses from L1 Instruction Cache Misses", + "UMask": "0x01" + }, + { + "EventName": "l2_cache_misses_from_dc_misses", + "EventCode": "0x64", + "BriefDescription": "L2 Cache Misses from L1 Data Cache Misses", + "UMask": "0x08" + }, + { + "MetricName": "l2_cache_misses_from_l2_hwpf", + "BriefDescription": "L2 Cache Misses from L2 HWPF", + "MetricExpr": "l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "MetricName": "all_l2_cache_hits", + "BriefDescription": "All L2 Cache Hits", + "MetricExpr": "l2_cache_req_stat.ic_dc_hit_in_l2 + l2_pf_hit_l2", + "MetricGroup": "l2_cache" + }, + { + "EventName": "l2_cache_hits_from_ic_misses", + "EventCode": "0x64", + "BriefDescription": "L2 Cache Hits from L1 Instruction Cache Misses", + "UMask": "0x06" + }, + { + "EventName": "l2_cache_hits_from_dc_misses", + "EventCode": "0x64", + "BriefDescription": "L2 Cache Hits from L1 Data Cache Misses", + "UMask": "0x70" + }, + { + "MetricName": "l2_cache_hits_from_l2_hwpf", + "BriefDescription": "L2 Cache Hits from L2 HWPF", + "MetricExpr": "l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "EventName": "l3_accesses", + "EventCode": "0x04", + "BriefDescription": "L3 Accesses", + "UMask": "0xff", + "Unit": "L3PMC" + }, + { + "EventName": "l3_misses", + "EventCode": "0x04", + "BriefDescription": "L3 Misses (includes Chg2X)", + "UMask": "0x01", + "Unit": "L3PMC" + }, + { + "MetricName": "l3_read_miss_latency", + "BriefDescription": "Average L3 Read Miss Latency (in core clocks)", + "MetricExpr": "(xi_sys_fill_latency * 16) / xi_ccx_sdp_req1.all_l3_miss_req_typs", + "MetricGroup": "l3_cache", + "ScaleUnit": "1core clocks" + }, + { + "MetricName": "ic_fetch_miss_ratio", + "BriefDescription": "L1 Instruction Cache (32B) Fetch Miss Ratio", + "MetricExpr": "d_ratio(l2_cache_req_stat.ic_access_in_l2, bp_l1_tlb_fetch_hit + bp_l1_tlb_miss_l2_hit + bp_l1_tlb_miss_l2_miss)", + "MetricGroup": "l2_cache", + "ScaleUnit": "100%" + }, + { + "MetricName": "l1_itlb_misses", + "BriefDescription": "L1 ITLB Misses", + "MetricExpr": "bp_l1_tlb_miss_l2_hit + bp_l1_tlb_miss_l2_miss", + "MetricGroup": "tlb" + }, + { + "EventName": "l2_itlb_misses", + "EventCode": "0x85", + "BriefDescription": "L2 ITLB Misses & Instruction page walks", + "UMask": "0x07" + }, + { + "EventName": "l1_dtlb_misses", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Misses", + "UMask": "0xff" + }, + { + "EventName": "l2_dtlb_misses", + "EventCode": "0x45", + "BriefDescription": "L2 DTLB Misses & Data page walks", + "UMask": "0xf0" + }, + { + "EventName": "all_tlbs_flushed", + "EventCode": "0x78", + "BriefDescription": "All TLBs Flushed", + "UMask": "0xdf" + }, + { + "EventName": "uops_dispatched", + "EventCode": "0xaa", + "BriefDescription": "Micro-ops Dispatched", + "UMask": "0x03" + }, + { + "EventName": "sse_avx_stalls", + "EventCode": "0x0e", + "BriefDescription": "Mixed SSE/AVX Stalls", + "UMask": "0x0e" + }, + { + "EventName": "uops_retired", + "EventCode": "0xc1", + "BriefDescription": "Micro-ops Retired" + }, + { + "MetricName": "all_remote_links_outbound", + "BriefDescription": "Approximate: Outbound data bytes for all Remote Links for a node (die)", + "MetricExpr": "remote_outbound_data_controller_0 + remote_outbound_data_controller_1 + remote_outbound_data_controller_2 + remote_outbound_data_controller_3", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "3e-5MiB" + }, + { + "MetricName": "nps1_die_to_dram", + "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die) (may need --metric-no-group)", + "MetricExpr": "dram_channel_data_controller_0 + dram_channel_data_controller_1 + dram_channel_data_controller_2 + dram_channel_data_controller_3 + dram_channel_data_controller_4 + dram_channel_data_controller_5 + dram_channel_data_controller_6 + dram_channel_data_controller_7", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.1e-5MiB" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/cache.json b/tools/perf/pmu-events/arch/x86/amdzen2/cache.json index 1c60bfa0f00b..f61b982f83ca 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen2/cache.json +++ b/tools/perf/pmu-events/arch/x86/amdzen2/cache.json @@ -47,6 +47,11 @@ "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g2 (PMCx061).", "UMask": "0x1" }, + { + "EventName": "l2_request_g1.all_no_prefetch", + "EventCode": "0x60", + "UMask": "0xf9" + }, { "EventName": "l2_request_g2.group1", "EventCode": "0x61", @@ -173,6 +178,24 @@ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2.", "UMask": "0x1" }, + { + "EventName": "l2_cache_req_stat.ic_access_in_l2", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache requests in L2.", + "UMask": "0x7" + }, + { + "EventName": "l2_cache_req_stat.ic_dc_miss_in_l2", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2 and Data cache request miss in L2 (all types).", + "UMask": "0x9" + }, + { + "EventName": "l2_cache_req_stat.ic_dc_hit_in_l2", + "EventCode": "0x64", + "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request hit in L2 and Data cache request hit in L2 (all types).", + "UMask": "0xf6" + }, { "EventName": "l2_fill_pending.l2_fill_busy", "EventCode": "0x6d", diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/data-fabric.json b/tools/perf/pmu-events/arch/x86/amdzen2/data-fabric.json new file mode 100644 index 000000000000..40271df40015 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen2/data-fabric.json @@ -0,0 +1,98 @@ +[ + { + "EventName": "remote_outbound_data_controller_0", + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 0", + "EventCode": "0x7c7", + "UMask": "0x02", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_outbound_data_controller_1", + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 1", + "EventCode": "0x807", + "UMask": "0x02", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_outbound_data_controller_2", + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 2", + "EventCode": "0x847", + "UMask": "0x02", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_outbound_data_controller_3", + "PublicDescription": "Remote Link Controller Outbound Packet Types: Data (32B): Remote Link Controller 3", + "EventCode": "0x887", + "UMask": "0x02", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_0", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x07", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_1", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x47", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_2", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x87", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_3", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0xc7", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_4", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x107", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_5", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x147", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_6", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x187", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "dram_channel_data_controller_7", + "PublicDescription": "DRAM Channel Controller Request Types: Requests with Data (64B): DRAM Channel Controller 0", + "EventCode": "0x1c7", + "UMask": "0x38", + "PerPkg": "1", + "Unit": "DFPMC" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json new file mode 100644 index 000000000000..2ef91e25e661 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json @@ -0,0 +1,178 @@ +[ + { + "MetricName": "branch_misprediction_ratio", + "BriefDescription": "Execution-Time Branch Misprediction Ratio (Non-Speculative)", + "MetricExpr": "d_ratio(ex_ret_brn_misp, ex_ret_brn)", + "MetricGroup": "branch_prediction", + "ScaleUnit": "100%" + }, + { + "EventName": "all_dc_accesses", + "EventCode": "0x29", + "BriefDescription": "All L1 Data Cache Accesses", + "UMask": "0x7" + }, + { + "MetricName": "all_l2_cache_accesses", + "BriefDescription": "All L2 Cache Accesses", + "MetricExpr": "l2_request_g1.all_no_prefetch + l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "EventName": "l2_cache_accesses_from_ic_misses", + "EventCode": "0x60", + "BriefDescription": "L2 Cache Accesses from L1 Instruction Cache Misses (including prefetch)", + "UMask": "0x10" + }, + { + "EventName": "l2_cache_accesses_from_dc_misses", + "EventCode": "0x60", + "BriefDescription": "L2 Cache Accesses from L1 Data Cache Misses (including prefetch)", + "UMask": "0xc8" + }, + { + "MetricName": "l2_cache_accesses_from_l2_hwpf", + "BriefDescription": "L2 Cache Accesses from L2 HWPF", + "MetricExpr": "l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "MetricName": "all_l2_cache_misses", + "BriefDescription": "All L2 Cache Misses", + "MetricExpr": "l2_cache_req_stat.ic_dc_miss_in_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "EventName": "l2_cache_misses_from_ic_miss", + "EventCode": "0x64", + "BriefDescription": "L2 Cache Misses from L1 Instruction Cache Misses", + "UMask": "0x01" + }, + { + "EventName": "l2_cache_misses_from_dc_misses", + "EventCode": "0x64", + "BriefDescription": "L2 Cache Misses from L1 Data Cache Misses", + "UMask": "0x08" + }, + { + "MetricName": "l2_cache_misses_from_l2_hwpf", + "BriefDescription": "L2 Cache Misses from L2 HWPF", + "MetricExpr": "l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "MetricName": "all_l2_cache_hits", + "BriefDescription": "All L2 Cache Hits", + "MetricExpr": "l2_cache_req_stat.ic_dc_hit_in_l2 + l2_pf_hit_l2", + "MetricGroup": "l2_cache" + }, + { + "EventName": "l2_cache_hits_from_ic_misses", + "EventCode": "0x64", + "BriefDescription": "L2 Cache Hits from L1 Instruction Cache Misses", + "UMask": "0x06" + }, + { + "EventName": "l2_cache_hits_from_dc_misses", + "EventCode": "0x64", + "BriefDescription": "L2 Cache Hits from L1 Data Cache Misses", + "UMask": "0x70" + }, + { + "MetricName": "l2_cache_hits_from_l2_hwpf", + "BriefDescription": "L2 Cache Hits from L2 HWPF", + "MetricExpr": "l2_pf_hit_l2 + l2_pf_miss_l2_hit_l3 + l2_pf_miss_l2_l3", + "MetricGroup": "l2_cache" + }, + { + "EventName": "l3_accesses", + "EventCode": "0x04", + "BriefDescription": "L3 Accesses", + "UMask": "0xff", + "Unit": "L3PMC" + }, + { + "EventName": "l3_misses", + "EventCode": "0x04", + "BriefDescription": "L3 Misses (includes Chg2X)", + "UMask": "0x01", + "Unit": "L3PMC" + }, + { + "MetricName": "l3_read_miss_latency", + "BriefDescription": "Average L3 Read Miss Latency (in core clocks)", + "MetricExpr": "(xi_sys_fill_latency * 16) / xi_ccx_sdp_req1.all_l3_miss_req_typs", + "MetricGroup": "l3_cache", + "ScaleUnit": "1core clocks" + }, + { + "MetricName": "ic_fetch_miss_ratio", + "BriefDescription": "L1 Instruction Cache (32B) Fetch Miss Ratio", + "MetricExpr": "d_ratio(l2_cache_req_stat.ic_access_in_l2, bp_l1_tlb_fetch_hit + bp_l1_tlb_miss_l2_hit + bp_l1_tlb_miss_l2_tlb_miss)", + "MetricGroup": "l2_cache", + "ScaleUnit": "100%" + }, + { + "MetricName": "l1_itlb_misses", + "BriefDescription": "L1 ITLB Misses", + "MetricExpr": "bp_l1_tlb_miss_l2_hit + bp_l1_tlb_miss_l2_tlb_miss", + "MetricGroup": "tlb" + }, + { + "EventName": "l2_itlb_misses", + "EventCode": "0x85", + "BriefDescription": "L2 ITLB Misses & Instruction page walks", + "UMask": "0x07" + }, + { + "EventName": "l1_dtlb_misses", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Misses", + "UMask": "0xff" + }, + { + "EventName": "l2_dtlb_misses", + "EventCode": "0x45", + "BriefDescription": "L2 DTLB Misses & Data page walks", + "UMask": "0xf0" + }, + { + "EventName": "all_tlbs_flushed", + "EventCode": "0x78", + "BriefDescription": "All TLBs Flushed", + "UMask": "0xdf" + }, + { + "EventName": "uops_dispatched", + "EventCode": "0xaa", + "BriefDescription": "Micro-ops Dispatched", + "UMask": "0x03" + }, + { + "EventName": "sse_avx_stalls", + "EventCode": "0x0e", + "BriefDescription": "Mixed SSE/AVX Stalls", + "UMask": "0x0e" + }, + { + "EventName": "uops_retired", + "EventCode": "0xc1", + "BriefDescription": "Micro-ops Retired" + }, + { + "MetricName": "all_remote_links_outbound", + "BriefDescription": "Approximate: Outbound data bytes for all Remote Links for a node (die)", + "MetricExpr": "remote_outbound_data_controller_0 + remote_outbound_data_controller_1 + remote_outbound_data_controller_2 + remote_outbound_data_controller_3", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "3e-5MiB" + }, + { + "MetricName": "nps1_die_to_dram", + "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die) (may need --metric-no-group)", + "MetricExpr": "dram_channel_data_controller_0 + dram_channel_data_controller_1 + dram_channel_data_controller_2 + dram_channel_data_controller_3 + dram_channel_data_controller_4 + dram_channel_data_controller_5 + dram_channel_data_controller_6 + dram_channel_data_controller_7", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.1e-5MiB" + } +] diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/cache.json b/tools/perf/pmu-events/arch/x86/cascadelakex/cache.json index 3fba310a5012..3c0f5837480f 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/cache.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/cache.json @@ -8063,6 +8063,20 @@ "SampleAfterValue": "100003", "UMask": "0x1" }, + { + "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Deprecated": "1", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x1000020004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, { "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", @@ -9255,20 +9269,6 @@ "SampleAfterValue": "100003", "UMask": "0x1" }, - { - "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE", - "Counter": "0,1,2,3", - "CounterHTOff": "0,1,2,3", - "Deprecated": "1", - "EventCode": "0xB7, 0xBB", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x1000020004", - "Offcore": "1", - "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "UMask": "0x1" - }, { "BriefDescription": "This event is deprecated. Refer to new event OCR.ALL_PF_RFO.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json index d25eebce34c9..de3193552277 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json @@ -4,14 +4,14 @@ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)", "MetricGroup": "TopdownL1", "MetricName": "Frontend_Bound", - "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound." + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound." }, { "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", - "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", "MetricGroup": "TopdownL1_SMT", "MetricName": "Frontend_Bound_SMT", - "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU." + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU." }, { "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", @@ -22,13 +22,14 @@ }, { "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.", - "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", "MetricGroup": "TopdownL1_SMT", "MetricName": "Bad_Speculation_SMT", "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU." }, { "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "MetricConstraint": "NO_NMI_WATCHDOG", "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )", "MetricGroup": "TopdownL1", "MetricName": "Backend_Bound", @@ -36,7 +37,7 @@ }, { "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", - "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )", + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) )", "MetricGroup": "TopdownL1_SMT", "MetricName": "Backend_Bound_SMT", "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU." @@ -50,7 +51,7 @@ }, { "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.", - "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", "MetricGroup": "TopdownL1_SMT", "MetricName": "Retiring_SMT", "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU." @@ -58,7 +59,7 @@ { "BriefDescription": "Instructions Per Cycle (per Logical Processor)", "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", - "MetricGroup": "TopDownL1", + "MetricGroup": "Summary", "MetricName": "IPC" }, { @@ -73,24 +74,6 @@ "MetricGroup": "Branches;Fetch_BW;PGO", "MetricName": "IpTB" }, - { - "BriefDescription": "Branch instructions per taken branch. ", - "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", - "MetricGroup": "Branches;PGO", - "MetricName": "BpTB" - }, - { - "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions", - "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1 ) )", - "MetricGroup": "PGO;IcMiss", - "MetricName": "IFetch_Line_Utilization" - }, - { - "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)", - "MetricGroup": "DSB;Fetch_BW", - "MetricName": "DSB_Coverage" - }, { "BriefDescription": "Cycles Per Instruction (per Logical Processor)", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", @@ -104,86 +87,110 @@ "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots (per-Physical Core)", + "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)", "MetricExpr": "4 * cycles", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total issue-pipeline slots (per-Physical Core)", - "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)", + "MetricExpr": "4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "MetricGroup": "TopDownL1_SMT", "MetricName": "SLOTS_SMT" }, { - "BriefDescription": "Instructions per Load (lower number means higher occurance rate)", + "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)", "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS", "MetricGroup": "Instruction_Type", - "MetricName": "IpL" + "MetricName": "IpLoad" }, { - "BriefDescription": "Instructions per Store (lower number means higher occurance rate)", + "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)", "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES", "MetricGroup": "Instruction_Type", - "MetricName": "IpS" + "MetricName": "IpStore" }, { - "BriefDescription": "Instructions per Branch (lower number means higher occurance rate)", + "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", "MetricGroup": "Branches;Instruction_Type", - "MetricName": "IpB" + "MetricName": "IpBranch" }, { - "BriefDescription": "Instruction per (near) call (lower number means higher occurance rate)", + "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)", "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", "MetricGroup": "Branches", "MetricName": "IpCall" }, + { + "BriefDescription": "Branch instructions per taken branch. ", + "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", + "MetricGroup": "Branches;PGO", + "MetricName": "BpTkBranch" + }, + { + "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )", + "MetricGroup": "FLOPS;FP_Arith;Instruction_Type", + "MetricName": "IpFLOP" + }, { "BriefDescription": "Total number of retired Instructions", "MetricExpr": "INST_RETIRED.ANY", - "MetricGroup": "Summary", + "MetricGroup": "Summary;TopDownL1", "MetricName": "Instructions" }, + { + "BriefDescription": "Fraction of Uops delivered by the LSD (Loop Stream Detector; aka Loop Cache)", + "MetricExpr": "LSD.UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)", + "MetricGroup": "LSD", + "MetricName": "LSD_Coverage" + }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)", + "MetricGroup": "DSB;Fetch_BW", + "MetricName": "DSB_Coverage" + }, { "BriefDescription": "Instructions Per Cycle (per physical core)", "MetricExpr": "INST_RETIRED.ANY / cycles", - "MetricGroup": "SMT", + "MetricGroup": "SMT;TopDownL1", "MetricName": "CoreIPC" }, { "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", - "MetricGroup": "SMT", + "MetricExpr": "INST_RETIRED.ANY / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", + "MetricGroup": "SMT;TopDownL1", "MetricName": "CoreIPC_SMT" }, { "BriefDescription": "Floating Point Operations Per Cycle", - "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / cycles", + "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / cycles", "MetricGroup": "FLOPS", "MetricName": "FLOPc" }, { "BriefDescription": "Floating Point Operations Per Cycle", - "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "MetricGroup": "FLOPS_SMT", "MetricName": "FLOPc_SMT" }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)", - "MetricGroup": "Pipeline", + "MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 )", + "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)", - "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES", "MetricGroup": "BrMispredicts", "MetricName": "Branch_Misprediction_Cost" }, { "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)", - "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) * (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / BR_MISP_RETIRED.ALL_BRANCHES", "MetricGroup": "BrMispredicts_SMT", "MetricName": "Branch_Misprediction_Cost_SMT" }, @@ -213,14 +220,14 @@ }, { "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricConstraint": "NO_NMI_WATCHDOG", "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )", "MetricGroup": "TLB", - "MetricName": "Page_Walks_Utilization", - "MetricConstraint": "NO_NMI_WATCHDOG" + "MetricName": "Page_Walks_Utilization" }, { "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) )", + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )", "MetricGroup": "TLB_SMT", "MetricName": "Page_Walks_Utilization_SMT" }, @@ -245,7 +252,7 @@ { "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time", - "MetricGroup": "Memory_BW", + "MetricGroup": "Memory_BW;Offcore", "MetricName": "L3_Cache_Access_BW" }, { @@ -263,7 +270,7 @@ { "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", "MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY", - "MetricGroup": "Cache_Misses", + "MetricGroup": "Cache_Misses;Offcore", "MetricName": "L2MPKI_All" }, { @@ -298,7 +305,7 @@ }, { "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "( (( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / 1000000000 ) / duration_time", + "MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / 1000000000 ) / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, @@ -310,62 +317,74 @@ }, { "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active", - "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 )", "MetricGroup": "SMT;Summary", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", + "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", - "MetricGroup": "Summary", + "MetricGroup": "OS", "MetricName": "Kernel_Utilization" }, { "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time", - "MetricGroup": "Memory_BW", + "MetricGroup": "Memory_BW;SoC", "MetricName": "DRAM_BW_Use" }, { "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches", - "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x35\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )", - "MetricGroup": "Memory_Lat", - "MetricName": "DRAM_Read_Latency" + "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x35\\,umask\\=0x21\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )", + "MetricGroup": "Memory_Lat;SoC", + "MetricName": "MEM_Read_Latency" }, { "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", - "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x36\\,umask\\=0x21\\,thresh\\=1@", - "MetricGroup": "Memory_BW", - "MetricName": "DRAM_Parallel_Reads" + "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433\\,thresh\\=1@", + "MetricGroup": "Memory_BW;SoC", + "MetricName": "MEM_Parallel_Reads" }, { "BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches", "MetricExpr": "( 1000000000 * ( imc@event\\=0xe0\\,umask\\=0x1@ / imc@event\\=0xe3@ ) / imc_0@event\\=0x0@ )", - "MetricGroup": "Memory_Lat", + "MetricGroup": "Memory_Lat;SoC;Server", "MetricName": "MEM_PMM_Read_Latency" }, { "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]", "MetricExpr": "( ( 64 * imc@event\\=0xe3@ / 1000000000 ) / duration_time )", - "MetricGroup": "Memory_BW", + "MetricGroup": "Memory_BW;SoC;Server", "MetricName": "PMM_Read_BW" }, { "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]", "MetricExpr": "( ( 64 * imc@event\\=0xe7@ / 1000000000 ) / duration_time )", - "MetricGroup": "Memory_BW", + "MetricGroup": "Memory_BW;SoC;Server", "MetricName": "PMM_Write_BW" }, + { + "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]", + "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3 ) * 4 / 1000000000 / duration_time", + "MetricGroup": "IO_BW;SoC;Server", + "MetricName": "IO_Write_BW" + }, + { + "BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]", + "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3 ) * 4 / 1000000000 / duration_time", + "MetricGroup": "IO_BW;SoC;Server", + "MetricName": "IO_Read_BW" + }, { "BriefDescription": "Socket actual clocks when any core is active on that socket", "MetricExpr": "cha_0@event\\=0x0@", - "MetricGroup": "", + "MetricGroup": "SoC", "MetricName": "Socket_CLKS" }, { - "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions. )", + "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]", "MetricExpr": "INST_RETIRED.ANY / ( BR_INST_RETIRED.FAR_BRANCH / 2 )", - "MetricGroup": "", + "MetricGroup": "Branches;OS", "MetricName": "IpFarBranch" }, { diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json b/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json index 3553472ad266..0716b2e3ff75 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/frontend.json @@ -246,6 +246,30 @@ "SampleAfterValue": "2000003", "UMask": "0x10" }, + { + "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xAB", + "EventName": "DSB2MITE_SWITCHES.COUNT", + "PublicDescription": "This event counts the number of the Decode Stream Buffer (DSB)-to-MITE switches including all misses because of missing Decode Stream Buffer (DSB) cache and u-arch forced misses.\nNote: Invoking MITE requires two or three cycles delay.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle", + "Counter": "0,1,2,3,4,5,6,7", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_1", + "MSRIndex": "0x3F7", + "MSRValue": "0x400106", + "PEBS": "2", + "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" + }, { "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", "Counter": "0,1,2,3", @@ -359,6 +383,16 @@ "SampleAfterValue": "2000003", "UMask": "0x24" }, + { + "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xE6", + "EventName": "BACLEARS.ANY", + "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, { "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/memory.json b/tools/perf/pmu-events/arch/x86/cascadelakex/memory.json index cc66a51c6a7b..0c07cb4fbf58 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/memory.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/memory.json @@ -1,6 +1,6 @@ [ { - "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -13,7 +13,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -26,7 +26,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -81,7 +81,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -177,7 +177,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -218,7 +218,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -245,7 +245,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -286,7 +286,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -313,7 +313,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -350,7 +350,7 @@ "UMask": "0x8" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -405,7 +405,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & ANY_SNOOP", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP OCR.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -432,7 +432,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -473,7 +473,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -513,7 +513,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.ANY_SNOOP OCR.ALL_PF_RFO.L3_MISS.ANY_SNOOP OCR.ALL_PF_RFO.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -540,7 +540,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -678,7 +678,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -705,7 +705,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -779,7 +779,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -792,7 +792,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -847,7 +847,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -874,7 +874,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -950,7 +950,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -977,7 +977,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1045,7 +1045,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1058,7 +1058,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1071,7 +1071,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1084,7 +1084,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L2_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1097,7 +1097,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & SNOOP_MISS", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1258,7 +1258,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1285,7 +1285,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1298,7 +1298,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1339,7 +1339,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1380,7 +1380,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1393,7 +1393,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & SNOOP_MISS", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1406,7 +1406,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1419,7 +1419,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & REMOTE_HITM", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM OCR.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1432,7 +1432,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1445,7 +1445,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & SNOOP_MISS", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1513,7 +1513,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L1D_AND_SW.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1540,7 +1540,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1619,7 +1619,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1632,7 +1632,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1645,7 +1645,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1672,7 +1672,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.PF_L2_DATA_RD.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1699,7 +1699,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1712,7 +1712,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1725,7 +1725,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.DEMAND_RFO.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1738,7 +1738,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1765,7 +1765,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS & SNOOP_MISS", + "BriefDescription": "OCR.ALL_READS.L3_MISS.SNOOP_MISS OCR.ALL_READS.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1806,7 +1806,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1833,7 +1833,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1874,7 +1874,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1887,7 +1887,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1942,7 +1942,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1969,7 +1969,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1982,7 +1982,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1995,7 +1995,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2008,7 +2008,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS & SNOOP_NONE", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2021,7 +2021,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2048,7 +2048,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & REMOTE_HITM", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2089,7 +2089,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP OCR.ALL_READS.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2144,7 +2144,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2185,7 +2185,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2212,7 +2212,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2225,7 +2225,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2241,7 +2241,8 @@ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", - "EventCode": "0xCD", + "Data_LA": "1", + "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", "MSRIndex": "0x3F6", "MSRValue": "0x80", @@ -2290,7 +2291,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & SNOOP_MISS", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2345,7 +2346,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2386,7 +2387,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2399,7 +2400,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2426,7 +2427,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2439,7 +2440,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2452,7 +2453,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS & ANY_SNOOP", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.ANY_SNOOP OCR.ALL_DATA_RD.L3_MISS.ANY_SNOOP OCR.ALL_DATA_RD.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2493,7 +2494,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2520,7 +2521,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2547,7 +2548,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & SNOOP_MISS", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2560,7 +2561,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2586,7 +2587,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2599,7 +2600,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2612,7 +2613,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2653,7 +2654,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2694,7 +2695,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2707,7 +2708,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & ANY_SNOOP", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP OCR.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2730,7 +2731,7 @@ "UMask": "0x40" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2743,7 +2744,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2756,7 +2757,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2769,7 +2770,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2782,7 +2783,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2809,7 +2810,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2822,7 +2823,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2835,7 +2836,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2848,7 +2849,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2875,7 +2876,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "OCR.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2902,7 +2903,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_READS.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2915,7 +2916,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2928,7 +2929,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L2_DATA_RD.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2941,7 +2942,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2954,7 +2955,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2981,7 +2982,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3050,7 +3051,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3104,7 +3105,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.HITM_OTHER_CORE OCR.PF_L3_RFO.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3117,7 +3118,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3195,7 +3196,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3222,7 +3223,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3235,7 +3236,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3290,7 +3291,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3373,7 +3374,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & SNOOP_NONE", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3413,7 +3414,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3426,7 +3427,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3481,7 +3482,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3522,7 +3523,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3535,7 +3536,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3548,7 +3549,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3561,7 +3562,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3588,7 +3589,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3601,7 +3602,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3614,7 +3615,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3778,7 +3779,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3791,7 +3792,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3804,7 +3805,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3859,7 +3860,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3886,7 +3887,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3899,7 +3900,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3925,7 +3926,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L2_RFO.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3938,7 +3939,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3979,7 +3980,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L3_RFO.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4044,7 +4045,7 @@ "UMask": "0x20" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.SNOOP_MISS OCR.ALL_PF_RFO.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4071,7 +4072,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.PF_L3_DATA_RD.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4111,7 +4112,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4138,7 +4139,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4151,7 +4152,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & ANY_SNOOP", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP OCR.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4164,7 +4165,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS & ANY_SNOOP", + "BriefDescription": "OCR.ALL_READS.L3_MISS.ANY_SNOOP OCR.ALL_READS.L3_MISS.ANY_SNOOP OCR.ALL_READS.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4205,7 +4206,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4243,7 +4244,7 @@ "UMask": "0x4" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4284,7 +4285,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4297,7 +4298,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4310,7 +4311,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4323,7 +4324,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4336,7 +4337,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4363,7 +4364,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP OCR.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4390,7 +4391,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4503,7 +4504,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4529,7 +4530,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4556,7 +4557,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4569,7 +4570,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4582,7 +4583,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4623,7 +4624,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.HITM_OTHER_CORE OCR.PF_L2_RFO.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4636,7 +4637,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4649,7 +4650,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_READS.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_READS.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_READS.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4662,7 +4663,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4685,7 +4686,7 @@ "UMask": "0x80" }, { - "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4698,7 +4699,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.SNOOP_NONE OCR.ALL_PF_RFO.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4711,7 +4712,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4724,7 +4725,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS & SNOOP_MISS", + "BriefDescription": "OCR.ALL_RFO.L3_MISS.SNOOP_MISS OCR.ALL_RFO.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4765,7 +4766,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4802,7 +4803,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & SNOOP_NONE", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4828,7 +4829,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.DEMAND_DATA_RD.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4841,7 +4842,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & REMOTE_HITM", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4909,7 +4910,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4950,7 +4951,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & SNOOP_NONE", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4977,7 +4978,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4990,21 +4991,7 @@ "UMask": "0x1" }, { - "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", - "Counter": "0,1,2,3", - "CounterHTOff": "0,1,2,3", - "Deprecated": "1", - "EventCode": "0xB7, 0xBB", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0110000001", - "Offcore": "1", - "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "UMask": "0x1" - }, - { - "BriefDescription": "ALL_RFO & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5017,7 +5004,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5030,7 +5017,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5043,7 +5030,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5070,7 +5057,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS & REMOTE_HITM", + "BriefDescription": "OCR.ALL_READS.L3_MISS.REMOTE_HITM OCR.ALL_READS.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5097,7 +5084,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.ANY_SNOOP OCR.PF_L3_RFO.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5152,7 +5139,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5188,7 +5175,7 @@ "UMask": "0x10" }, { - "BriefDescription": "ALL_RFO & L3_MISS & REMOTE_HITM", + "BriefDescription": "OCR.ALL_RFO.L3_MISS.REMOTE_HITM OCR.ALL_RFO.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5201,7 +5188,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5214,7 +5201,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & REMOTE_HITM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5227,7 +5214,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5240,7 +5227,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5253,7 +5240,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5266,7 +5253,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5335,7 +5322,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS & SNOOP_MISS", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5390,7 +5377,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5403,7 +5390,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5419,7 +5406,8 @@ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", - "EventCode": "0xCD", + "Data_LA": "1", + "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", "MSRIndex": "0x3F6", "MSRValue": "0x20", @@ -5444,7 +5432,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5457,7 +5445,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5470,7 +5458,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5510,7 +5498,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_MISS", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5523,7 +5511,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5536,7 +5524,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5549,7 +5537,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & SNOOP_MISS", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5585,7 +5573,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS & SNOOP_NONE", + "BriefDescription": "OCR.ALL_RFO.L3_MISS.SNOOP_NONE OCR.ALL_RFO.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5598,7 +5586,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5667,7 +5655,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5708,7 +5696,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.OTHER.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5747,7 +5735,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5760,7 +5748,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS & SNOOP_NONE", + "BriefDescription": "OCR.ALL_READS.L3_MISS.SNOOP_NONE OCR.ALL_READS.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5829,7 +5817,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5870,7 +5858,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5925,7 +5913,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5938,7 +5926,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5951,7 +5939,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5964,7 +5952,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5977,7 +5965,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS & SNOOP_MISS", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.SNOOP_MISS OCR.ALL_DATA_RD.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6004,7 +5992,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6017,7 +6005,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6030,7 +6018,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6099,7 +6087,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6181,7 +6169,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_MISS.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6222,7 +6210,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6235,7 +6223,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.ANY_SNOOP OCR.PF_L2_RFO.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6262,7 +6250,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & SNOOP_MISS", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6359,7 +6347,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6400,7 +6388,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6413,7 +6401,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & SNOOP_NONE", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6426,7 +6414,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6439,7 +6427,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6452,7 +6440,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.HITM_OTHER_CORE OCR.OTHER.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6496,7 +6484,8 @@ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", - "EventCode": "0xCD", + "Data_LA": "1", + "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", "MSRIndex": "0x3F6", "MSRValue": "0x100", @@ -6592,7 +6581,8 @@ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", - "EventCode": "0xCD", + "Data_LA": "1", + "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", "MSRIndex": "0x3F6", "MSRValue": "0x10", @@ -6640,7 +6630,7 @@ "UMask": "0x20" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & SNOOP_MISS", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6694,7 +6684,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & SNOOP_NONE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6707,7 +6697,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6763,7 +6753,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6776,7 +6766,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6789,7 +6779,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6816,7 +6806,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6843,7 +6833,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6873,7 +6863,8 @@ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", - "EventCode": "0xCD", + "Data_LA": "1", + "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", "MSRIndex": "0x3F6", "MSRValue": "0x200", @@ -6893,7 +6884,7 @@ "UMask": "0x20" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6962,7 +6953,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6975,7 +6966,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_MISS", + "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7016,7 +7007,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.NO_SNOOP_NEEDED OCR.DEMAND_CODE_RD.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7029,7 +7020,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.HITM_OTHER_CORE OCR.PF_L1D_AND_SW.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7070,7 +7061,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & REMOTE_HITM", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7083,7 +7074,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.DEMAND_DATA_RD.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7096,7 +7087,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7123,7 +7114,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7136,7 +7127,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS & ANY_SNOOP", + "BriefDescription": "OCR.ALL_RFO.L3_MISS.ANY_SNOOP OCR.ALL_RFO.L3_MISS.ANY_SNOOP OCR.ALL_RFO.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7149,7 +7140,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7204,7 +7195,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7217,7 +7208,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & REMOTE_HITM", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7243,7 +7234,8 @@ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", - "EventCode": "0xCD", + "Data_LA": "1", + "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", "MSRIndex": "0x3F6", "MSRValue": "0x40", @@ -7254,7 +7246,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_RFO.L3_MISS.HITM_OTHER_CORE OCR.ALL_RFO.L3_MISS.HITM_OTHER_CORE OCR.ALL_RFO.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7295,7 +7287,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_NONE OCR.ALL_PF_DATA_RD.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7336,7 +7328,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.NO_SNOOP_NEEDED OCR.OTHER.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7405,7 +7397,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7418,7 +7410,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS & ANY_SNOOP", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.ANY_SNOOP OCR.OTHER.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7431,7 +7423,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7444,7 +7436,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7457,7 +7449,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7470,7 +7462,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7483,7 +7475,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7524,7 +7516,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7551,7 +7543,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7564,7 +7556,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7577,7 +7569,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L3_RFO.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7604,7 +7596,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7617,7 +7609,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.HITM_OTHER_CORE OCR.DEMAND_RFO.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7658,7 +7650,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_MISS", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7681,7 +7673,7 @@ "UMask": "0x2" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS & REMOTE_HITM", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7708,7 +7700,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7735,7 +7727,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L3_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7762,7 +7754,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7789,7 +7781,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7802,7 +7794,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS & SNOOP_NONE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.SNOOP_NONE OCR.ALL_DATA_RD.L3_MISS.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7815,7 +7807,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7828,7 +7820,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP OCR.ALL_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7841,7 +7833,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7854,7 +7846,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7895,7 +7887,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_MISS.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7922,7 +7914,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7949,7 +7941,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7975,19 +7967,6 @@ "SampleAfterValue": "100003", "UMask": "0x1" }, - { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", - "Counter": "0,1,2,3", - "CounterHTOff": "0,1,2,3", - "EventCode": "0xB7, 0xBB", - "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", - "MSRIndex": "0x1a6,0x1a7", - "MSRValue": "0x0810000001", - "Offcore": "1", - "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "SampleAfterValue": "100003", - "UMask": "0x1" - }, { "BriefDescription": "This event is deprecated. Refer to new event OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_MISS", "Counter": "0,1,2,3", @@ -8003,7 +7982,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8026,7 +8005,7 @@ "UMask": "0x2" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_LOCAL_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8039,7 +8018,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8070,7 +8049,7 @@ "UMask": "0x10" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & ANY_SNOOP", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.ANY_SNOOP OCR.DEMAND_RFO.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8083,7 +8062,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8236,7 +8215,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED OCR.ALL_READS.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8249,7 +8228,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_CODE_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8262,7 +8241,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_REMOTE_DRAM & SNOOP_MISS_OR_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8289,7 +8268,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8344,7 +8323,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS & REMOTE_HITM", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.REMOTE_HITM OCR.ALL_DATA_RD.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8357,7 +8336,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_RFO.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8426,7 +8405,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & REMOTE_HITM", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8439,7 +8418,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8452,7 +8431,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8520,7 +8499,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS & REMOTE_HITM", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8630,7 +8609,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS.NO_SNOOP_NEEDED OCR.PF_L3_DATA_RD.L3_MISS.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8713,7 +8692,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8726,7 +8705,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8766,7 +8745,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS & REMOTE_HIT_FORWARD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD OCR.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8803,7 +8782,7 @@ "UMask": "0x10" }, { - "BriefDescription": "ALL_READS & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED OCR.ALL_READS.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8816,7 +8795,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8857,7 +8836,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8968,7 +8947,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8981,7 +8960,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS.HITM_OTHER_CORE OCR.DEMAND_CODE_RD.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8994,7 +8973,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9021,7 +9000,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9047,7 +9026,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9060,7 +9039,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9073,7 +9052,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_LOCAL_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_LOCAL_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9168,6 +9147,19 @@ "SampleAfterValue": "100003", "UMask": "0x1" }, + { + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0810000001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, { "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).", "Counter": "0,1,2,3", @@ -9261,7 +9253,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9316,7 +9308,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9329,7 +9321,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP OCR.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9356,7 +9348,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP OCR.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9383,7 +9375,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9424,7 +9416,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9465,7 +9457,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & NO_SNOOP_NEEDED", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9506,7 +9498,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_MISS & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD OCR.PF_L2_DATA_RD.L3_MISS.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9533,7 +9525,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_LOCAL_DRAM & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9546,7 +9538,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS_REMOTE_HOP1_DRAM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS_REMOTE_HOP1_DRAM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9601,7 +9593,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9614,7 +9606,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_FWD OCR.PF_L1D_AND_SW.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9641,7 +9633,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_MISS & REMOTE_HITM", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9678,7 +9670,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9705,7 +9697,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_MISS_REMOTE_HOP1_DRAM & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9718,7 +9710,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9745,7 +9737,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_READS.L3_MISS.HITM_OTHER_CORE OCR.ALL_READS.L3_MISS.HITM_OTHER_CORE OCR.ALL_READS.L3_MISS.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9772,7 +9764,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_MISS_REMOTE_HOP1_DRAM & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_MISS_REMOTE_HOP1_DRAM.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9785,7 +9777,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_MISS.HIT_OTHER_CORE_FWD OCR.OTHER.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9812,7 +9804,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_MISS & REMOTE_HITM", + "BriefDescription": "OCR.ALL_PF_RFO.L3_MISS.REMOTE_HITM OCR.ALL_PF_RFO.L3_MISS.REMOTE_HITM", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9839,7 +9831,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS_LOCAL_DRAM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_READS.L3_MISS_LOCAL_DRAM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9855,7 +9847,8 @@ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", - "EventCode": "0xCD", + "Data_LA": "1", + "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", "MSRIndex": "0x3F6", "MSRValue": "0x4", @@ -9883,7 +9876,8 @@ "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", - "EventCode": "0xCD", + "Data_LA": "1", + "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", "MSRIndex": "0x3F6", "MSRValue": "0x8", @@ -9894,7 +9888,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_MISS & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_MISS.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -9905,5 +9899,19 @@ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "UMask": "0x1" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event OCR.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Deprecated": "1", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_HOP1_DRAM.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0110000001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/other.json b/tools/perf/pmu-events/arch/x86/cascadelakex/other.json index 05d13d53c374..f77d78e90954 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/other.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/other.json @@ -1,6 +1,6 @@ [ { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -13,7 +13,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -26,7 +26,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_READS.L3_HIT_F.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -39,7 +39,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -52,7 +52,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -65,7 +65,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -78,7 +78,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -104,7 +104,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -117,7 +117,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -130,7 +130,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "Counts any other requests OCR.OTHER.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -143,7 +143,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -156,7 +156,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -169,7 +169,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -182,7 +182,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -195,7 +195,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -208,7 +208,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -221,7 +221,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -234,7 +234,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -247,7 +247,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT & SNOOP_MISS", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -260,7 +260,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -273,7 +273,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -299,7 +299,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -312,7 +312,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -325,7 +325,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_READS.L3_HIT_M.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -338,7 +338,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -351,7 +351,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -364,7 +364,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -377,7 +377,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -390,7 +390,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED OCR.ALL_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -403,7 +403,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -416,7 +416,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "Counts any other requests OCR.OTHER.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -429,7 +429,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -442,7 +442,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_READS.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_READS.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -478,7 +478,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE OCR.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -491,7 +491,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -517,7 +517,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -530,7 +530,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -543,7 +543,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.OTHER.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -556,7 +556,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -582,7 +582,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -595,7 +595,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_F & SNOOP_MISS", + "BriefDescription": "OCR.ALL_READS.L3_HIT_F.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -621,7 +621,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -634,7 +634,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -660,7 +660,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -673,7 +673,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -699,7 +699,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -712,7 +712,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT & SNOOP_MISS", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -738,7 +738,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -751,7 +751,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT & SNOOP_MISS", + "BriefDescription": "OCR.ALL_RFO.L3_HIT.SNOOP_MISS OCR.ALL_RFO.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -764,7 +764,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -777,7 +777,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -790,7 +790,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -803,7 +803,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -816,7 +816,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -829,7 +829,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -842,7 +842,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT & SNOOP_NONE", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -855,7 +855,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -868,7 +868,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_M & SNOOP_MISS", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -881,7 +881,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_RFO.L3_HIT.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -894,7 +894,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -946,7 +946,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -985,7 +985,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -998,7 +998,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1011,7 +1011,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1024,7 +1024,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1050,7 +1050,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1063,7 +1063,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1076,7 +1076,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1089,7 +1089,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE OCR.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1102,7 +1102,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1115,7 +1115,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1128,7 +1128,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1141,7 +1141,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1167,7 +1167,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1180,7 +1180,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT & ANY_SNOOP", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.ANY_SNOOP OCR.DEMAND_RFO.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1193,7 +1193,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1206,7 +1206,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1219,7 +1219,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1232,7 +1232,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1245,7 +1245,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1258,7 +1258,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT & SNOOP_NONE", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1297,7 +1297,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1310,7 +1310,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & ANY_RESPONSE have any response type.", + "BriefDescription": "OCR.ALL_PF_DATA_RD.ANY_RESPONSE have any response type.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1323,7 +1323,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1336,7 +1336,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.SNOOP_MISS OCR.ALL_PF_RFO.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1349,7 +1349,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1362,7 +1362,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1388,7 +1388,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_S & SNOOP_MISS", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1401,7 +1401,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.ANY_SNOOP OCR.ALL_RFO.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1414,7 +1414,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1427,7 +1427,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1440,7 +1440,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "OCR.ALL_READS.L3_HIT_E.ANY_SNOOP OCR.ALL_READS.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1453,7 +1453,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1466,7 +1466,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1479,7 +1479,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1492,7 +1492,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_FWD OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1505,7 +1505,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1518,7 +1518,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1531,7 +1531,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1544,7 +1544,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1557,7 +1557,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1570,7 +1570,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1583,7 +1583,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1596,7 +1596,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1609,7 +1609,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1622,7 +1622,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1635,7 +1635,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1661,7 +1661,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & ANY_RESPONSE have any response type.", + "BriefDescription": "OCR.ALL_RFO.ANY_RESPONSE have any response type.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1726,7 +1726,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1739,7 +1739,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1752,7 +1752,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.ANY_SNOOP OCR.PF_L2_RFO.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1765,7 +1765,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1778,7 +1778,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1791,7 +1791,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1804,7 +1804,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1817,7 +1817,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT & SNOOP_MISS", + "BriefDescription": "OCR.ALL_READS.L3_HIT.SNOOP_MISS OCR.ALL_READS.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1830,7 +1830,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1843,7 +1843,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1856,7 +1856,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1908,7 +1908,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1921,7 +1921,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1934,7 +1934,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1947,7 +1947,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1960,7 +1960,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.ANY_SNOOP OCR.ALL_READS.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1973,7 +1973,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1986,7 +1986,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT & SNOOP_NONE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -1999,7 +1999,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2012,7 +2012,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2025,7 +2025,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2038,7 +2038,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2051,7 +2051,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2077,7 +2077,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2090,7 +2090,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2103,7 +2103,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_E & SNOOP_MISS", + "BriefDescription": "OCR.ALL_READS.L3_HIT_E.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2116,7 +2116,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2129,7 +2129,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_M & SNOOP_NONE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2142,7 +2142,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2155,7 +2155,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2168,7 +2168,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2194,7 +2194,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2207,7 +2207,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2220,7 +2220,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2233,7 +2233,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2246,7 +2246,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2259,7 +2259,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2272,7 +2272,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2285,7 +2285,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2337,7 +2337,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2350,7 +2350,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2363,7 +2363,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2376,7 +2376,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2389,7 +2389,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2402,7 +2402,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2428,7 +2428,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2441,7 +2441,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2454,7 +2454,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2490,7 +2490,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2503,7 +2503,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2516,7 +2516,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2529,7 +2529,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2542,7 +2542,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2555,7 +2555,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2568,7 +2568,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2581,7 +2581,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2594,7 +2594,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_F & SNOOP_MISS", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2607,7 +2607,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2620,7 +2620,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & ANY_RESPONSE have any response type.", + "BriefDescription": "OCR.ALL_PF_RFO.ANY_RESPONSE have any response type.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2633,7 +2633,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2646,7 +2646,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2659,7 +2659,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2672,7 +2672,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2685,7 +2685,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_M & SNOOP_MISS", + "BriefDescription": "OCR.ALL_READS.L3_HIT_M.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2698,7 +2698,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT & SNOOP_MISS", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2711,7 +2711,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2724,7 +2724,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT & SNOOP_MISS", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2747,7 +2747,7 @@ "UMask": "0x40" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE OCR.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2773,7 +2773,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2799,7 +2799,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2812,7 +2812,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_M & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2825,7 +2825,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2838,7 +2838,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2851,7 +2851,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2877,7 +2877,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2890,7 +2890,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2916,7 +2916,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2929,7 +2929,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2942,7 +2942,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2955,7 +2955,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2968,7 +2968,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -2994,7 +2994,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3020,7 +3020,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT & SNOOP_MISS", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3046,7 +3046,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3059,7 +3059,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3072,7 +3072,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3085,7 +3085,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3098,7 +3098,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3111,7 +3111,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_E & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3124,7 +3124,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3137,7 +3137,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & ANY_RESPONSE have any response type.", + "BriefDescription": "OCR.ALL_DATA_RD.ANY_RESPONSE have any response type.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3150,7 +3150,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3163,7 +3163,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_S & SNOOP_NONE", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3189,7 +3189,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3202,7 +3202,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3215,7 +3215,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3241,7 +3241,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_F & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3254,7 +3254,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3267,7 +3267,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3280,7 +3280,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.HIT_OTHER_CORE_FWD OCR.OTHER.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3306,7 +3306,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3319,7 +3319,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3332,7 +3332,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3345,7 +3345,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3358,7 +3358,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3371,7 +3371,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3384,7 +3384,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3397,7 +3397,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3423,7 +3423,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3436,7 +3436,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3449,7 +3449,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT & SNOOP_NONE", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3462,7 +3462,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3475,7 +3475,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_NONE", + "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3488,7 +3488,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3501,7 +3501,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3514,7 +3514,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3527,7 +3527,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3540,7 +3540,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3553,7 +3553,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3566,7 +3566,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3579,7 +3579,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT & SNOOP_MISS", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3592,7 +3592,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3605,7 +3605,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3618,7 +3618,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3631,7 +3631,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3644,7 +3644,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_READS.L3_HIT_F.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3657,7 +3657,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3670,7 +3670,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3696,7 +3696,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3709,7 +3709,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3735,7 +3735,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3748,7 +3748,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3771,7 +3771,7 @@ "UMask": "0x2" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3797,7 +3797,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3810,7 +3810,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3823,7 +3823,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3836,7 +3836,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3849,7 +3849,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_F & SNOOP_NONE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3875,7 +3875,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3888,7 +3888,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3914,7 +3914,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3927,7 +3927,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT & SNOOP_MISS", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3940,7 +3940,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3953,7 +3953,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3966,7 +3966,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_READS.L3_HIT.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3979,7 +3979,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -3992,7 +3992,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4005,7 +4005,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4018,7 +4018,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4031,7 +4031,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "Counts any other requests OCR.OTHER.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4044,7 +4044,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4057,7 +4057,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "Counts any other requests OCR.OTHER.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4070,7 +4070,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_FWD OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4092,7 +4092,7 @@ "UMask": "0x4" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4118,7 +4118,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4131,7 +4131,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4144,7 +4144,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4157,7 +4157,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4170,7 +4170,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_F & SNOOP_MISS", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4196,7 +4196,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4209,7 +4209,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4235,7 +4235,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.ANY_SNOOP OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4248,7 +4248,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4261,7 +4261,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.HITM_OTHER_CORE OCR.ALL_RFO.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4274,7 +4274,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4287,7 +4287,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.ANY_SNOOP OCR.ALL_RFO.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4300,7 +4300,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4313,7 +4313,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.ANY_SNOOP OCR.ALL_RFO.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4326,7 +4326,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts any other requests OCR.OTHER.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4339,7 +4339,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4352,7 +4352,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4365,7 +4365,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4378,7 +4378,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4391,7 +4391,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4404,7 +4404,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4417,7 +4417,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.HITM_OTHER_CORE OCR.ALL_DATA_RD.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4430,7 +4430,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_READS.L3_HIT_E.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4443,7 +4443,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4469,7 +4469,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4495,7 +4495,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4521,7 +4521,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4534,7 +4534,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4560,7 +4560,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD OCR.ALL_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4573,7 +4573,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4586,7 +4586,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT & ANY_SNOOP", + "BriefDescription": "OCR.ALL_READS.L3_HIT.ANY_SNOOP OCR.ALL_READS.L3_HIT.ANY_SNOOP OCR.ALL_READS.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4599,7 +4599,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NONE OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4612,7 +4612,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4625,7 +4625,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4638,7 +4638,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4651,7 +4651,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4677,7 +4677,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT & SNOOP_HIT_WITH_FWD", + "BriefDescription": "OCR.ALL_READS.L3_HIT.SNOOP_HIT_WITH_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4690,7 +4690,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4716,7 +4716,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT & SNOOP_NONE", + "BriefDescription": "OCR.ALL_READS.L3_HIT.SNOOP_NONE OCR.ALL_READS.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4729,7 +4729,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4742,7 +4742,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4755,7 +4755,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts any other requests OCR.OTHER.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4768,7 +4768,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4781,7 +4781,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4794,7 +4794,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_READS.L3_HIT_S.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4807,7 +4807,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4820,7 +4820,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT & SNOOP_HIT_WITH_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4833,7 +4833,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "OCR.ALL_READS.L3_HIT_M.ANY_SNOOP OCR.ALL_READS.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4846,7 +4846,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_READS.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT.NO_SNOOP_NEEDED OCR.ALL_READS.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4859,7 +4859,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & ANY_RESPONSE have any response type.", + "BriefDescription": "OCR.ALL_READS.ANY_RESPONSE have any response type.", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4872,7 +4872,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4885,7 +4885,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4898,7 +4898,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4911,7 +4911,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4924,7 +4924,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT & ANY_SNOOP", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4937,7 +4937,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4950,7 +4950,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4963,7 +4963,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -4976,7 +4976,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5015,7 +5015,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5041,7 +5041,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_MISS OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5054,7 +5054,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT & SNOOP_NONE", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5067,7 +5067,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5080,7 +5080,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_E & SNOOP_NONE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5093,7 +5093,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5106,7 +5106,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_READS.L3_HIT_S.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5119,7 +5119,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "OCR.ALL_READS.L3_HIT_F.ANY_SNOOP OCR.ALL_READS.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5132,7 +5132,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5145,7 +5145,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_S & SNOOP_NONE", + "BriefDescription": "OCR.ALL_READS.L3_HIT_S.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5158,7 +5158,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT & SNOOP_MISS", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5171,7 +5171,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5184,7 +5184,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5197,7 +5197,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5210,7 +5210,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5236,7 +5236,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_S & SNOOP_NONE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5249,7 +5249,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5262,7 +5262,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP OCR.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5275,7 +5275,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT & SNOOP_NONE", + "BriefDescription": "OCR.ALL_RFO.L3_HIT.SNOOP_NONE OCR.ALL_RFO.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5288,7 +5288,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5301,7 +5301,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5314,7 +5314,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5327,7 +5327,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5340,7 +5340,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_E & SNOOP_NONE", + "BriefDescription": "OCR.ALL_READS.L3_HIT_E.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5353,7 +5353,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5366,7 +5366,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & SUPPLIER_NONE & SNOOP_MISS", + "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5379,7 +5379,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5401,7 +5401,7 @@ "UMask": "0x2" }, { - "BriefDescription": "ALL_RFO & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5414,7 +5414,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5427,7 +5427,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5440,7 +5440,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_M & SNOOP_NONE", + "BriefDescription": "OCR.ALL_READS.L3_HIT_M.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5476,7 +5476,7 @@ "UMask": "0x20" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE OCR.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5489,7 +5489,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5502,7 +5502,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5515,7 +5515,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5528,7 +5528,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5541,7 +5541,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5554,7 +5554,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5567,7 +5567,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5580,7 +5580,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_F & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5593,7 +5593,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5606,7 +5606,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5619,7 +5619,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5645,7 +5645,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.NO_SNOOP_NEEDED OCR.ALL_READS.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5658,7 +5658,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5671,7 +5671,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5684,7 +5684,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_F & SNOOP_NONE", + "BriefDescription": "OCR.ALL_READS.L3_HIT_F.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5697,7 +5697,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT & ANY_SNOOP", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP OCR.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5710,7 +5710,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5723,7 +5723,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5736,7 +5736,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5749,7 +5749,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5762,7 +5762,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5775,7 +5775,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_FWD OCR.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5788,7 +5788,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5801,7 +5801,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5814,7 +5814,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5827,7 +5827,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5840,7 +5840,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT & SNOOP_NONE", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.SNOOP_NONE OCR.ALL_DATA_RD.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5853,7 +5853,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5866,7 +5866,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_S & SNOOP_MISS", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5879,7 +5879,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.ANY_SNOOP OCR.ALL_PF_DATA_RD.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5905,7 +5905,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5918,7 +5918,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.ANY_SNOOP OCR.PF_L3_RFO.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5931,7 +5931,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_M & SNOOP_MISS", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5957,7 +5957,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5970,7 +5970,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5983,7 +5983,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -5996,7 +5996,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_E & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_E.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6009,7 +6009,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6022,7 +6022,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6035,7 +6035,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6048,7 +6048,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6074,7 +6074,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6100,7 +6100,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6113,7 +6113,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6139,7 +6139,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6152,7 +6152,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6165,7 +6165,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6191,7 +6191,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6204,7 +6204,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6217,7 +6217,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6230,7 +6230,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6243,7 +6243,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6256,7 +6256,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6269,7 +6269,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT & ANY_SNOOP", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.ANY_SNOOP OCR.OTHER.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6282,7 +6282,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6295,7 +6295,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_S & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6308,7 +6308,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE OCR.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6321,7 +6321,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_READS.L3_HIT_E.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6347,7 +6347,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & PMM_HIT_LOCAL_PMM & ANY_SNOOP", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.PMM_HIT_LOCAL_PMM.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6360,7 +6360,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6373,7 +6373,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6386,7 +6386,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6412,7 +6412,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT & ANY_SNOOP", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP OCR.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6438,7 +6438,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT & SNOOP_MISS", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6451,7 +6451,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT & SNOOP_HIT_WITH_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6464,7 +6464,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.HITM_OTHER_CORE OCR.ALL_PF_RFO.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6477,7 +6477,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6490,7 +6490,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6529,7 +6529,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_READS.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6542,7 +6542,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6555,7 +6555,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6568,7 +6568,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED OCR.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6620,7 +6620,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6633,7 +6633,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_READS.L3_HIT_M.HITM_OTHER_CORE OCR.ALL_READS.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6659,7 +6659,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_READS.SUPPLIER_NONE.HITM_OTHER_CORE OCR.ALL_READS.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6672,7 +6672,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.ANY_SNOOP OCR.ALL_DATA_RD.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6685,7 +6685,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.NO_SNOOP_NEEDED OCR.ALL_RFO.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6698,7 +6698,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6711,7 +6711,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6763,7 +6763,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.NO_SNOOP_NEEDED OCR.OTHER.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6776,7 +6776,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.HITM_OTHER_CORE OCR.ALL_PF_RFO.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6789,7 +6789,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6815,7 +6815,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT & ANY_SNOOP", + "BriefDescription": "OCR.ALL_RFO.L3_HIT.ANY_SNOOP OCR.ALL_RFO.L3_HIT.ANY_SNOOP OCR.ALL_RFO.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6828,7 +6828,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6841,7 +6841,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6867,7 +6867,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6880,7 +6880,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.ANY_SNOOP OCR.ALL_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6893,7 +6893,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_M & SNOOP_NONE", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_M.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6906,7 +6906,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6919,7 +6919,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_PF_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6958,7 +6958,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "OCR.ALL_RFO.SUPPLIER_NONE.ANY_SNOOP OCR.ALL_RFO.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6971,7 +6971,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6984,7 +6984,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -6997,7 +6997,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_M.HITM_OTHER_CORE OCR.ALL_PF_DATA_RD.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7019,7 +7019,7 @@ "UMask": "0x8" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7032,7 +7032,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7045,7 +7045,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7058,7 +7058,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7071,7 +7071,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_M & SNOOP_MISS", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_M.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7084,7 +7084,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7097,7 +7097,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7110,7 +7110,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT & ANY_SNOOP", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP OCR.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7123,7 +7123,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7136,7 +7136,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_E & SNOOP_MISS", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7149,7 +7149,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7162,7 +7162,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7175,7 +7175,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7188,7 +7188,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7201,7 +7201,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7214,7 +7214,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7227,7 +7227,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED OCR.ALL_PF_RFO.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7240,7 +7240,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD OCR.ALL_RFO.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7253,7 +7253,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7276,7 +7276,7 @@ "UMask": "0x18" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7289,7 +7289,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7315,7 +7315,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7328,7 +7328,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NONE", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7341,7 +7341,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7354,7 +7354,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7367,7 +7367,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7380,7 +7380,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7393,7 +7393,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_E & ANY_SNOOP", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.ANY_SNOOP OCR.ALL_RFO.L3_HIT_E.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7406,7 +7406,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_S & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_RFO.L3_HIT_S.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7419,7 +7419,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_DATA_RD.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7432,7 +7432,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_E & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_E.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7445,7 +7445,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7458,7 +7458,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT.SNOOP_NONE OCR.ALL_PF_RFO.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7520,7 +7520,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE OCR.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7533,7 +7533,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7546,7 +7546,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7572,7 +7572,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT & SNOOP_NONE", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7585,7 +7585,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD OCR.ALL_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7598,7 +7598,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD OCR.ALL_PF_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7611,7 +7611,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED OCR.ALL_PF_DATA_RD.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7637,7 +7637,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7650,7 +7650,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7676,7 +7676,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & SUPPLIER_NONE & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.SUPPLIER_NONE.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7689,7 +7689,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7702,7 +7702,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7728,7 +7728,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_E & SNOOP_MISS", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_E.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7741,7 +7741,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7754,7 +7754,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7767,7 +7767,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7780,7 +7780,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT & SNOOP_MISS", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT.SNOOP_MISS OCR.ALL_DATA_RD.L3_HIT.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7793,7 +7793,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7806,7 +7806,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT & ANY_SNOOP", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP OCR.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7832,7 +7832,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_NONE OCR.ALL_PF_DATA_RD.L3_HIT.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7845,7 +7845,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_RFO.SUPPLIER_NONE.ANY_SNOOP OCR.ALL_PF_RFO.SUPPLIER_NONE.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7858,7 +7858,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7884,7 +7884,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7897,7 +7897,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7910,7 +7910,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_S & HITM_OTHER_CORE", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_S.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7949,7 +7949,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7962,7 +7962,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -7975,7 +7975,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_M & HITM_OTHER_CORE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_M.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8001,7 +8001,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8014,7 +8014,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_NO_FWD OCR.ALL_READS.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8027,7 +8027,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_E & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.HITM_OTHER_CORE OCR.ALL_RFO.L3_HIT_E.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8053,7 +8053,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand data writes (RFOs) DEMAND_RFO & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all demand data writes (RFOs) OCR.DEMAND_RFO.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8092,7 +8092,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & NO_SNOOP_NEEDED", + "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED OCR.ALL_DATA_RD.SUPPLIER_NONE.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8105,7 +8105,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8118,7 +8118,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_F & ANY_SNOOP", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_F.ANY_SNOOP OCR.ALL_PF_RFO.L3_HIT_F.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8131,7 +8131,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED OCR.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8144,7 +8144,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_M & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_M.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8157,7 +8157,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8170,7 +8170,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8183,7 +8183,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT & HITM_OTHER_CORE", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT.HITM_OTHER_CORE OCR.OTHER.L3_HIT.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8196,7 +8196,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8222,7 +8222,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_E & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_E.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8235,7 +8235,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_F & HITM_OTHER_CORE", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_F.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8257,7 +8257,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8270,7 +8270,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all demand code reads DEMAND_CODE_RD & L3_HIT_M & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all demand code reads OCR.DEMAND_CODE_RD.L3_HIT_M.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8283,7 +8283,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & L3_HIT_S & SNOOP_NONE", + "BriefDescription": "OCR.ALL_PF_RFO.L3_HIT_S.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8309,7 +8309,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8322,7 +8322,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8348,7 +8348,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_E & SNOOP_NONE", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_E.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8361,7 +8361,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8374,7 +8374,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs PF_L2_RFO & L3_HIT_S & NO_SNOOP_NEEDED", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs OCR.PF_L2_RFO.L3_HIT_S.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8387,7 +8387,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & L3_HIT_F & SNOOP_NONE", + "BriefDescription": "OCR.ALL_RFO.L3_HIT_F.SNOOP_NONE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8400,7 +8400,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8413,7 +8413,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts demand data reads DEMAND_DATA_RD & L3_HIT_M & NO_SNOOP_NEEDED", + "BriefDescription": "Counts demand data reads OCR.DEMAND_DATA_RD.L3_HIT_M.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8426,7 +8426,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_S & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_S.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8439,7 +8439,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & HITM_OTHER_CORE", + "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE OCR.ALL_DATA_RD.SUPPLIER_NONE.HITM_OTHER_CORE", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8452,7 +8452,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD OCR.ALL_PF_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8465,7 +8465,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts any other requests OTHER & L3_HIT_F & NO_SNOOP_NEEDED", + "BriefDescription": "Counts any other requests OCR.OTHER.L3_HIT_F.NO_SNOOP_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8478,7 +8478,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_S & ANY_SNOOP", + "BriefDescription": "OCR.ALL_READS.L3_HIT_S.ANY_SNOOP OCR.ALL_READS.L3_HIT_S.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8504,7 +8504,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8517,7 +8517,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD OCR.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8543,7 +8543,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8556,7 +8556,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8569,7 +8569,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs PF_L3_RFO & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs OCR.PF_L3_RFO.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8582,7 +8582,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads PF_L3_DATA_RD & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads OCR.PF_L3_DATA_RD.L3_HIT_M.ANY_SNOOP", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8595,7 +8595,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_PF_RFO & PMM_HIT_LOCAL_PMM & SNOOP_NOT_NEEDED", + "BriefDescription": "OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED OCR.ALL_PF_RFO.PMM_HIT_LOCAL_PMM.SNOOP_NOT_NEEDED", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8608,7 +8608,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & HIT_OTHER_CORE_FWD", + "BriefDescription": "OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD OCR.ALL_DATA_RD.SUPPLIER_NONE.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8621,7 +8621,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests PF_L1D_AND_SW & L3_HIT_E & HIT_OTHER_CORE_FWD", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests OCR.PF_L1D_AND_SW.L3_HIT_E.HIT_OTHER_CORE_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8634,7 +8634,7 @@ "UMask": "0x1" }, { - "BriefDescription": "Counts prefetch (that bring data to L2) data reads PF_L2_DATA_RD & L3_HIT_F & HIT_OTHER_CORE_NO_FWD", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads OCR.PF_L2_DATA_RD.L3_HIT_F.HIT_OTHER_CORE_NO_FWD", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", @@ -8647,7 +8647,7 @@ "UMask": "0x1" }, { - "BriefDescription": "ALL_READS & L3_HIT_S & SNOOP_MISS", + "BriefDescription": "OCR.ALL_READS.L3_HIT_S.SNOOP_MISS", "Counter": "0,1,2,3", "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json b/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json index 5ec668f46ac1..023f31c72a42 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/pipeline.json @@ -826,16 +826,6 @@ "SampleAfterValue": "2000003", "UMask": "0x2" }, - { - "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", - "Counter": "0,1,2,3", - "CounterHTOff": "0,1,2,3,4,5,6,7", - "EventCode": "0xE6", - "EventName": "BACLEARS.ANY", - "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.", - "SampleAfterValue": "100003", - "UMask": "0x1" - }, { "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json index 3fb5cdce842f..4ba9e6d9f25e 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json @@ -90,32 +90,32 @@ "Unit": "iMC" }, { - "BriefDescription": "Intel Optane DC persistent memory bandwidth read (MB/sec). Derived from unc_m_pmm_rpq_inserts", + "BriefDescription": "Intel Optane DC persistent memory bandwidth read (MB). Derived from unc_m_pmm_rpq_inserts", "Counter": "0,1,2,3", "EventCode": "0xE3", "EventName": "UNC_M_PMM_BANDWIDTH.READ", "PerPkg": "1", - "ScaleUnit": "6.103515625E-5MB/sec", + "ScaleUnit": "6.103515625E-5MB", "Unit": "iMC" }, { - "BriefDescription": "Intel Optane DC persistent memory bandwidth write (MB/sec). Derived from unc_m_pmm_wpq_inserts", + "BriefDescription": "Intel Optane DC persistent memory bandwidth write (MB). Derived from unc_m_pmm_wpq_inserts", "Counter": "0,1,2,3", "EventCode": "0xE7", "EventName": "UNC_M_PMM_BANDWIDTH.WRITE", "PerPkg": "1", - "ScaleUnit": "6.103515625E-5MB/sec", + "ScaleUnit": "6.103515625E-5MB", "Unit": "iMC" }, { - "BriefDescription": "Intel Optane DC persistent memory bandwidth total (MB/sec). Derived from unc_m_pmm_rpq_inserts", + "BriefDescription": "Intel Optane DC persistent memory bandwidth total (MB). Derived from unc_m_pmm_rpq_inserts", "Counter": "0,1,2,3", "EventCode": "0xE3", "EventName": "UNC_M_PMM_BANDWIDTH.TOTAL", "MetricExpr": "UNC_M_PMM_RPQ_INSERTS + UNC_M_PMM_WPQ_INSERTS", "MetricName": "UNC_M_PMM_BANDWIDTH.TOTAL", "PerPkg": "1", - "ScaleUnit": "6.103515625E-5MB/sec", + "ScaleUnit": "6.103515625E-5MB", "Unit": "iMC" }, { diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-other.json b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-other.json index df355ba7acc8..0cd083839e75 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-other.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-other.json @@ -537,6 +537,27 @@ "UMask": "0x10", "Unit": "CHA" }, + { + "BriefDescription": "TOR Inserts : DRds issued by iA Cores that Missed the LLC", + "Counter": "0,1,2,3", + "EventCode": "0x35", + "EventName": "UNC_CHA_TOR_INSERTS.IA_MISS_DRD", + "Filter": "config1=0x40433", + "PerPkg": "1", + "PublicDescription": "TOR Inserts : DRds issued by iA Cores that Missed the LLC : Counts the number of entries successfully inserted into the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.", + "UMask": "0x21", + "Unit": "CHA" + }, + { + "BriefDescription": "TOR Occupancy : DRds issued by iA Cores that Missed the LLC", + "EventCode": "0x36", + "EventName": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD", + "Filter": "config1=0x40433", + "PerPkg": "1", + "PublicDescription": "TOR Occupancy : DRds issued by iA Cores that Missed the LLC : For each cycle, this event accumulates the number of valid entries in the TOR that match qualifications specified by the subevent. Does not include addressless requests such as locks and interrupts.", + "UMask": "0x21", + "Unit": "CHA" + }, { "BriefDescription": "Clockticks of the IIO Traffic Controller", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 25b06cf98747..2f2a209e87e1 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -38,3 +38,4 @@ GenuineIntel-6-7E,v1,icelake,core GenuineIntel-6-86,v1,tremontx,core AuthenticAMD-23-([12][0-9A-F]|[0-9A-F]),v2,amdzen1,core AuthenticAMD-23-[[:xdigit:]]+,v1,amdzen2,core +AuthenticAMD-25-[[:xdigit:]]+,v1,amdzen2,core diff --git a/tools/perf/pmu-events/arch/x86/skylakex/cache.json b/tools/perf/pmu-events/arch/x86/skylakex/cache.json index 24df183693fa..e750a21976f1 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/cache.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/cache.json @@ -1,1663 +1,1675 @@ [ { - "EventCode": "0x24", - "UMask": "0x21", - "BriefDescription": "Demand Data Read miss L2, no rejects", + "BriefDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS", - "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x22", - "BriefDescription": "RFO requests that miss L2 cache", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.RFO_MISS", - "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x24", - "BriefDescription": "L2 cache misses when fetching instructions", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.CODE_RD_MISS", - "PublicDescription": "Counts L2 cache misses when fetching instructions.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x27", - "BriefDescription": "Demand requests that miss L2 cache", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.ALL_DEMAND_MISS", - "PublicDescription": "Demand requests that miss L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x38", - "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.PF_MISS", - "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x3f", - "BriefDescription": "All requests that miss L2 cache", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.MISS", - "PublicDescription": "All requests that miss L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0xc1", - "BriefDescription": "Demand Data Read requests that hit L2 cache", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", - "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0xc2", - "BriefDescription": "RFO requests that hit L2 cache", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.RFO_HIT", - "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0xc4", - "BriefDescription": "L2 cache hits when fetching instructions, code reads.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.CODE_RD_HIT", - "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0xd8", - "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.PF_HIT", - "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0xe1", - "BriefDescription": "Demand Data Read requests", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", - "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0xe2", - "BriefDescription": "RFO requests to L2 cache", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.ALL_RFO", - "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0xe4", - "BriefDescription": "L2 code requests", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.ALL_CODE_RD", - "PublicDescription": "Counts the total number of L2 code requests.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0xe7", - "BriefDescription": "Demand requests to L2 cache", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", - "PublicDescription": "Demand requests to L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0xf8", - "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.ALL_PF", - "PublicDescription": "Counts the total number of requests from the L2 hardware prefetchers.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0xff", - "BriefDescription": "All L2 requests", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.REFERENCES", - "PublicDescription": "All L2 requests.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x2E", - "UMask": "0x41", - "BriefDescription": "Core-originated cacheable demand requests missed L3", - "Counter": "0,1,2,3", - "EventName": "LONGEST_LAT_CACHE.MISS", - "Errata": "SKL057", - "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01003C0004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0x2E", - "UMask": "0x4f", - "BriefDescription": "Core-originated cacheable demand requests that refer to L3", - "Counter": "0,1,2,3", - "EventName": "LONGEST_LAT_CACHE.REFERENCE", - "Errata": "SKL057", - "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all accesses to the L3.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x48", - "UMask": "0x1", - "BriefDescription": "Cycles with L1D load Misses outstanding.", - "Counter": "0,1,2,3", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES", - "CounterMask": "1", - "PublicDescription": "Counts duration of L1D miss outstanding in cycles.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x48", - "UMask": "0x1", - "BriefDescription": "L1D miss outstandings duration in cycles", - "Counter": "0,1,2,3", - "EventName": "L1D_PEND_MISS.PENDING", - "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x48", - "UMask": "0x1", - "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", - "Counter": "0,1,2,3", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", - "AnyThread": "1", - "CounterMask": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x48", - "UMask": "0x2", - "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.", - "Counter": "0,1,2,3", - "EventName": "L1D_PEND_MISS.FB_FULL", - "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x51", - "UMask": "0x1", - "BriefDescription": "L1D data line replacements", - "Counter": "0,1,2,3", - "EventName": "L1D.REPLACEMENT", - "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x60", - "UMask": "0x1", - "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", - "CounterMask": "1", - "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x60", - "UMask": "0x1", - "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD", - "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x60", - "UMask": "0x1", - "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", - "CounterMask": "6", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x60", - "UMask": "0x2", - "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", - "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x60", - "UMask": "0x2", - "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD", - "CounterMask": "1", - "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x60", - "UMask": "0x4", - "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO", - "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x60", - "UMask": "0x4", - "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", - "CounterMask": "1", - "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x60", - "UMask": "0x8", - "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", - "CounterMask": "1", - "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x60", - "UMask": "0x8", - "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", - "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB0", - "UMask": "0x1", - "BriefDescription": "Demand Data Read requests sent to uncore", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD", - "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB0", - "UMask": "0x2", - "BriefDescription": "Cacheable and noncachaeble code read requests", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD", - "PublicDescription": "Counts both cacheable and non-cacheable code read requests.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB0", - "UMask": "0x4", "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB0", "EventName": "OFFCORE_REQUESTS.DEMAND_RFO", "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "EventCode": "0xB0", - "UMask": "0x8", - "BriefDescription": "Demand and prefetch data reads", + "BriefDescription": "Counts all demand code reads that have any response type.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD", - "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB0", - "UMask": "0x80", - "BriefDescription": "Any memory transaction that reached the SQ.", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS", - "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB2", - "UMask": "0x1", - "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL", - "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.Note: Writeback pending FIFO has six entries.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { + "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010004", + "Offcore": "1", "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "EventCode": "0xD0", - "UMask": "0x11", - "BriefDescription": "Retired load instructions that miss the STLB. (Precise Event)", - "Data_LA": "1", - "PEBS": "1", + "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache", "Counter": "0,1,2,3", - "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS", - "PublicDescription": "Retired load instructions that miss the STLB.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.PF_MISS", + "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x38" + }, + { + "BriefDescription": "Demand requests that miss L2 cache", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_DEMAND_MISS", + "PublicDescription": "Demand requests that miss L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x27" + }, + { + "BriefDescription": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x08003C0002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "EventCode": "0xD0", - "UMask": "0x12", - "BriefDescription": "Retired store instructions that miss the STLB. (Precise Event)", - "Data_LA": "1", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES", - "PublicDescription": "Retired store instructions that miss the STLB.", - "SampleAfterValue": "100003", - "L1_Hit_Indication": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xD0", - "UMask": "0x21", - "BriefDescription": "Retired load instructions with locked access. (Precise Event)", - "Data_LA": "1", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "MEM_INST_RETIRED.LOCK_LOADS", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xD0", - "UMask": "0x41", - "BriefDescription": "Retired load instructions that split across a cacheline boundary. (Precise Event)", - "Data_LA": "1", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "MEM_INST_RETIRED.SPLIT_LOADS", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xD0", - "UMask": "0x42", - "BriefDescription": "Retired store instructions that split across a cacheline boundary. (Precise Event)", - "Data_LA": "1", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "MEM_INST_RETIRED.SPLIT_STORES", - "SampleAfterValue": "100003", - "L1_Hit_Indication": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xD0", - "UMask": "0x81", - "BriefDescription": "All retired load instructions. (Precise Event)", - "Data_LA": "1", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "MEM_INST_RETIRED.ALL_LOADS", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xD0", - "UMask": "0x82", - "BriefDescription": "All retired store instructions. (Precise Event)", - "Data_LA": "1", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "MEM_INST_RETIRED.ALL_STORES", - "PublicDescription": "All retired store instructions.", - "SampleAfterValue": "2000003", - "L1_Hit_Indication": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xD1", - "UMask": "0x1", - "BriefDescription": "Retired load instructions with L1 cache hits as data sources", - "Data_LA": "1", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_RETIRED.L1_HIT", - "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xD1", - "UMask": "0x2", - "BriefDescription": "Retired load instructions with L2 cache hits as data sources", - "Data_LA": "1", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_RETIRED.L2_HIT", - "PublicDescription": "Retired load instructions with L2 cache hits as data sources.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xD1", - "UMask": "0x4", "BriefDescription": "Retired load instructions with L3 cache hits as data sources", - "Data_LA": "1", - "PEBS": "1", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", "EventName": "MEM_LOAD_RETIRED.L3_HIT", - "PublicDescription": "Retired load instructions with L3 cache hits as data sources.", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache.", "SampleAfterValue": "50021", - "CounterHTOff": "0,1,2,3" + "UMask": "0x4" }, { - "EventCode": "0xD1", - "UMask": "0x8", - "BriefDescription": "Retired load instructions missed L1 cache as data sources", - "Data_LA": "1", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_RETIRED.L1_MISS", - "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xD1", - "UMask": "0x10", - "BriefDescription": "Retired load instructions missed L2 cache as data sources", - "Data_LA": "1", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_RETIRED.L2_MISS", - "PublicDescription": "Retired load instructions missed L2 cache as data sources.", - "SampleAfterValue": "50021", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xD1", - "UMask": "0x20", - "BriefDescription": "Retired load instructions missed L3 cache as data sources", - "Data_LA": "1", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_RETIRED.L3_MISS", - "PublicDescription": "Retired load instructions missed L3 cache as data sources.", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xD1", - "UMask": "0x40", - "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready", - "Data_LA": "1", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_RETIRED.FB_HIT", - "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xD2", - "UMask": "0x1", - "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", - "Data_LA": "1", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS", - "SampleAfterValue": "20011", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xD2", - "UMask": "0x2", - "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache", - "Data_LA": "1", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT", - "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.", - "SampleAfterValue": "20011", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xD2", - "UMask": "0x4", - "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3", - "Data_LA": "1", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM", - "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.", - "SampleAfterValue": "20011", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xD2", - "UMask": "0x8", - "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required", - "Data_LA": "1", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE", - "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xD3", - "UMask": "0x1", - "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram", - "Data_LA": "1", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xD3", - "UMask": "0x2", - "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from remote dram", - "Data_LA": "1", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xD3", - "UMask": "0x4", - "BriefDescription": "Retired load instructions whose data sources was remote HITM", - "Data_LA": "1", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xD3", - "UMask": "0x8", - "BriefDescription": "Retired load instructions whose data sources was forwarded from a remote cache", - "Data_LA": "1", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xD4", - "UMask": "0x4", - "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.", - "Data_LA": "1", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "MEM_LOAD_MISC_RETIRED.UC", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xF0", - "UMask": "0x40", "BriefDescription": "L2 writebacks that access L2 cache", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xF0", "EventName": "L2_TRANS.L2_WB", "PublicDescription": "Counts L2 writebacks that access L2 cache.", "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x40" }, { - "EventCode": "0xF1", - "UMask": "0x1f", "BriefDescription": "L2 cache lines filling L2", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xF1", "EventName": "L2_LINES_IN.ALL", "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1f" + }, + { + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04003C0400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Demand Data Read requests sent to uncore", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB0", + "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD", + "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired load instructions missed L3 cache as data sources", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.L3_MISS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache.", + "SampleAfterValue": "100007", + "UMask": "0x20" + }, + { + "BriefDescription": "All retired store instructions.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.ALL_STORES", + "L1_Hit_Indication": "1", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x82" }, { - "EventCode": "0xF2", - "UMask": "0x1", "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared state. A non-threaded event.", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xF2", "EventName": "L2_LINES_OUT.SILENT", - "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.", "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all prefetch data reads that hit in the L3.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Core-originated cacheable demand requests missed L3", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL057", + "EventCode": "0x2E", + "EventName": "LONGEST_LAT_CACHE.MISS", + "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.", + "SampleAfterValue": "100003", + "UMask": "0x41" + }, + { + "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_PF", + "PublicDescription": "Counts the total number of requests from the L2 hardware prefetchers.", + "SampleAfterValue": "200003", + "UMask": "0xf8" + }, + { + "BriefDescription": "Retired load instructions whose data sources was remote HITM", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD3", + "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM", + "PEBS": "1", + "PublicDescription": "Retired load instructions whose data sources was remote HITM.", + "SampleAfterValue": "100007", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts all prefetch data reads that have any response type.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x08003C0122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x08003C0100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO", + "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "2000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "RFO requests that miss L2 cache", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.RFO_MISS", + "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x22" }, { - "EventCode": "0xF2", - "UMask": "0x2", "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3. Clean lines may either be allocated in L3 or dropped", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xF2", "EventName": "L2_LINES_OUT.NON_SILENT", - "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3", + "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3. Clean lines may either be allocated in L3 or dropped.", "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "EventCode": "0xF2", - "UMask": "0x4", - "BriefDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF", - "Deprecated": "1", - "Counter": "0,1,2,3", - "EventName": "L2_LINES_OUT.USELESS_PREF", - "PublicDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xF2", - "UMask": "0x4", "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xF2", "EventName": "L2_LINES_OUT.USELESS_HWPF", "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" + }, + { + "BriefDescription": "Counts all demand data writes (RFOs) that have any response type.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "All requests that miss L2 cache", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.MISS", + "PublicDescription": "All requests that miss L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0x3f" + }, + { + "BriefDescription": "L2 code requests", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_CODE_RD", + "PublicDescription": "Counts the total number of L2 code requests.", + "SampleAfterValue": "200003", + "UMask": "0xe4" + }, + { + "BriefDescription": "Retired load instructions whose data sources was forwarded from a remote cache", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD3", + "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD", + "PEBS": "1", + "PublicDescription": "Retired load instructions whose data sources was forwarded from a remote cache.", + "SampleAfterValue": "100007", + "UMask": "0x8" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all demand & prefetch data reads that have any response type.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT", + "PEBS": "1", + "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.", + "SampleAfterValue": "20011", + "UMask": "0x2" + }, + { + "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from remote dram", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD3", + "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM", + "PEBS": "1", + "SampleAfterValue": "100007", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01003C0122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired load instructions missed L1 cache as data sources", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.L1_MISS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.", + "SampleAfterValue": "100003", + "UMask": "0x8" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01003C0020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "L2 cache misses when fetching instructions", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.CODE_RD_MISS", + "PublicDescription": "Counts L2 cache misses when fetching instructions.", + "SampleAfterValue": "200003", + "UMask": "0x24" + }, + { + "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04003C0490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD", + "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that hit in the L3.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01003C0120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04003C0120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD", + "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Demand requests to L2 cache", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", + "PublicDescription": "Demand requests to L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0xe7" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04003C0491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x08003C0001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "AnyThread": "1", + "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01003C0490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Core-originated cacheable demand requests that refer to L3", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL057", + "EventCode": "0x2E", + "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all accesses to the L3.", + "SampleAfterValue": "100003", + "UMask": "0x4f" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x08003C0120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired load instructions that miss the STLB.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS", + "PEBS": "1", + "SampleAfterValue": "100003", + "UMask": "0x11" + }, + { + "BriefDescription": "Counts demand data reads that have any response type.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "L1D data line replacements", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x51", + "EventName": "L1D.REPLACEMENT", + "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD4", + "EventName": "MEM_LOAD_MISC_RETIRED.UC", + "PEBS": "1", + "SampleAfterValue": "100007", + "UMask": "0x4" + }, + { + "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.FB_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.", + "SampleAfterValue": "100007", + "UMask": "0x40" + }, + { + "BriefDescription": "This event is deprecated. Refer to new event L2_LINES_OUT.USELESS_HWPF", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Deprecated": "1", + "EventCode": "0xF2", + "EventName": "L2_LINES_OUT.USELESS_PREF", + "SampleAfterValue": "200003", + "UMask": "0x4" + }, + { + "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.PF_HIT", + "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0xd8" + }, + { + "BriefDescription": "Counts all demand & prefetch RFOs that have any response type.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Demand Data Read miss L2, no rejects", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS", + "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.", + "SampleAfterValue": "200003", + "UMask": "0x21" + }, + { + "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE", + "PEBS": "1", + "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.", + "SampleAfterValue": "100003", + "UMask": "0x8" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04003C0080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01003C0491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "All retired load instructions.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.ALL_LOADS", + "PEBS": "1", + "SampleAfterValue": "2000003", + "UMask": "0x81" + }, + { + "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS", + "PEBS": "1", + "SampleAfterValue": "20011", + "UMask": "0x1" + }, + { + "BriefDescription": "Demand Data Read requests", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", + "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.", + "SampleAfterValue": "200003", + "UMask": "0xe1" + }, + { + "BriefDescription": "All L2 requests", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.REFERENCES", + "PublicDescription": "All L2 requests.", + "SampleAfterValue": "200003", + "UMask": "0xff" + }, + { + "BriefDescription": "Cycles with L1D load Misses outstanding.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES", + "PublicDescription": "Counts duration of L1D miss outstanding in cycles.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", + "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "2000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01003C0100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xF4", - "UMask": "0x10", "BriefDescription": "Number of cache line split locks sent to uncore.", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xF4", "EventName": "SQ_MISC.SPLIT_LOCK", "PublicDescription": "Counts the number of cache line split locks sent to the uncore.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand data reads have any response type.", - "MSRValue": "0x0000010001", + "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand data reads have any response type.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", + "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand data reads TBD TBD", - "MSRValue": "0x01003C0001", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand data reads TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand data reads TBD TBD", - "MSRValue": "0x04003C0001", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand data reads TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand data reads TBD TBD", - "MSRValue": "0x10003C0001", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand data reads TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand data reads TBD TBD", - "MSRValue": "0x3F803C0001", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand data reads TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) have any response type.", - "MSRValue": "0x0000010002", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) have any response type.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD", - "MSRValue": "0x01003C0002", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD", - "MSRValue": "0x04003C0002", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD", - "MSRValue": "0x10003C0002", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD", - "MSRValue": "0x3F803C0002", - "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any response type.", - "MSRValue": "0x0000010004", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any response type.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", + "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", - "MSRValue": "0x01003C0004", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", - "MSRValue": "0x04003C0004", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", - "MSRValue": "0x10003C0004", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", - "MSRValue": "0x3F803C0004", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads have any response type.", - "MSRValue": "0x0000010010", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads have any response type.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", - "MSRValue": "0x01003C0010", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", - "MSRValue": "0x04003C0010", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", - "MSRValue": "0x10003C0010", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", - "MSRValue": "0x3F803C0010", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs have any response type.", - "MSRValue": "0x0000010020", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs have any response type.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", - "MSRValue": "0x01003C0020", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", - "MSRValue": "0x04003C0020", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", - "MSRValue": "0x10003C0020", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", - "MSRValue": "0x3F803C0020", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads have any response type.", - "MSRValue": "0x0000010080", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads have any response type.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", - "MSRValue": "0x01003C0080", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", - "MSRValue": "0x04003C0080", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", - "MSRValue": "0x10003C0080", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", - "MSRValue": "0x3F803C0080", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs have any response type.", - "MSRValue": "0x0000010100", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs have any response type.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", - "MSRValue": "0x01003C0100", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", - "MSRValue": "0x04003C0100", - "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04003C0100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", - "MSRValue": "0x10003C0100", + "BriefDescription": "L2 cache hits when fetching instructions, code reads.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.CODE_RD_HIT", + "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.", + "SampleAfterValue": "200003", + "UMask": "0xc4" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", - "MSRValue": "0x3F803C0100", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", + "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests have any response type.", - "MSRValue": "0x0000010400", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests have any response type.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", - "MSRValue": "0x01003C0400", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", - "MSRValue": "0x04003C0400", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", - "MSRValue": "0x10003C0400", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", - "MSRValue": "0x3F803C0400", - "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD have any response type.", - "MSRValue": "0x0000010490", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that have any response type.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD have any response type.", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x01003C0490", + "BriefDescription": "Retired load instructions with L2 cache hits as data sources", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.L2_HIT", + "PEBS": "1", + "PublicDescription": "Retired load instructions with L2 cache hits as data sources.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x2" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x04003C0490", + "BriefDescription": "RFO requests that hit L2 cache", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.RFO_HIT", + "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.", + "SampleAfterValue": "200003", + "UMask": "0xc2" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x10003C0490", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01003C0080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x3F803C0490", + "BriefDescription": "L1D miss outstandings duration in cycles", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.PENDING", + "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD have any response type.", - "MSRValue": "0x0000010120", + "BriefDescription": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD have any response type.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", + "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x01003C0120", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x04003C0120", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x10003C0120", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x3F803C0120", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD have any response type.", - "MSRValue": "0x0000010491", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD have any response type.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x01003C0491", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x04003C0491", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x10003C0491", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x3F803C0491", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD have any response type.", - "MSRValue": "0x0000010122", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD have any response type.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x01003C0122", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x04003C0122", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x10003C0122", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x3F803C0122", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand data reads", - "MSRValue": "0x08007C0001", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "PublicDescription": "Counts demand data reads", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs)", - "MSRValue": "0x08007C0002", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", - "PublicDescription": "Counts all demand data writes (RFOs)", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "MSRValue": "0x08007C0004", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads", - "MSRValue": "0x08007C0010", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs", - "MSRValue": "0x08007C0020", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads", - "MSRValue": "0x08007C0080", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs", - "MSRValue": "0x08007C0100", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests", - "MSRValue": "0x08007C0400", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD", - "MSRValue": "0x08007C0490", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "PublicDescription": "TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD", - "MSRValue": "0x08007C0120", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", - "PublicDescription": "TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD", - "MSRValue": "0x08007C0491", - "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", - "PublicDescription": "TBD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x08003C0491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD", - "MSRValue": "0x08007C0122", + "BriefDescription": "Demand Data Read requests that hit L2 cache", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", - "PublicDescription": "TBD", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "PublicDescription": "Counts the number of demand Data Read requests, initiated by load instructions, that hit L2 cache", + "SampleAfterValue": "200003", + "UMask": "0xc1" + }, + { + "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM", + "PEBS": "1", + "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.", + "SampleAfterValue": "20011", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04003C0001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01003C0002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x08003C0080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01003C0010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired store instructions that split across a cacheline boundary.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.SPLIT_STORES", + "L1_Hit_Indication": "1", + "PEBS": "1", + "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.", + "SampleAfterValue": "100003", + "UMask": "0x42" + }, + { + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Any memory transaction that reached the SQ.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB0", + "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS", + "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..", + "SampleAfterValue": "100003", + "UMask": "0x80" + }, + { + "BriefDescription": "Cacheable and noncachaeble code read requests", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB0", + "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD", + "PublicDescription": "Counts both cacheable and non-cacheable code read requests.", + "SampleAfterValue": "100003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x08003C0004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that have any response type.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04003C0004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x48", + "EventName": "L1D_PEND_MISS.FB_FULL", + "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x08003C0020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all demand code reads that hit in the L3.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts prefetch RFOs that hit in the L3.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04003C0020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x08003C0400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired load instructions with L1 cache hits as data sources", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.L1_HIT", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", + "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "2000003", + "UMask": "0x8" + }, + { + "BriefDescription": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x08003C0490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired load instructions with locked access.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.LOCK_LOADS", + "PEBS": "1", + "SampleAfterValue": "100007", + "UMask": "0x21" + }, + { + "BriefDescription": "Demand and prefetch data reads", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB0", + "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD", + "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", + "SampleAfterValue": "100003", + "UMask": "0x8" + }, + { + "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD3", + "EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM", + "PEBS": "1", + "PublicDescription": "Retired load instructions which data sources missed L3 but serviced from local DRAM.", + "SampleAfterValue": "100007", + "UMask": "0x1" + }, + { + "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01003C0400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired load instructions that split across a cacheline boundary.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.SPLIT_LOADS", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.", + "SampleAfterValue": "100003", + "UMask": "0x41" + }, + { + "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB2", + "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL", + "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.Note: Writeback pending FIFO has six entries.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "6", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", + "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04003C0002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3F803C0010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts prefetch RFOs that have any response type.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0000010120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x10003C0002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired store instructions that miss the STLB.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD0", + "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES", + "L1_Hit_Indication": "1", + "PEBS": "1", + "SampleAfterValue": "100003", + "UMask": "0x12" + }, + { + "BriefDescription": "RFO requests to L2 cache", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x24", + "EventName": "L2_RQSTS.ALL_RFO", + "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.", + "SampleAfterValue": "200003", + "UMask": "0xe2" + }, + { + "BriefDescription": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x08003C0010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04003C0010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired load instructions missed L2 cache as data sources", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xD1", + "EventName": "MEM_LOAD_RETIRED.L2_MISS", + "PEBS": "1", + "PublicDescription": "Retired load instructions missed L2 cache as data sources.", + "SampleAfterValue": "50021", + "UMask": "0x10" + }, + { + "BriefDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x01003C0001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", + "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "SampleAfterValue": "2000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x04003C0122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json index c5d0babe89fc..e197cde15047 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json @@ -1,85 +1,85 @@ [ { - "EventCode": "0xC7", - "UMask": "0x1", - "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT14 RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { + "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0xC7", - "UMask": "0x2", - "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC7", - "UMask": "0x4", - "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "EventCode": "0xC7", - "UMask": "0x8", - "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "BriefDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 8 calculations per element.", "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { + "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0xC7", - "UMask": "0x10", - "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC7", - "UMask": "0x20", - "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC7", - "UMask": "0x40", - "BriefDescription": "Number of Packed Double-Precision FP arithmetic instructions (Use operation multiplier of 8)", - "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x40" }, { - "EventCode": "0xC7", - "UMask": "0x80", - "BriefDescription": "Number of Packed Single-Precision FP arithmetic instructions (Use operation multiplier of 16)", + "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE", + "SampleAfterValue": "2000003", + "UMask": "0x10" + }, + { + "BriefDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 16 calculations per element.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x80" + }, + { + "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", + "SampleAfterValue": "2000003", + "UMask": "0x20" }, { - "EventCode": "0xCA", - "UMask": "0x1e", "BriefDescription": "Cycles with any input/output SSE or FP assist", "Counter": "0,1,2,3", - "EventName": "FP_ASSIST.ANY", + "CounterHTOff": "0,1,2,3,4,5,6,7", "CounterMask": "1", + "EventCode": "0xCA", + "EventName": "FP_ASSIST.ANY", "PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1e" + }, + { + "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC7", + "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", + "SampleAfterValue": "2000003", + "UMask": "0x8" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json index 4dc583cfb545..cdf95bd2a73d 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json @@ -1,482 +1,516 @@ [ { - "EventCode": "0x79", - "UMask": "0x4", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", - "Counter": "0,1,2,3", - "EventName": "IDQ.MITE_CYCLES", - "CounterMask": "1", - "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x4", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", - "Counter": "0,1,2,3", - "EventName": "IDQ.MITE_UOPS", - "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x8", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", - "Counter": "0,1,2,3", - "EventName": "IDQ.DSB_CYCLES", - "CounterMask": "1", - "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x8", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", - "Counter": "0,1,2,3", - "EventName": "IDQ.DSB_UOPS", - "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x10", - "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "Counter": "0,1,2,3", - "EventName": "IDQ.MS_DSB_CYCLES", - "CounterMask": "1", - "PublicDescription": "Counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x18", - "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop", - "Counter": "0,1,2,3", - "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", - "CounterMask": "1", - "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x18", - "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops", - "Counter": "0,1,2,3", - "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", - "CounterMask": "4", - "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x20", - "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "Counter": "0,1,2,3", - "EventName": "IDQ.MS_MITE_UOPS", - "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x24", - "BriefDescription": "Cycles MITE is delivering any Uop", - "Counter": "0,1,2,3", - "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS", - "CounterMask": "1", - "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x24", - "BriefDescription": "Cycles MITE is delivering 4 Uops", - "Counter": "0,1,2,3", - "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS", - "CounterMask": "4", - "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x30", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "Counter": "0,1,2,3", - "EventName": "IDQ.MS_CYCLES", - "CounterMask": "1", - "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x30", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "Counter": "0,1,2,3", - "EventName": "IDQ.MS_UOPS", - "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EdgeDetect": "1", - "EventCode": "0x79", - "UMask": "0x30", - "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer", - "Counter": "0,1,2,3", - "EventName": "IDQ.MS_SWITCHES", - "CounterMask": "1", - "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x80", - "UMask": "0x4", "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x80", "EventName": "ICACHE_16B.IFDATA_STALL", "PublicDescription": "Cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "EventCode": "0x83", - "UMask": "0x1", - "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", + "BriefDescription": "Retired Instructions who experienced iTLB true miss.", "Counter": "0,1,2,3", - "EventName": "ICACHE_64B.IFTAG_HIT", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x83", - "UMask": "0x2", - "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", - "Counter": "0,1,2,3", - "EventName": "ICACHE_64B.IFTAG_MISS", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x83", - "UMask": "0x4", - "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.", - "Counter": "0,1,2,3", - "EventName": "ICACHE_64B.IFTAG_STALL", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "Invert": "1", - "EventCode": "0x9C", - "UMask": "0x1", - "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.", - "Counter": "0,1,2,3", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK", - "CounterMask": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x9C", - "UMask": "0x1", - "BriefDescription": "Cycles with less than 3 uops delivered by the front end.", - "Counter": "0,1,2,3", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE", - "CounterMask": "1", - "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x9C", - "UMask": "0x1", - "BriefDescription": "Cycles with less than 2 uops delivered by the front end.", - "Counter": "0,1,2,3", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE", - "CounterMask": "2", - "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x9C", - "UMask": "0x1", - "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled", - "Counter": "0,1,2,3", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE", - "CounterMask": "3", - "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x9C", - "UMask": "0x1", - "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled", - "Counter": "0,1,2,3", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE", - "CounterMask": "4", - "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x9C", - "UMask": "0x1", - "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled", - "Counter": "0,1,2,3", - "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", - "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding \u201c4 \u2013 x\u201d when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xAB", - "UMask": "0x2", - "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.", - "Counter": "0,1,2,3", - "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", - "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0\u20132 cycles.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { + "CounterHTOff": "0,1,2,3", "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Precise Event.", - "PEBS": "1", - "MSRValue": "0x400406", - "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_4", - "MSRIndex": "0x3F7", - "TakenAlone": "1", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", - "PEBS": "1", - "MSRValue": "0x200206", - "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2", - "MSRIndex": "0x3F7", - "TakenAlone": "1", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", - "PEBS": "1", - "MSRValue": "0x400206", - "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_2", - "MSRIndex": "0x3F7", - "TakenAlone": "1", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss. Precise Event.", - "PEBS": "1", - "MSRValue": "0x15", - "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.STLB_MISS", - "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.", - "TakenAlone": "1", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced iTLB true miss. Precise Event.", - "PEBS": "1", - "MSRValue": "0x14", - "Counter": "0,1,2,3", "EventName": "FRONTEND_RETIRED.ITLB_MISS", "MSRIndex": "0x3F7", + "MSRValue": "0x14", + "PEBS": "1", "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.", - "TakenAlone": "1", "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "TakenAlone": "1", + "UMask": "0x1" }, { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss. Precise Event.", - "PEBS": "1", - "MSRValue": "0x13", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.L2_MISS", - "MSRIndex": "0x3F7", - "TakenAlone": "1", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" - }, - { + "CounterHTOff": "0,1,2,3", "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss. Precise Event.", - "PEBS": "1", - "MSRValue": "0x12", - "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.L1I_MISS", - "MSRIndex": "0x3F7", - "TakenAlone": "1", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss. Precise Event.", - "PEBS": "1", - "MSRValue": "0x11", - "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.DSB_MISS", - "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.", - "TakenAlone": "1", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", - "PEBS": "1", - "MSRValue": "0x300206", - "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3", - "MSRIndex": "0x3F7", - "TakenAlone": "1", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", - "PEBS": "1", - "MSRValue": "0x100206", - "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1", - "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.", - "TakenAlone": "1", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Precise Event.", - "PEBS": "1", - "MSRValue": "0x420006", - "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_512", - "MSRIndex": "0x3F7", - "TakenAlone": "1", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Precise Event.", - "PEBS": "1", - "MSRValue": "0x410006", - "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_256", - "MSRIndex": "0x3F7", - "TakenAlone": "1", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall. Precise Event.", - "PEBS": "1", - "MSRValue": "0x408006", - "Counter": "0,1,2,3", "EventName": "FRONTEND_RETIRED.LATENCY_GE_128", "MSRIndex": "0x3F7", - "TakenAlone": "1", + "MSRValue": "0x408006", + "PEBS": "1", "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "TakenAlone": "1", + "UMask": "0x1" }, { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Precise Event.", - "PEBS": "1", - "MSRValue": "0x404006", + "BriefDescription": "Cycles with less than 3 uops delivered by the front end.", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_64", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x9C", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE", + "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.DSB_CYCLES", + "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", + "SampleAfterValue": "2000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "3", + "EventCode": "0x9C", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE", + "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xE6", + "EventName": "BACLEARS.ANY", + "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.DSB_MISS", "MSRIndex": "0x3F7", - "TakenAlone": "1", + "MSRValue": "0x11", + "PEBS": "1", + "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.", "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "TakenAlone": "1", + "UMask": "0x1" }, { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall. Precise Event.", - "PEBS": "1", - "MSRValue": "0x402006", + "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled", "Counter": "0,1,2,3", - "EventName": "FRONTEND_RETIRED.LATENCY_GE_32", - "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.", - "TakenAlone": "1", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0x9C", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE", + "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall. Precise Event.", - "PEBS": "1", - "MSRValue": "0x401006", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_16", "MSRIndex": "0x3F7", + "MSRValue": "0x401006", + "PEBS": "1", "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.", - "TakenAlone": "1", "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "TakenAlone": "1", + "UMask": "0x1" }, { - "EventCode": "0xC6", - "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.", - "PEBS": "1", - "MSRValue": "0x400806", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x79", + "EventName": "IDQ.MITE_UOPS", + "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "SampleAfterValue": "2000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Cycles with less than 2 uops delivered by the front end.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "2", + "EventCode": "0x9C", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE", + "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.MS_CYCLES", + "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", + "SampleAfterValue": "2000003", + "UMask": "0x30" + }, + { + "BriefDescription": "Cycles MITE is delivering any Uop", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS", + "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", + "SampleAfterValue": "2000003", + "UMask": "0x24" + }, + { + "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x83", + "EventName": "ICACHE_64B.IFTAG_HIT", + "SampleAfterValue": "200003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0x79", + "EventName": "IDQ.MS_SWITCHES", + "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", + "SampleAfterValue": "2000003", + "UMask": "0x30" + }, + { + "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.L2_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x13", + "PEBS": "1", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.MITE_CYCLES", + "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.", + "SampleAfterValue": "2000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_64", + "MSRIndex": "0x3F7", + "MSRValue": "0x404006", + "PEBS": "1", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" + }, + { + "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x9C", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", + "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x79", + "EventName": "IDQ.MS_MITE_UOPS", + "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.", + "SampleAfterValue": "2000003", + "UMask": "0x20" + }, + { + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x83", + "EventName": "ICACHE_64B.IFTAG_STALL", + "SampleAfterValue": "200003", + "UMask": "0x4" + }, + { + "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xAB", + "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", + "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", + "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", + "SampleAfterValue": "2000003", + "UMask": "0x18" + }, + { + "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.STLB_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x15", + "PEBS": "1", + "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" + }, + { + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x79", + "EventName": "IDQ.DSB_UOPS", + "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", + "SampleAfterValue": "2000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_512", + "MSRIndex": "0x3F7", + "MSRValue": "0x420006", + "PEBS": "1", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", "EventName": "FRONTEND_RETIRED.LATENCY_GE_8", "MSRIndex": "0x3F7", + "MSRValue": "0x400806", + "PEBS": "1", "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.", - "TakenAlone": "1", "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "TakenAlone": "1", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle", + "Counter": "0,1,2,3,4,5,6,7", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xc6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_1", + "MSRIndex": "0x3F7", + "MSRValue": "0x400106", + "PEBS": "2", + "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2", + "MSRIndex": "0x3F7", + "MSRValue": "0x400206", + "PEBS": "1", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_4", + "MSRIndex": "0x3F7", + "MSRValue": "0x400406", + "PEBS": "1", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles MITE is delivering 4 Uops", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0x79", + "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS", + "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", + "SampleAfterValue": "2000003", + "UMask": "0x24" + }, + { + "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x79", + "EventName": "IDQ.MS_DSB_CYCLES", + "PublicDescription": "Counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.", + "SampleAfterValue": "2000003", + "UMask": "0x10" + }, + { + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x79", + "EventName": "IDQ.MS_UOPS", + "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.", + "SampleAfterValue": "2000003", + "UMask": "0x30" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_256", + "MSRIndex": "0x3F7", + "MSRValue": "0x410006", + "PEBS": "1", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2", + "MSRIndex": "0x3F7", + "MSRValue": "0x200206", + "PEBS": "1", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3", + "MSRIndex": "0x3F7", + "MSRValue": "0x300206", + "PEBS": "1", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1", + "MSRIndex": "0x3F7", + "MSRValue": "0x100206", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0x79", + "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", + "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", + "SampleAfterValue": "2000003", + "UMask": "0x18" + }, + { + "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xAB", + "EventName": "DSB2MITE_SWITCHES.COUNT", + "PublicDescription": "This event counts the number of the Decode Stream Buffer (DSB)-to-MITE switches including all misses because of missing Decode Stream Buffer (DSB) cache and u-arch forced misses.\nNote: Invoking MITE requires two or three cycles delay.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.LATENCY_GE_32", + "MSRIndex": "0x3F7", + "MSRValue": "0x402006", + "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x9C", + "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK", + "Invert": "1", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x83", + "EventName": "ICACHE_64B.IFTAG_MISS", + "SampleAfterValue": "200003", + "UMask": "0x2" + }, + { + "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC6", + "EventName": "FRONTEND_RETIRED.L1I_MISS", + "MSRIndex": "0x3F7", + "MSRValue": "0x12", + "PEBS": "1", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylakex/memory.json b/tools/perf/pmu-events/arch/x86/skylakex/memory.json index 48a9cdf81307..6c3fd89d204d 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/memory.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/memory.json @@ -1,1396 +1,1403 @@ [ { - "EventCode": "0x54", - "UMask": "0x1", - "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address", + "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from remote dram.", "Counter": "0,1,2,3", - "EventName": "TX_MEM.ABORT_CONFLICT", - "PublicDescription": "Number of times a TSX line had a cache conflict.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063B800122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x54", - "UMask": "0x2", - "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.", + "BriefDescription": "Counts all demand & prefetch RFOs that miss in the L3.", "Counter": "0,1,2,3", - "EventName": "TX_MEM.ABORT_CAPACITY", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FBC000122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0x54", - "UMask": "0x4", - "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer", - "Counter": "0,1,2,3", - "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK", - "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x54", - "UMask": "0x8", - "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.", - "Counter": "0,1,2,3", - "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY", - "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x54", - "UMask": "0x10", - "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer", - "Counter": "0,1,2,3", - "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH", - "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x54", - "UMask": "0x20", - "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.", - "Counter": "0,1,2,3", - "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT", - "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x54", - "UMask": "0x40", - "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.", - "Counter": "0,1,2,3", - "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL", - "PublicDescription": "Number of times we could not allocate Lock Buffer.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x5d", - "UMask": "0x1", - "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.", - "Counter": "0,1,2,3", - "EventName": "TX_EXEC.MISC1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x5d", - "UMask": "0x2", - "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region", - "Counter": "0,1,2,3", - "EventName": "TX_EXEC.MISC2", - "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x5d", - "UMask": "0x4", - "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded", - "Counter": "0,1,2,3", - "EventName": "TX_EXEC.MISC3", - "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x5d", - "UMask": "0x8", - "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.", - "Counter": "0,1,2,3", - "EventName": "TX_EXEC.MISC4", - "PublicDescription": "RTM region detected inside HLE.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x5d", - "UMask": "0x10", - "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region", - "Counter": "0,1,2,3", - "EventName": "TX_EXEC.MISC5", - "PublicDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x60", - "UMask": "0x10", - "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x60", - "UMask": "0x10", "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6", + "CounterHTOff": "0,1,2,3,4,5,6,7", "CounterMask": "6", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { "EventCode": "0x60", - "UMask": "0x10", - "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD", - "CounterMask": "1", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "EventCode": "0xA3", - "UMask": "0x2", - "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.", + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from remote dram.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS", - "CounterMask": "2", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA3", - "UMask": "0x6", - "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.", - "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS", - "CounterMask": "6", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB0", - "UMask": "0x10", - "BriefDescription": "Demand Data Read requests who miss L3 cache", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", - "PublicDescription": "Demand Data Read requests who miss L3 cache.", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063B800020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xC3", - "UMask": "0x2", - "BriefDescription": "Counts the number of machine clears due to memory order conflicts.", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3.", "Counter": "0,1,2,3", - "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", - "Errata": "SKL089", - "PublicDescription": "Counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:a. memory disambiguation,b. external snoop, orc. cross SMT-HW-thread snoop (stores) hitting load buffer.", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FBC000100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xC8", - "UMask": "0x1", - "BriefDescription": "Number of times an HLE execution started.", + "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache.", "Counter": "0,1,2,3", - "EventName": "HLE_RETIRED.START", - "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103FC00002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xC8", - "UMask": "0x2", - "BriefDescription": "Number of times an HLE execution successfully committed", - "Counter": "0,1,2,3", - "EventName": "HLE_RETIRED.COMMIT", - "PublicDescription": "Number of times HLE commit succeeded.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC8", - "UMask": "0x4", - "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "HLE_RETIRED.ABORTED", - "PublicDescription": "Number of times HLE abort was triggered. (PEBS)", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC8", - "UMask": "0x8", - "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).", - "Counter": "0,1,2,3", - "EventName": "HLE_RETIRED.ABORTED_MEM", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC8", - "UMask": "0x10", "BriefDescription": "Number of times an HLE execution aborted due to hardware timer expiration.", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", "EventName": "HLE_RETIRED.ABORTED_TIMER", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "EventCode": "0xC8", - "UMask": "0x20", - "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", + "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from remote dram.", "Counter": "0,1,2,3", - "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY", - "PublicDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063B800490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xC8", - "UMask": "0x40", - "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type", - "Counter": "0,1,2,3", - "EventName": "HLE_RETIRED.ABORTED_MEMTYPE", - "PublicDescription": "Number of times an HLE execution aborted due to incompatible memory type.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC8", - "UMask": "0x80", - "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).", - "Counter": "0,1,2,3", - "EventName": "HLE_RETIRED.ABORTED_EVENTS", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC9", - "UMask": "0x1", - "BriefDescription": "Number of times an RTM execution started.", - "Counter": "0,1,2,3", - "EventName": "RTM_RETIRED.START", - "PublicDescription": "Number of times we entered an RTM region. Does not count nested transactions.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC9", - "UMask": "0x2", - "BriefDescription": "Number of times an RTM execution successfully committed", - "Counter": "0,1,2,3", - "EventName": "RTM_RETIRED.COMMIT", - "PublicDescription": "Number of times RTM commit succeeded.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC9", - "UMask": "0x4", - "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "RTM_RETIRED.ABORTED", - "PublicDescription": "Number of times RTM abort was triggered. (PEBS)", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC9", - "UMask": "0x8", "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", "EventName": "RTM_RETIRED.ABORTED_MEM", "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x8" }, { - "EventCode": "0xC9", - "UMask": "0x10", - "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.", + "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from remote dram.", "Counter": "0,1,2,3", - "EventName": "RTM_RETIRED.ABORTED_TIMER", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063B800004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xC9", - "UMask": "0x20", - "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions", - "Counter": "0,1,2,3", - "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY", - "PublicDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC9", - "UMask": "0x40", "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", "EventName": "RTM_RETIRED.ABORTED_MEMTYPE", "PublicDescription": "Number of times an RTM execution aborted due to incompatible memory type.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x40" }, { + "BriefDescription": "Counts all demand data writes (RFOs) that miss in the L3.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FBC000002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local or remote dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063FC00120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all prefetch data reads that miss in the L3.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FBC000490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0604000120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0604000490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "2", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0604000491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all demand code reads that miss the L3 and clean or shared data is transferred from remote cache.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x083FC00004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local or remote dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063FC00004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", + "MSRIndex": "0x3F6", + "MSRValue": "0x20", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "100007", + "TakenAlone": "1", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the modified data is transferred from remote cache.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103FC00020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all prefetch data reads that miss the L3 and the modified data is transferred from remote cache.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103FC00490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FBC000020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK", + "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.", + "SampleAfterValue": "2000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_CONFLICT", + "PublicDescription": "Number of times a TSX line had a cache conflict.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss in the L3.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FBC000400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", + "MSRIndex": "0x3F6", + "MSRValue": "0x40", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "2003", + "TakenAlone": "1", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and clean or shared data is transferred from remote cache.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x083FC00002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.ABORTED_MEM", + "SampleAfterValue": "2000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY", + "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.", + "SampleAfterValue": "2000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the modified data is transferred from remote cache.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103FC00080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x5d", + "EventName": "TX_EXEC.MISC5", + "PublicDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.", + "SampleAfterValue": "2000003", + "UMask": "0x10" + }, + { + "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x5d", + "EventName": "TX_EXEC.MISC4", + "PublicDescription": "RTM region detected inside HLE.", + "SampleAfterValue": "2000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x5d", + "EventName": "TX_EXEC.MISC3", + "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.", + "SampleAfterValue": "2000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x5d", + "EventName": "TX_EXEC.MISC2", + "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x5d", + "EventName": "TX_EXEC.MISC1", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of times an RTM execution successfully committed", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0xC9", - "UMask": "0x80", + "EventName": "RTM_RETIRED.COMMIT", + "PublicDescription": "Number of times RTM commit succeeded.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts prefetch RFOs that miss in the L3.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FBC000120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD", + "SampleAfterValue": "2000003", + "UMask": "0x10" + }, + { + "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063B800491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from remote dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063B800080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x083FC00490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0604000010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.ABORTED_TIMER", + "SampleAfterValue": "2000003", + "UMask": "0x10" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local or remote dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063FC00020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local or remote dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063FC00002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local or remote dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063FC00490", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from remote dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063B800100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the modified data is transferred from remote cache.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103FC00010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local or remote dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063FC00010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY", + "SampleAfterValue": "2000003", + "UMask": "0x20" + }, + { + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from remote dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063B800400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x083FC00122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the modified data is transferred from remote cache.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103FC00122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from remote dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063B800001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0604000001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of times an HLE execution successfully committed", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.COMMIT", + "PublicDescription": "Number of times HLE commit succeeded.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL", + "PublicDescription": "Number of times we could not allocate Lock Buffer.", + "SampleAfterValue": "2000003", + "UMask": "0x40" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and clean or shared data is transferred from remote cache.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x083FC00100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that miss the L3 and the modified data is transferred from remote cache.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103FC00001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0604000020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and clean or shared data is transferred from remote cache.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x083FC00080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Demand Data Read requests who miss L3 cache", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB0", + "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", + "PublicDescription": "Demand Data Read requests who miss L3 cache.", + "SampleAfterValue": "100003", + "UMask": "0x10" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local or remote dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063FC00080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that miss in the L3.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FBC000001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY", + "PublicDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.", + "SampleAfterValue": "2000003", + "UMask": "0x20" + }, + { + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and clean or shared data is transferred from remote cache.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x083FC00010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x083FC00120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all demand code reads that miss the L3 and the modified data is transferred from remote cache.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103FC00004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the modified data is transferred from remote cache.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103FC00100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", + "MSRIndex": "0x3F6", + "MSRValue": "0x100", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "503", + "TakenAlone": "1", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local or remote dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063FC00122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local or remote dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063FC00001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and clean or shared data is transferred from remote cache.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x083FC00020", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from remote dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063B800002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103FC00491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.ABORTED", + "PEBS": "1", + "PublicDescription": "Number of times RTM abort was triggered.", + "SampleAfterValue": "2000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local or remote dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063FC00400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.ABORTED", + "PEBS": "1", + "PublicDescription": "Number of times HLE abort was triggered.", + "SampleAfterValue": "2000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", + "MSRIndex": "0x3F6", + "MSRValue": "0x10", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "20011", + "TakenAlone": "1", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT", + "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.", + "SampleAfterValue": "2000003", + "UMask": "0x20" + }, + { + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and clean or shared data is transferred from remote cache.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x083FC00400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from remote dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063B800010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x60", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD", + "SampleAfterValue": "2000003", + "UMask": "0x10" + }, + { + "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0604000002", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", + "MSRIndex": "0x3F6", + "MSRValue": "0x200", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "101", + "TakenAlone": "1", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the modified data is transferred from remote cache.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103FC00400", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FBC000010", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0604000004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_CAPACITY", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from remote dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063B800120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.ABORTED_MEMTYPE", + "PublicDescription": "Number of times an HLE execution aborted due to incompatible memory type.", + "SampleAfterValue": "2000003", + "UMask": "0x40" + }, + { + "BriefDescription": "Number of times an RTM execution started.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", + "EventName": "RTM_RETIRED.START", + "PublicDescription": "Number of times we entered an RTM region. Does not count nested transactions.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts the number of machine clears due to memory order conflicts.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL089", + "EventCode": "0xC3", + "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", + "PublicDescription": "Counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:a. memory disambiguation,b. external snoop, orc. cross SMT-HW-thread snoop (stores) hitting load buffer.", + "SampleAfterValue": "100003", + "UMask": "0x2" + }, + { + "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x54", + "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH", + "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.", + "SampleAfterValue": "2000003", + "UMask": "0x10" + }, + { + "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local or remote dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063FC00491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all demand & prefetch data reads that miss in the L3.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FBC000491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts demand data reads that miss the L3 and clean or shared data is transferred from remote cache.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x083FC00001", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FBC000080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0604000100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0604000080", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC9", "EventName": "RTM_RETIRED.ABORTED_EVENTS", "PublicDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x80" }, { - "EventCode": "0xCD", - "UMask": "0x1", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.", - "PEBS": "2", - "MSRValue": "0x200", + "BriefDescription": "Number of times an HLE execution started.", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512", - "MSRIndex": "0x3F6", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.", - "TakenAlone": "1", - "SampleAfterValue": "101", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.START", + "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "EventCode": "0xCD", - "UMask": "0x1", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.", - "PEBS": "2", - "MSRValue": "0x100", + "BriefDescription": "Counts all demand code reads that miss in the L3.", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256", - "MSRIndex": "0x3F6", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.", - "TakenAlone": "1", - "SampleAfterValue": "503", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x3FBC000004", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xCD", - "UMask": "0x1", "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.", - "PEBS": "2", - "MSRValue": "0x80", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128", "MSRIndex": "0x3F6", + "MSRValue": "0x80", + "PEBS": "2", "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.", - "TakenAlone": "1", "SampleAfterValue": "1009", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xCD", - "UMask": "0x1", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.", - "PEBS": "2", - "MSRValue": "0x40", - "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64", - "MSRIndex": "0x3F6", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.", "TakenAlone": "1", - "SampleAfterValue": "2003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "EventCode": "0xCD", - "UMask": "0x1", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.", - "PEBS": "2", - "MSRValue": "0x20", + "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache.", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32", - "MSRIndex": "0x3F6", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.", - "TakenAlone": "1", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x083FC00491", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xCD", - "UMask": "0x1", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.", - "PEBS": "2", - "MSRValue": "0x10", + "BriefDescription": "Counts prefetch RFOs that miss the L3 and the modified data is transferred from remote cache.", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16", - "MSRIndex": "0x3F6", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.", - "TakenAlone": "1", - "SampleAfterValue": "20011", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x103FC00120", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xCD", - "UMask": "0x1", - "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.", - "PEBS": "2", - "MSRValue": "0x8", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local or remote dram.", "Counter": "0,1,2,3", - "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", - "MSRIndex": "0x3F6", - "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.", - "TakenAlone": "1", - "SampleAfterValue": "50021", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x063FC00100", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "6", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS", + "SampleAfterValue": "2000003", + "UMask": "0x6" + }, + { + "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC8", + "EventName": "HLE_RETIRED.ABORTED_EVENTS", + "SampleAfterValue": "2000003", + "UMask": "0x80" + }, + { + "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xB7, 0xBB", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0604000122", + "Offcore": "1", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xCD", - "UMask": "0x1", "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.", - "PEBS": "2", - "MSRValue": "0x4", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4", "MSRIndex": "0x3F6", + "MSRValue": "0x4", + "PEBS": "2", "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "100003", "TakenAlone": "1", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand data reads TBD TBD", - "MSRValue": "0x3FBC000001", + "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand data reads TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "EventCode": "0xcd", + "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8", + "MSRIndex": "0x3F6", + "MSRValue": "0x8", + "PEBS": "2", + "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.", + "SampleAfterValue": "50021", + "TakenAlone": "1", + "UMask": "0x1" }, { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand data reads TBD", - "MSRValue": "0x083FC00001", + "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local dram.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand data reads TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", + "CounterHTOff": "0,1,2,3", "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand data reads TBD", - "MSRValue": "0x103FC00001", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand data reads TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand data reads TBD", - "MSRValue": "0x063FC00001", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand data reads TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand data reads TBD", - "MSRValue": "0x063B800001", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand data reads TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand data reads TBD", - "MSRValue": "0x0604000001", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand data reads TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) TBD TBD", - "MSRValue": "0x3FBC000002", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) TBD", - "MSRValue": "0x083FC00002", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) TBD", - "MSRValue": "0x103FC00002", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) TBD", - "MSRValue": "0x063FC00002", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) TBD", - "MSRValue": "0x063B800002", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all demand data writes (RFOs) TBD", - "MSRValue": "0x0604000002", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", - "MSRValue": "0x3FBC000004", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", - "MSRValue": "0x083FC00004", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", - "MSRValue": "0x103FC00004", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", - "MSRValue": "0x063FC00004", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", - "MSRValue": "0x063B800004", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", - "MSRValue": "0x0604000004", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", - "MSRValue": "0x3FBC000010", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD", - "MSRValue": "0x083FC00010", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD", - "MSRValue": "0x103FC00010", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD", - "MSRValue": "0x063FC00010", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD", - "MSRValue": "0x063B800010", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads TBD", - "MSRValue": "0x0604000010", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", - "MSRValue": "0x3FBC000020", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", - "MSRValue": "0x083FC00020", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", - "MSRValue": "0x103FC00020", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", - "MSRValue": "0x063FC00020", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", - "MSRValue": "0x063B800020", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", - "MSRValue": "0x0604000020", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", - "MSRValue": "0x3FBC000080", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", - "MSRValue": "0x083FC00080", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", - "MSRValue": "0x103FC00080", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", - "MSRValue": "0x063FC00080", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", - "MSRValue": "0x063B800080", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", - "MSRValue": "0x0604000080", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", - "MSRValue": "0x3FBC000100", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", - "MSRValue": "0x083FC00100", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", - "MSRValue": "0x103FC00100", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", - "MSRValue": "0x063FC00100", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", - "MSRValue": "0x063B800100", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", - "MSRValue": "0x0604000100", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", - "MSRValue": "0x3FBC000400", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", - "MSRValue": "0x083FC00400", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", - "MSRValue": "0x103FC00400", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", - "MSRValue": "0x063FC00400", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", - "MSRValue": "0x063B800400", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", - "MSRValue": "0x0604000400", - "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { + "MSRIndex": "0x1a6,0x1a7", + "MSRValue": "0x0604000400", "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x3FBC000490", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x083FC00490", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x103FC00490", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x063FC00490", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x063B800490", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x0604000490", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x3FBC000120", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x083FC00120", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x103FC00120", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x063FC00120", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x063B800120", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x0604000120", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x3FBC000491", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x083FC00491", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x103FC00491", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x063FC00491", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x063B800491", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x0604000491", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD TBD", - "MSRValue": "0x3FBC000122", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x083FC00122", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x103FC00122", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x063FC00122", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x063B800122", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "Offcore": "1", - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "TBD TBD", - "MSRValue": "0x0604000122", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", - "MSRIndex": "0x1a6, 0x1a7", - "PublicDescription": "TBD TBD", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" + "UMask": "0x1" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylakex/other.json b/tools/perf/pmu-events/arch/x86/skylakex/other.json index 778a541463eb..f6b147ba8ef6 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/other.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/other.json @@ -1,164 +1,116 @@ [ { - "EventCode": "0x28", - "UMask": "0x7", - "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the Non-AVX turbo schedule.", - "Counter": "0,1,2,3", - "EventName": "CORE_POWER.LVL0_TURBO_LICENSE", - "PublicDescription": "Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x28", - "UMask": "0x18", - "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX2 turbo schedule.", - "Counter": "0,1,2,3", - "EventName": "CORE_POWER.LVL1_TURBO_LICENSE", - "PublicDescription": "Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x28", - "UMask": "0x20", - "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.", - "Counter": "0,1,2,3", - "EventName": "CORE_POWER.LVL2_TURBO_LICENSE", - "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server michroarchtecture). This includes high current AVX 512-bit instructions.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x28", - "UMask": "0x40", "BriefDescription": "Core cycles the core was throttled due to a pending power level request.", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x28", "EventName": "CORE_POWER.THROTTLE", "PublicDescription": "Core cycles the out-of-order engine was throttled due to a pending power level request.", "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x40" }, { - "EventCode": "0x32", - "UMask": "0x1", - "BriefDescription": "Number of PREFETCHNTA instructions executed.", - "Counter": "0,1,2,3", - "EventName": "SW_PREFETCH_ACCESS.NTA", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x32", - "UMask": "0x2", - "BriefDescription": "Number of PREFETCHT0 instructions executed.", - "Counter": "0,1,2,3", - "EventName": "SW_PREFETCH_ACCESS.T0", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x32", - "UMask": "0x4", - "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.", - "Counter": "0,1,2,3", - "EventName": "SW_PREFETCH_ACCESS.T1_T2", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x32", - "UMask": "0x8", - "BriefDescription": "Number of PREFETCHW instructions executed.", - "Counter": "0,1,2,3", - "EventName": "SW_PREFETCH_ACCESS.PREFETCHW", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xCB", - "UMask": "0x1", - "BriefDescription": "Number of hardware interrupts received by the processor.", - "Counter": "0,1,2,3", - "EventName": "HW_INTERRUPTS.RECEIVED", - "PublicDescription": "Counts the number of hardware interruptions received by the processor.", - "SampleAfterValue": "203", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xEF", - "UMask": "0x1", - "Counter": "0,1,2,3", - "EventName": "CORE_SNOOP_RESPONSE.RSP_IHITI", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xEF", - "UMask": "0x2", - "Counter": "0,1,2,3", - "EventName": "CORE_SNOOP_RESPONSE.RSP_IHITFSE", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xEF", - "UMask": "0x4", - "Counter": "0,1,2,3", - "EventName": "CORE_SNOOP_RESPONSE.RSP_SHITFSE", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xEF", - "UMask": "0x8", - "Counter": "0,1,2,3", - "EventName": "CORE_SNOOP_RESPONSE.RSP_SFWDM", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xEF", - "UMask": "0x10", - "Counter": "0,1,2,3", - "EventName": "CORE_SNOOP_RESPONSE.RSP_IFWDM", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xEF", - "UMask": "0x20", - "Counter": "0,1,2,3", - "EventName": "CORE_SNOOP_RESPONSE.RSP_IFWDFE", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xEF", - "UMask": "0x40", - "Counter": "0,1,2,3", - "EventName": "CORE_SNOOP_RESPONSE.RSP_SFWDFE", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xFE", - "UMask": "0x2", - "BriefDescription": "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly", - "Counter": "0,1,2,3", - "EventName": "IDI_MISC.WB_UPGRADE", - "PublicDescription": "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xFE", - "UMask": "0x4", "BriefDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xFE", "EventName": "IDI_MISC.WB_DOWNGRADE", "PublicDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" + }, + { + "BriefDescription": "Number of PREFETCHW instructions executed.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x32", + "EventName": "SW_PREFETCH_ACCESS.PREFETCHW", + "SampleAfterValue": "2000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the Non-AVX turbo schedule.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x28", + "EventName": "CORE_POWER.LVL0_TURBO_LICENSE", + "PublicDescription": "Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes.", + "SampleAfterValue": "200003", + "UMask": "0x7" + }, + { + "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX2 turbo schedule.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x28", + "EventName": "CORE_POWER.LVL1_TURBO_LICENSE", + "PublicDescription": "Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions.", + "SampleAfterValue": "200003", + "UMask": "0x18" + }, + { + "BriefDescription": "Number of PREFETCHT0 instructions executed.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x32", + "EventName": "SW_PREFETCH_ACCESS.T0", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Number of hardware interrupts received by the processor.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xCB", + "EventName": "HW_INTERRUPTS.RECEIVED", + "PublicDescription": "Counts the number of hardware interruptions received by the processor.", + "SampleAfterValue": "203", + "UMask": "0x1" + }, + { + "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x28", + "EventName": "CORE_POWER.LVL2_TURBO_LICENSE", + "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server michroarchtecture). This includes high current AVX 512-bit instructions.", + "SampleAfterValue": "200003", + "UMask": "0x20" + }, + { + "BriefDescription": "Number of PREFETCHNTA instructions executed.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x32", + "EventName": "SW_PREFETCH_ACCESS.NTA", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x32", + "EventName": "SW_PREFETCH_ACCESS.T1_T2", + "SampleAfterValue": "2000003", + "UMask": "0x4" + }, + { + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x09", + "EventName": "MEMORY_DISAMBIGUATION.HISTORY_RESET", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xFE", + "EventName": "IDI_MISC.WB_UPGRADE", + "PublicDescription": "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly.", + "SampleAfterValue": "100003", + "UMask": "0x2" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json index 369f56c1d1b5..3bfc6943ddf9 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json @@ -1,967 +1,959 @@ [ { - "UMask": "0x1", - "BriefDescription": "Instructions retired from execution.", - "Counter": "Fixed counter 0", - "EventName": "INST_RETIRED.ANY", - "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", - "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 0" - }, - { - "UMask": "0x2", - "BriefDescription": "Core cycles when the thread is not in halt state", - "Counter": "Fixed counter 1", - "EventName": "CPU_CLK_UNHALTED.THREAD", - "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", - "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 1" - }, - { - "UMask": "0x2", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "Counter": "Fixed counter 1", - "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 1" - }, - { - "UMask": "0x3", - "BriefDescription": "Reference cycles when the core is not in halt state.", - "Counter": "Fixed counter 2", - "EventName": "CPU_CLK_UNHALTED.REF_TSC", - "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", - "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 2" - }, - { - "EventCode": "0x03", - "UMask": "0x2", - "BriefDescription": "Loads blocked by overlapping with store buffer that cannot be forwarded .", + "BriefDescription": "Number of instructions retired. General Counter - architectural event", "Counter": "0,1,2,3", - "EventName": "LD_BLOCKS.STORE_FORWARD", - "PublicDescription": "Counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when:a. preceding store conflicts with the load (incomplete overlap),b. store forwarding is impossible due to u-arch limitations,c. preceding lock RMW operations are not forwarded,d. store has the no-forward bit set (uncacheable/page-split/masked stores),e. all-blocking stores are used (mostly, fences and port I/O), and others.The most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events. See the table of not supported store forwards in the Optimization Guide.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091, SKL044", + "EventCode": "0xC0", + "EventName": "INST_RETIRED.ANY_P", + "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).", + "SampleAfterValue": "2000003" }, { - "EventCode": "0x03", - "UMask": "0x8", - "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use", + "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", "Counter": "0,1,2,3", - "EventName": "LD_BLOCKS.NO_SR", - "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x07", - "UMask": "0x1", - "BriefDescription": "False dependencies in MOB due to partial compare on address.", - "Counter": "0,1,2,3", - "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS", - "PublicDescription": "Counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x0D", - "UMask": "0x1", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)", - "Counter": "0,1,2,3", - "EventName": "INT_MISC.RECOVERY_CYCLES", - "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x0D", - "UMask": "0x1", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", - "Counter": "0,1,2,3", - "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x0D", - "UMask": "0x80", - "BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.", - "Counter": "0,1,2,3", - "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.STALL_CYCLES", "Invert": "1", - "EventCode": "0x0E", - "UMask": "0x1", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", - "Counter": "0,1,2,3", - "EventName": "UOPS_ISSUED.STALL_CYCLES", - "CounterMask": "1", - "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", + "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0x0E", - "UMask": "0x1", - "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)", - "Counter": "0,1,2,3", - "EventName": "UOPS_ISSUED.ANY", - "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x0E", - "UMask": "0x2", - "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.", - "Counter": "0,1,2,3", - "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH", - "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to \u201cMixing Intel AVX and Intel SSE Code\u201d section of the Optimization Guide.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x0E", - "UMask": "0x20", - "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.", - "Counter": "0,1,2,3", - "EventName": "UOPS_ISSUED.SLOW_LEA", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x14", - "UMask": "0x1", - "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.", - "Counter": "0,1,2,3", - "EventName": "ARITH.DIVIDER_ACTIVE", - "CounterMask": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "UMask": "0x0", - "BriefDescription": "Thread cycles when thread is not in halt state", - "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.THREAD_P", - "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "UMask": "0x0", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EdgeDetect": "1", - "EventCode": "0x3C", - "UMask": "0x0", - "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.", - "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.RING0_TRANS", - "CounterMask": "1", - "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "UMask": "0x1", - "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", - "Counter": "0,1,2,3", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK", - "SampleAfterValue": "2503", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "UMask": "0x1", - "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", - "Counter": "0,1,2,3", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", - "AnyThread": "1", - "SampleAfterValue": "2503", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "UMask": "0x1", - "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", - "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", - "AnyThread": "1", - "SampleAfterValue": "2503", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "UMask": "0x1", - "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", - "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK", - "SampleAfterValue": "2503", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "UMask": "0x2", - "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", - "Counter": "0,1,2,3", - "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "UMask": "0x2", - "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", - "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", - "SampleAfterValue": "2503", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x4C", - "UMask": "0x1", - "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.", - "Counter": "0,1,2,3", - "EventName": "LOAD_HIT_PRE.SW_PF", - "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x59", - "UMask": "0x1", - "BriefDescription": "Cycles where the pipeline is stalled due to serializing operations.", - "Counter": "0,1,2,3", - "EventName": "PARTIAL_RAT_STALLS.SCOREBOARD", - "PublicDescription": "This event counts cycles during which the microcode scoreboard stalls happen.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EdgeDetect": "1", - "Invert": "1", - "EventCode": "0x5E", - "UMask": "0x1", - "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", - "Counter": "0,1,2,3", - "EventName": "RS_EVENTS.EMPTY_END", - "CounterMask": "1", - "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x5E", - "UMask": "0x1", - "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", - "Counter": "0,1,2,3", - "EventName": "RS_EVENTS.EMPTY_CYCLES", - "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x87", - "UMask": "0x1", - "BriefDescription": "Stalls caused by changing prefix length of the instruction.", - "Counter": "0,1,2,3", - "EventName": "ILD_STALL.LCP", - "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x1", - "BriefDescription": "Cycles per thread when uops are executed in port 0", - "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_0", - "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x2", - "BriefDescription": "Cycles per thread when uops are executed in port 1", - "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_1", - "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x4", - "BriefDescription": "Cycles per thread when uops are executed in port 2", - "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_2", - "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 2.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x8", - "BriefDescription": "Cycles per thread when uops are executed in port 3", - "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_3", - "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 3.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x10", - "BriefDescription": "Cycles per thread when uops are executed in port 4", - "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_4", - "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 4.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x20", - "BriefDescription": "Cycles per thread when uops are executed in port 5", - "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_5", - "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x40", - "BriefDescription": "Cycles per thread when uops are executed in port 6", - "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_6", - "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x80", - "BriefDescription": "Cycles per thread when uops are executed in port 7", - "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_7", - "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 7.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xa2", - "UMask": "0x1", - "BriefDescription": "Resource-related stall cycles", - "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.ANY", - "PublicDescription": "Counts resource-related stall cycles.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA2", - "UMask": "0x8", - "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", - "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.SB", - "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA3", - "UMask": "0x1", - "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", - "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", - "CounterMask": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA3", - "UMask": "0x4", - "BriefDescription": "Total execution stalls.", - "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", - "CounterMask": "4", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA3", - "UMask": "0x5", - "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", - "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", - "CounterMask": "5", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA3", - "UMask": "0x8", - "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", - "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", - "CounterMask": "8", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA3", - "UMask": "0xc", - "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", - "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", - "CounterMask": "12", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA3", - "UMask": "0x10", - "BriefDescription": "Cycles while memory subsystem has an outstanding load.", - "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", - "CounterMask": "16", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA3", - "UMask": "0x14", - "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", - "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", - "CounterMask": "20", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xA6", - "UMask": "0x1", - "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.", - "Counter": "0,1,2,3", - "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS", - "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA6", - "UMask": "0x2", - "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.", - "Counter": "0,1,2,3", - "EventName": "EXE_ACTIVITY.1_PORTS_UTIL", - "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA6", - "UMask": "0x4", - "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.", - "Counter": "0,1,2,3", - "EventName": "EXE_ACTIVITY.2_PORTS_UTIL", - "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA6", - "UMask": "0x8", - "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.", - "Counter": "0,1,2,3", - "EventName": "EXE_ACTIVITY.3_PORTS_UTIL", - "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA6", - "UMask": "0x10", "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA6", "EventName": "EXE_ACTIVITY.4_PORTS_UTIL", "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "EventCode": "0xA6", - "UMask": "0x40", - "BriefDescription": "Cycles where the Store Buffer was full and no outstanding load.", + "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.", "Counter": "0,1,2,3", - "EventName": "EXE_ACTIVITY.BOUND_ON_STORES", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA8", - "UMask": "0x1", - "BriefDescription": "Number of Uops delivered by the LSD.", - "Counter": "0,1,2,3", - "EventName": "LSD.UOPS", - "PublicDescription": "Number of uops delivered to the back-end by the LSD(Loop Stream Detector).", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA8", - "UMask": "0x1", - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", - "Counter": "0,1,2,3", - "EventName": "LSD.CYCLES_4_UOPS", - "CounterMask": "4", - "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA8", - "UMask": "0x1", - "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", - "Counter": "0,1,2,3", - "EventName": "LSD.CYCLES_ACTIVE", + "CounterHTOff": "0,1,2,3,4,5,6,7", "CounterMask": "1", - "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).", + "EventCode": "0x14", + "EventName": "ARITH.DIVIDER_ACTIVE", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Cycles where at least 4 uops were executed per-thread", + "BriefDescription": "False dependencies in MOB due to partial compare on address.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", - "CounterMask": "4", - "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x07", + "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS", + "PublicDescription": "Counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.", + "SampleAfterValue": "100003", + "UMask": "0x1" }, { - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Cycles where at least 3 uops were executed per-thread", + "BriefDescription": "Far branch instructions retired.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", - "CounterMask": "3", - "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "PEBS": "1", + "PublicDescription": "This event counts far branch instructions retired.", + "SampleAfterValue": "100007", + "UMask": "0x40" }, { - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Cycles where at least 2 uops were executed per-thread", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", - "CounterMask": "2", - "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Cycles where at least 1 uop was executed per-thread", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", - "CounterMask": "1", - "PublicDescription": "Cycles where at least 1 uop was executed per-thread.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "Invert": "1", - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.STALL_CYCLES", - "CounterMask": "1", - "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.THREAD", - "PublicDescription": "Number of uops to be executed per-thread each cycle.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB1", - "UMask": "0x2", - "BriefDescription": "Number of uops executed on the core.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE", - "PublicDescription": "Number of uops executed from any thread.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "Invert": "1", - "EventCode": "0xB1", - "UMask": "0x2", - "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", - "CounterMask": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", - "CounterMask": "4", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", - "CounterMask": "3", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", - "CounterMask": "2", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", - "CounterMask": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB1", - "UMask": "0x10", "BriefDescription": "Counts the number of x87 uops dispatched.", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB1", "EventName": "UOPS_EXECUTED.X87", "PublicDescription": "Counts the number of x87 uops executed.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "EventCode": "0xC0", - "UMask": "0x0", - "BriefDescription": "Number of instructions retired. General Counter - architectural event", + "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.", "Counter": "0,1,2,3", - "EventName": "INST_RETIRED.ANY_P", - "Errata": "SKL091, SKL044", - "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC0", - "UMask": "0x1", - "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", - "PEBS": "2", - "Counter": "1", - "EventName": "INST_RETIRED.PREC_DIST", - "Errata": "SKL091, SKL044", - "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled.", - "SampleAfterValue": "2000003", - "CounterHTOff": "1" - }, - { - "Invert": "1", - "EventCode": "0xC0", - "UMask": "0x1", - "BriefDescription": "Number of cycles using always true condition applied to PEBS instructions retired event.", - "PEBS": "2", - "Counter": "0,2,3", - "EventName": "INST_RETIRED.TOTAL_CYCLES_PS", - "CounterMask": "10", - "Errata": "SKL091, SKL044", - "PublicDescription": "Number of cycles using an always true condition applied to PEBS instructions retired event. (inst_ret< 16)", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,2,3" - }, - { - "EventCode": "0xC1", - "UMask": "0x3f", - "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.", - "Counter": "0,1,2,3", - "EventName": "OTHER_ASSISTS.ANY", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x4C", + "EventName": "LOAD_HIT_PRE.SW_PF", + "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" + }, + { + "BriefDescription": "Mispredicted direct and indirect near call instructions retired.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC5", + "EventName": "BR_MISP_RETIRED.NEAR_CALL", + "PEBS": "1", + "PublicDescription": "Counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.", + "SampleAfterValue": "400009", + "UMask": "0x2" + }, + { + "BriefDescription": "Total execution stalls.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", + "SampleAfterValue": "2000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "5", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", + "SampleAfterValue": "2000003", + "UMask": "0x5" + }, + { + "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x0E", + "EventName": "UOPS_ISSUED.SLOW_LEA", + "SampleAfterValue": "2000003", + "UMask": "0x20" }, { - "Invert": "1", - "EventCode": "0xC2", - "UMask": "0x2", "BriefDescription": "Cycles with less than 10 actually retired uops.", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.TOTAL_CYCLES", + "CounterHTOff": "0,1,2,3,4,5,6,7", "CounterMask": "10", + "EventCode": "0xC2", + "EventName": "UOPS_RETIRED.TOTAL_CYCLES", + "Invert": "1", "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "Invert": "1", - "EventCode": "0xC2", - "UMask": "0x2", - "BriefDescription": "Cycles without actually retired uops.", + "BriefDescription": "Thread cycles when thread is not in halt state", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.STALL_CYCLES", - "CounterMask": "1", - "PublicDescription": "This event counts cycles without actually retired uops.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", + "SampleAfterValue": "2000003" }, { - "EventCode": "0xC2", - "UMask": "0x2", - "BriefDescription": "Retirement slots used.", + "BriefDescription": "Cycles where at least 2 uops were executed per-thread", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.RETIRE_SLOTS", - "PublicDescription": "Counts the retirement slots used.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "2", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", + "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" + }, + { + "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK", + "SampleAfterValue": "25003", + "UMask": "0x1" }, { - "EdgeDetect": "1", - "EventCode": "0xC3", - "UMask": "0x1", "BriefDescription": "Number of machine clears (nukes) of any type.", "Counter": "0,1,2,3", - "EventName": "MACHINE_CLEARS.COUNT", + "CounterHTOff": "0,1,2,3,4,5,6,7", "CounterMask": "1", - "PublicDescription": "Number of machine clears (nukes) of any type.", + "EdgeDetect": "1", + "EventCode": "0xC3", + "EventName": "MACHINE_CLEARS.COUNT", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" + }, + { + "AnyThread": "1", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "Counter": "Fixed counter 1", + "CounterHTOff": "Fixed counter 1", + "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.THREAD", + "PublicDescription": "Number of uops to be executed per-thread each cycle.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles where at least 3 uops were executed per-thread", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "3", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", + "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", + "Invert": "1", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Cycles where the Store Buffer was full and no outstanding load.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA6", + "EventName": "EXE_ACTIVITY.BOUND_ON_STORES", + "SampleAfterValue": "2000003", + "UMask": "0x40" + }, + { + "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "8", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", + "SampleAfterValue": "2000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xA8", + "EventName": "LSD.CYCLES_ACTIVE", + "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x0D", + "EventName": "INT_MISC.RECOVERY_CYCLES", + "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK", + "SampleAfterValue": "25003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles per thread when uops are executed in port 0", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_0", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles per thread when uops are executed in port 1", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_1", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Cycles per thread when uops are executed in port 2", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_2", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 2.", + "SampleAfterValue": "2000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Cycles per thread when uops are executed in port 3", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_3", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 3.", + "SampleAfterValue": "2000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Cycles per thread when uops are executed in port 4", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_4", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 4.", + "SampleAfterValue": "2000003", + "UMask": "0x10" + }, + { + "BriefDescription": "Cycles per thread when uops are executed in port 5", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_5", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.", + "SampleAfterValue": "2000003", + "UMask": "0x20" + }, + { + "BriefDescription": "Cycles per thread when uops are executed in port 6", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_6", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.", + "SampleAfterValue": "2000003", + "UMask": "0x40" + }, + { + "BriefDescription": "Cycles per thread when uops are executed in port 7", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_7", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 7.", + "SampleAfterValue": "2000003", + "UMask": "0x80" + }, + { + "AnyThread": "1", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x0D", + "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", + "Counter": "1", + "CounterHTOff": "1", + "Errata": "SKL091, SKL044", + "EventCode": "0xC0", + "EventName": "INST_RETIRED.PREC_DIST", + "PEBS": "2", + "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0xA8", + "EventName": "LSD.CYCLES_4_UOPS", + "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA6", + "EventName": "EXE_ACTIVITY.3_PORTS_UTIL", + "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.", + "SampleAfterValue": "2000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.STORE_FORWARD", + "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.", + "SampleAfterValue": "100003", + "UMask": "0x2" + }, + { + "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0x5E", + "EventName": "RS_EVENTS.EMPTY_END", + "Invert": "1", + "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "AnyThread": "1", + "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "25003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles where the pipeline is stalled due to serializing operations.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x59", + "EventName": "PARTIAL_RAT_STALLS.SCOREBOARD", + "PublicDescription": "This event counts cycles during which the microcode scoreboard stalls happen.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x0E", + "EventName": "UOPS_ISSUED.STALL_CYCLES", + "Invert": "1", + "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Not taken branch instructions retired.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091", + "EventCode": "0xc4", + "EventName": "BR_INST_RETIRED.COND_NTAKEN", + "PublicDescription": "This event counts not taken branch instructions retired.", + "SampleAfterValue": "400009", + "UMask": "0x10" + }, + { + "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "3", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Reference cycles when the core is not in halt state.", + "Counter": "Fixed counter 2", + "CounterHTOff": "Fixed counter 2", + "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", + "SampleAfterValue": "2000003", + "UMask": "0x3" + }, + { + "BriefDescription": "All mispredicted macro branch instructions retired.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC5", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.", + "SampleAfterValue": "400009" + }, + { + "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC1", + "EventName": "OTHER_ASSISTS.ANY", + "SampleAfterValue": "100003", + "UMask": "0x3f" + }, + { + "BriefDescription": "Cycles without actually retired uops.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xC2", + "EventName": "UOPS_RETIRED.STALL_CYCLES", + "Invert": "1", + "PublicDescription": "This event counts cycles without actually retired uops.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Number of Uops delivered by the LSD.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA8", + "EventName": "LSD.UOPS", + "PublicDescription": "Number of uops delivered to the back-end by the LSD(Loop Stream Detector).", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", + "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE", + "SampleAfterValue": "25003", + "UMask": "0x2" + }, + { + "BriefDescription": "Stalls caused by changing prefix length of the instruction.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x87", + "EventName": "ILD_STALL.LCP", + "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "16", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", + "SampleAfterValue": "2000003", + "UMask": "0x10" + }, + { + "BriefDescription": "Taken branch instructions retired.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.NEAR_TAKEN", + "PEBS": "1", + "PublicDescription": "This event counts taken branch instructions retired.", + "SampleAfterValue": "400009", + "UMask": "0x20" + }, + { + "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x03", + "EventName": "LD_BLOCKS.NO_SR", + "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.", + "SampleAfterValue": "100003", + "UMask": "0x8" + }, + { + "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x0E", + "EventName": "UOPS_ISSUED.ANY", + "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Core cycles when the thread is not in halt state", + "Counter": "Fixed counter 1", + "CounterHTOff": "Fixed counter 1", + "EventName": "CPU_CLK_UNHALTED.THREAD", + "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "AnyThread": "1", + "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "25003", + "UMask": "0x1" + }, + { + "BriefDescription": "Direct and indirect near call instructions retired.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.NEAR_CALL", + "PEBS": "1", + "PublicDescription": "This event counts both direct and indirect near call instructions retired.", + "SampleAfterValue": "100007", + "UMask": "0x2" + }, + { + "BriefDescription": "Number of retired PAUSE instructions (that do not end up with a VMExit to the VMM; TSX aborted Instructions may be counted). This event is not supported on first SKL and KBL products.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xCC", + "EventName": "ROB_MISC_EVENTS.PAUSE_INST", + "SampleAfterValue": "2000003", + "UMask": "0x40" + }, + { + "BriefDescription": "Resource-related stall cycles", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xa2", + "EventName": "RESOURCE_STALLS.ANY", + "PublicDescription": "Counts resource-related stall cycles.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "EventCode": "0xC3", - "UMask": "0x4", "BriefDescription": "Self-modifying code (SMC) detected.", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC3", "EventName": "MACHINE_CLEARS.SMC", "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "EventCode": "0xC4", - "UMask": "0x0", - "BriefDescription": "All (macro) branch instructions retired.", + "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES", - "Errata": "SKL091", - "PublicDescription": "Counts all (macro) branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "SampleAfterValue": "25003", + "UMask": "0x2" }, { - "EventCode": "0xC4", - "UMask": "0x1", - "BriefDescription": "Conditional branch instructions retired.", - "PEBS": "1", + "BriefDescription": "Cycles where at least 4 uops were executed per-thread", "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.CONDITIONAL", - "Errata": "SKL091", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "4", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", + "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", + "SampleAfterValue": "2000003", + "UMask": "0x1" }, { - "EventCode": "0xC4", - "UMask": "0x2", - "BriefDescription": "Direct and indirect near call instructions retired.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.NEAR_CALL", - "Errata": "SKL091", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x4", - "BriefDescription": "All (macro) branch instructions retired.", - "PEBS": "2", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", - "Errata": "SKL091", - "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC4", - "UMask": "0x8", - "BriefDescription": "Return instructions retired.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.NEAR_RETURN", - "Errata": "SKL091", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x10", - "BriefDescription": "Counts all not taken macro branch instructions retired.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.NOT_TAKEN", - "Errata": "SKL091", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts not taken branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x20", - "BriefDescription": "Taken branch instructions retired.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.NEAR_TAKEN", - "Errata": "SKL091", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x40", - "BriefDescription": "Counts the number of far branch instructions retired.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.FAR_BRANCH", - "Errata": "SKL091", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts far branch instructions retired.", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC5", - "UMask": "0x0", - "BriefDescription": "All mispredicted macro branch instructions retired.", - "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", - "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC5", - "UMask": "0x1", - "BriefDescription": "Mispredicted conditional branch instructions retired.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.CONDITIONAL", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC5", - "UMask": "0x2", - "BriefDescription": "Mispredicted direct and indirect near call instructions retired.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.NEAR_CALL", - "PublicDescription": "This event counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC5", - "UMask": "0x4", - "BriefDescription": "Mispredicted macro branch instructions retired.", - "PEBS": "2", - "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", - "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC5", - "UMask": "0x20", "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.", - "PEBS": "1", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC5", "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", - "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken.", + "PEBS": "1", "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x20" + }, + { + "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "CounterMask": "20", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", + "SampleAfterValue": "2000003", + "UMask": "0x14" + }, + { + "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA6", + "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS", + "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of cycles using always true condition applied to PEBS instructions retired event.", + "Counter": "0,2,3", + "CounterHTOff": "0,2,3", + "CounterMask": "10", + "Errata": "SKL091, SKL044", + "EventCode": "0xC0", + "EventName": "INST_RETIRED.TOTAL_CYCLES_PS", + "Invert": "1", + "PEBS": "2", + "PublicDescription": "Number of cycles using an always true condition applied to PEBS instructions retired event. (inst_ret< 16)", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Retirement slots used.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC2", + "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "PublicDescription": "Counts the retirement slots used.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "AnyThread": "1", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x3C", + "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", + "SampleAfterValue": "2000003" + }, + { + "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x0E", + "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH", + "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.", + "SampleAfterValue": "2000003", + "UMask": "0x2" }, { - "EventCode": "0xCC", - "UMask": "0x20", "BriefDescription": "Increments whenever there is an update to the LBR array.", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xCC", "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x20" }, { - "EventCode": "0xCC", - "UMask": "0x40", - "BriefDescription": "Number of retired PAUSE instructions (that do not end up with a VMExit to the VMM; TSX aborted Instructions may be counted). This event is not supported on first SKL and KBL products.", + "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", "Counter": "0,1,2,3", - "EventName": "ROB_MISC_EVENTS.PAUSE_INST", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x5E", + "EventName": "RS_EVENTS.EMPTY_CYCLES", + "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0xE6", - "UMask": "0x1", - "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", + "BriefDescription": "Return instructions retired.", "Counter": "0,1,2,3", - "EventName": "BACLEARS.ANY", - "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.NEAR_RETURN", + "PEBS": "1", + "PublicDescription": "This event counts return instructions retired.", + "SampleAfterValue": "100007", + "UMask": "0x8" + }, + { + "BriefDescription": "Instructions retired from execution.", + "Counter": "Fixed counter 0", + "CounterHTOff": "Fixed counter 0", + "EventName": "INST_RETIRED.ANY", + "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "2", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA2", + "EventName": "RESOURCE_STALLS.SB", + "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.", + "SampleAfterValue": "2000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EdgeDetect": "1", + "EventCode": "0x3C", + "EventName": "CPU_CLK_UNHALTED.RING0_TRANS", + "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).", + "SampleAfterValue": "100007" + }, + { + "BriefDescription": "All (macro) branch instructions retired.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "Errata": "SKL091", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", + "PEBS": "2", + "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.", + "SampleAfterValue": "400009", + "UMask": "0x4" + }, + { + "BriefDescription": "Mispredicted macro branch instructions retired.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3", + "EventCode": "0xC5", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", + "PEBS": "2", + "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.", + "SampleAfterValue": "400009", + "UMask": "0x4" + }, + { + "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA6", + "EventName": "EXE_ACTIVITY.1_PORTS_UTIL", + "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Not taken branch instructions retired.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.NOT_TAKEN", + "PublicDescription": "This event counts not taken branch instructions retired.", + "SampleAfterValue": "400009", + "UMask": "0x10" + }, + { + "BriefDescription": "Conditional branch instructions retired.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.CONDITIONAL", + "PEBS": "1", + "PublicDescription": "This event counts conditional branch instructions retired.", + "SampleAfterValue": "400009", + "UMask": "0x1" + }, + { + "BriefDescription": "Mispredicted conditional branch instructions retired.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xC5", + "EventName": "BR_MISP_RETIRED.CONDITIONAL", + "PEBS": "1", + "PublicDescription": "This event counts mispredicted conditional branch instructions retired.", + "SampleAfterValue": "400009", + "UMask": "0x1" + }, + { + "BriefDescription": "Number of uops executed on the core.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CORE", + "PublicDescription": "Number of uops executed from any thread.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "12", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", + "SampleAfterValue": "2000003", + "UMask": "0xc" + }, + { + "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xA6", + "EventName": "EXE_ACTIVITY.2_PORTS_UTIL", + "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.", + "SampleAfterValue": "2000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x0D", + "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES", + "SampleAfterValue": "2000003", + "UMask": "0x80" + }, + { + "BriefDescription": "All (macro) branch instructions retired.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Errata": "SKL091", + "EventCode": "0xC4", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "PublicDescription": "Counts all (macro) branch instructions retired.", + "SampleAfterValue": "400009" + }, + { + "BriefDescription": "Cycles where at least 1 uop was executed per-thread", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xB1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "PublicDescription": "Cycles where at least 1 uop was executed per-thread.", + "SampleAfterValue": "2000003", + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0xA3", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", + "SampleAfterValue": "2000003", + "UMask": "0x1" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json index 390bdab1be9d..f31794d3b926 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json @@ -4,14 +4,14 @@ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)", "MetricGroup": "TopdownL1", "MetricName": "Frontend_Bound", - "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound." + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound." }, { "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", - "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", "MetricGroup": "TopdownL1_SMT", "MetricName": "Frontend_Bound_SMT", - "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-ops (uops). Ideally the Frontend can issue 4 uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU." + "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU." }, { "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations", @@ -22,13 +22,14 @@ }, { "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.", - "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", "MetricGroup": "TopdownL1_SMT", "MetricName": "Bad_Speculation_SMT", "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU." }, { "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend", + "MetricConstraint": "NO_NMI_WATCHDOG", "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * cycles)) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles)) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * cycles)) )", "MetricGroup": "TopdownL1", "MetricName": "Backend_Bound", @@ -36,7 +37,7 @@ }, { "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.", - "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) )", + "MetricExpr": "1 - ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) )", "MetricGroup": "TopdownL1_SMT", "MetricName": "Backend_Bound_SMT", "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU." @@ -50,7 +51,7 @@ }, { "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.", - "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))", + "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", "MetricGroup": "TopdownL1_SMT", "MetricName": "Retiring_SMT", "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum 4 uops retired per cycle has been achieved. Maximizing Retiring typically increases the Instruction-Per-Cycle metric. Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Microcode assists are categorized under Retiring. They hurt performance and can often be avoided. SMT version; use when SMT is enabled and measuring per logical CPU." @@ -58,7 +59,7 @@ { "BriefDescription": "Instructions Per Cycle (per Logical Processor)", "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD", - "MetricGroup": "TopDownL1", + "MetricGroup": "Summary", "MetricName": "IPC" }, { @@ -73,24 +74,6 @@ "MetricGroup": "Branches;Fetch_BW;PGO", "MetricName": "IpTB" }, - { - "BriefDescription": "Branch instructions per taken branch. ", - "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", - "MetricGroup": "Branches;PGO", - "MetricName": "BpTB" - }, - { - "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely (includes speculatively fetches) consumed by program instructions", - "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1 ) )", - "MetricGroup": "PGO;IcMiss", - "MetricName": "IFetch_Line_Utilization" - }, - { - "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", - "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)", - "MetricGroup": "DSB;Fetch_BW", - "MetricName": "DSB_Coverage" - }, { "BriefDescription": "Cycles Per Instruction (per Logical Processor)", "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)", @@ -104,86 +87,104 @@ "MetricName": "CLKS" }, { - "BriefDescription": "Total issue-pipeline slots (per-Physical Core)", + "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)", "MetricExpr": "4 * cycles", "MetricGroup": "TopDownL1", "MetricName": "SLOTS" }, { - "BriefDescription": "Total issue-pipeline slots (per-Physical Core)", - "MetricExpr": "4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)", + "MetricExpr": "4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "MetricGroup": "TopDownL1_SMT", "MetricName": "SLOTS_SMT" }, { - "BriefDescription": "Instructions per Load (lower number means higher occurance rate)", + "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)", "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS", "MetricGroup": "Instruction_Type", - "MetricName": "IpL" + "MetricName": "IpLoad" }, { - "BriefDescription": "Instructions per Store (lower number means higher occurance rate)", + "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)", "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES", "MetricGroup": "Instruction_Type", - "MetricName": "IpS" + "MetricName": "IpStore" }, { - "BriefDescription": "Instructions per Branch (lower number means higher occurance rate)", + "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", "MetricGroup": "Branches;Instruction_Type", - "MetricName": "IpB" + "MetricName": "IpBranch" }, { - "BriefDescription": "Instruction per (near) call (lower number means higher occurance rate)", + "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)", "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL", "MetricGroup": "Branches", "MetricName": "IpCall" }, + { + "BriefDescription": "Branch instructions per taken branch. ", + "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", + "MetricGroup": "Branches;PGO", + "MetricName": "BpTkBranch" + }, + { + "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)", + "MetricExpr": "INST_RETIRED.ANY / ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )", + "MetricGroup": "FLOPS;FP_Arith;Instruction_Type", + "MetricName": "IpFLOP" + }, { "BriefDescription": "Total number of retired Instructions", "MetricExpr": "INST_RETIRED.ANY", - "MetricGroup": "Summary", + "MetricGroup": "Summary;TopDownL1", "MetricName": "Instructions" }, + { + "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", + "MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)", + "MetricGroup": "DSB;Fetch_BW", + "MetricName": "DSB_Coverage" + }, { "BriefDescription": "Instructions Per Cycle (per physical core)", "MetricExpr": "INST_RETIRED.ANY / cycles", - "MetricGroup": "SMT", + "MetricGroup": "SMT;TopDownL1", "MetricName": "CoreIPC" }, { "BriefDescription": "Instructions Per Cycle (per physical core)", - "MetricExpr": "INST_RETIRED.ANY / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", - "MetricGroup": "SMT", + "MetricExpr": "INST_RETIRED.ANY / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", + "MetricGroup": "SMT;TopDownL1", "MetricName": "CoreIPC_SMT" }, { "BriefDescription": "Floating Point Operations Per Cycle", - "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / cycles", + "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / cycles", "MetricGroup": "FLOPS", "MetricName": "FLOPc" }, { "BriefDescription": "Floating Point Operations Per Cycle", - "MetricExpr": "(( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))", + "MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )", "MetricGroup": "FLOPS_SMT", "MetricName": "FLOPc_SMT" }, { "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)", - "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)", - "MetricGroup": "Pipeline", + "MetricExpr": "UOPS_EXECUTED.THREAD / ( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 )", + "MetricGroup": "Pipeline;Ports_Utilization", "MetricName": "ILP" }, { "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)", - "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * cycles))) + (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * cycles)) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * cycles)) ) * (4 * cycles) / BR_MISP_RETIRED.ALL_BRANCHES", "MetricGroup": "BrMispredicts", "MetricName": "Branch_Misprediction_Cost" }, { "BriefDescription": "Branch Misprediction Cost: Fraction of TopDown slots wasted per non-speculative branch misprediction (jeclear)", - "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * (( INT_MISC.RECOVERY_CYCLES_ANY / 2 )) ) / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * (4 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) / BR_MISP_RETIRED.ALL_BRANCHES", + "MetricExpr": "( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * (( INT_MISC.CLEAR_RESTEER_CYCLES + 9 * BACLEARS.ANY ) / cycles) / (4 * ( IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE - ( FRONTEND_RETIRED.LATENCY_GE_1 - FRONTEND_RETIRED.LATENCY_GE_2 ) / (UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) * (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / BR_MISP_RETIRED.ALL_BRANCHES", "MetricGroup": "BrMispredicts_SMT", "MetricName": "Branch_Misprediction_Cost_SMT" }, @@ -213,14 +214,14 @@ }, { "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", + "MetricConstraint": "NO_NMI_WATCHDOG", "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )", "MetricGroup": "TLB", - "MetricName": "Page_Walks_Utilization", - "MetricConstraint": "NO_NMI_WATCHDOG" + "MetricName": "Page_Walks_Utilization" }, { "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses", - "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) )", + "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )", "MetricGroup": "TLB_SMT", "MetricName": "Page_Walks_Utilization_SMT" }, @@ -245,7 +246,7 @@ { "BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]", "MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time", - "MetricGroup": "Memory_BW", + "MetricGroup": "Memory_BW;Offcore", "MetricName": "L3_Cache_Access_BW" }, { @@ -263,7 +264,7 @@ { "BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)", "MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY", - "MetricGroup": "Cache_Misses", + "MetricGroup": "Cache_Misses;Offcore", "MetricName": "L2MPKI_All" }, { @@ -298,7 +299,7 @@ }, { "BriefDescription": "Giga Floating Point Operations Per Second", - "MetricExpr": "( (( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )) / 1000000000 ) / duration_time", + "MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / 1000000000 ) / duration_time", "MetricGroup": "FLOPS;Summary", "MetricName": "GFLOPs" }, @@ -310,44 +311,56 @@ }, { "BriefDescription": "Fraction of cycles where both hardware Logical Processors were active", - "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0", + "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 )", "MetricGroup": "SMT;Summary", "MetricName": "SMT_2T_Utilization" }, { - "BriefDescription": "Fraction of cycles spent in Kernel mode", + "BriefDescription": "Fraction of cycles spent in the Operating System (OS) Kernel mode", "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD", - "MetricGroup": "Summary", + "MetricGroup": "OS", "MetricName": "Kernel_Utilization" }, { "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time", - "MetricGroup": "Memory_BW", + "MetricGroup": "Memory_BW;SoC", "MetricName": "DRAM_BW_Use" }, { "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches", - "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x35\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )", - "MetricGroup": "Memory_Lat", - "MetricName": "DRAM_Read_Latency" + "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x35\\,umask\\=0x21\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )", + "MetricGroup": "Memory_Lat;SoC", + "MetricName": "MEM_Read_Latency" }, { "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", - "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x36\\,umask\\=0x21\\,thresh\\=1@", - "MetricGroup": "Memory_BW", - "MetricName": "DRAM_Parallel_Reads" + "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433\\,thresh\\=1@", + "MetricGroup": "Memory_BW;SoC", + "MetricName": "MEM_Parallel_Reads" + }, + { + "BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]", + "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3 ) * 4 / 1000000000 / duration_time", + "MetricGroup": "IO_BW;SoC;Server", + "MetricName": "IO_Write_BW" + }, + { + "BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]", + "MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3 ) * 4 / 1000000000 / duration_time", + "MetricGroup": "IO_BW;SoC;Server", + "MetricName": "IO_Read_BW" }, { "BriefDescription": "Socket actual clocks when any core is active on that socket", "MetricExpr": "cha_0@event\\=0x0@", - "MetricGroup": "", + "MetricGroup": "SoC", "MetricName": "Socket_CLKS" }, { - "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions. )", + "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]", "MetricExpr": "INST_RETIRED.ANY / ( BR_INST_RETIRED.FAR_BRANCH / 2 )", - "MetricGroup": "", + "MetricGroup": "Branches;OS", "MetricName": "IpFarBranch" }, { diff --git a/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json b/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json index 9c7e5f8beee2..b80b5d66385d 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json @@ -94,17 +94,7 @@ "Unit": "iMC" }, { - "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd", - "Counter": "0,1,2,3", - "EventCode": "0x4", - "EventName": "LLC_MISSES.MEM_READ", - "PerPkg": "1", - "ScaleUnit": "64Bytes", - "UMask": "0x3", - "Unit": "iMC" - }, - { - "BriefDescription": "All DRAM Read CAS Commands issued (does not include underfills) ", + "BriefDescription": "All DRAM Read CAS Commands issued (does not include underfills)", "Counter": "0,1,2,3", "EventCode": "0x4", "EventName": "UNC_M_CAS_COUNT.RD_REG", @@ -119,18 +109,18 @@ "EventCode": "0x4", "EventName": "UNC_M_CAS_COUNT.RD_UNDERFILL", "PerPkg": "1", - "PublicDescription": "Counts CAS (Column Access Select) underfill read commands issued to DRAM due to a partial write, on a per channel basis. CAS commands are issued to specify the address to read or write on DRAM, and this command counts underfill reads. Partial writes must be completed by first reading in the underfill from DRAM and then merging in the partial write data before writing the full line back to DRAM. This event will generally count about the same as the number of partial writes, but may be slightly less because of partials hitting in the WPQ (due to a previous write request). ", + "PublicDescription": "Counts CAS (Column Access Select) underfill read commands issued to DRAM due to a partial write, on a per channel basis. CAS commands are issued to specify the address to read or write on DRAM, and this command counts underfill reads. Partial writes must be completed by first reading in the underfill from DRAM and then merging in the partial write data before writing the full line back to DRAM. This event will generally count about the same as the number of partial writes, but may be slightly less because of partials hitting in the WPQ (due to a previous write request).", "UMask": "0x2", "Unit": "iMC" }, { - "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr", + "BriefDescription": "DRAM CAS (Column Address Strobe) Commands.; DRAM WR_CAS (w/ and w/out auto-pre) in Write Major Mode", "Counter": "0,1,2,3", "EventCode": "0x4", - "EventName": "LLC_MISSES.MEM_WRITE", + "EventName": "UNC_M_CAS_COUNT.WR_WMM", "PerPkg": "1", - "ScaleUnit": "64Bytes", - "UMask": "0xC", + "PublicDescription": "Counts the total number or DRAM Write CAS commands issued on this channel while in Write-Major-Mode.", + "UMask": "0x4", "Unit": "iMC" }, { @@ -139,7 +129,7 @@ "EventCode": "0x10", "EventName": "UNC_M_RPQ_INSERTS", "PerPkg": "1", - "PublicDescription": "Counts the number of read requests allocated into the Read Pending Queue (RPQ). This queue is used to schedule reads out to the memory controller and to track the requests. Requests allocate into the RPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC. The requests deallocate after the read CAS command has been issued to DRAM. This event counts both Isochronous and non-Isochronous requests which were issued to the RPQ. ", + "PublicDescription": "Counts the number of read requests allocated into the Read Pending Queue (RPQ). This queue is used to schedule reads out to the memory controller and to track the requests. Requests allocate into the RPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC. The requests deallocate after the read CAS command has been issued to DRAM. This event counts both Isochronous and non-Isochronous requests which were issued to the RPQ.", "Unit": "iMC" }, { @@ -166,7 +156,7 @@ "EventCode": "0x81", "EventName": "UNC_M_WPQ_OCCUPANCY", "PerPkg": "1", - "PublicDescription": "Counts the number of entries in the Write Pending Queue (WPQ) at each cycle. This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations). The WPQ is used to schedule writes out to the memory controller and to track the requests.", + "PublicDescription": "Counts the number of entries in the Write Pending Queue (WPQ) at each cycle. This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations). The WPQ is used to schedule writes out to the memory controller and to track the requests. Requests allocate into the WPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC (memory controller). They deallocate after being issued to DRAM. Write requests themselves are able to complete (from the perspective of the rest of the system) as soon they have 'posted' to the iMC. This is not to be confused with actually performing the write to DRAM. Therefore, the average latency for this queue is actually not useful for deconstruction intermediate write latencies. So, we provide filtering based on if the request has posted or not. By using the 'not posted' filter, we can track how long writes spent in the iMC before completions were sent to the HA. The 'posted' filter, on the other hand, provides information about how much queueing is actually happenning in the iMC for writes before they are actually issued to memory. High average occupancies will generally coincide with high write major mode counts. Is there a filter of sorts???", "Unit": "iMC" } ] diff --git a/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json b/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json index adb42c72f5c8..d7a0270de983 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json @@ -119,9 +119,57 @@ "EventName": "UPI_DATA_BANDWIDTH_TX", "PerPkg": "1", "ScaleUnit": "7.11E-06Bytes", - "UMask": "0x0F", + "UMask": "0xf", "Unit": "UPI LL" }, + { + "BriefDescription": "PCI Express bandwidth reading at IIO, part 0", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x01", + "ScaleUnit": "4Bytes", + "UMask": "0x04", + "Unit": "IIO" + }, + { + "BriefDescription": "PCI Express bandwidth reading at IIO, part 1", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x02", + "ScaleUnit": "4Bytes", + "UMask": "0x04", + "Unit": "IIO" + }, + { + "BriefDescription": "PCI Express bandwidth reading at IIO, part 2", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x04", + "ScaleUnit": "4Bytes", + "UMask": "0x04", + "Unit": "IIO" + }, + { + "BriefDescription": "PCI Express bandwidth reading at IIO, part 3", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x08", + "ScaleUnit": "4Bytes", + "UMask": "0x04", + "Unit": "IIO" + }, { "BriefDescription": "PCI Express bandwidth reading at IIO. Derived from unc_iio_data_req_of_cpu.mem_read.part0", "Counter": "0,1", @@ -129,7 +177,7 @@ "EventName": "LLC_MISSES.PCIE_READ", "FCMask": "0x07", "Filter": "ch_mask=0x1f", - "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3", + "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 +UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 +UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 +UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3", "MetricName": "LLC_MISSES.PCIE_READ", "PerPkg": "1", "PortMask": "0x01", @@ -137,29 +185,12 @@ "UMask": "0x04", "Unit": "IIO" }, - { - "BriefDescription": "PCI Express bandwidth writing at IIO. Derived from unc_iio_data_req_of_cpu.mem_write.part0", - "Counter": "0,1", - "EventCode": "0x83", - "EventName": "LLC_MISSES.PCIE_WRITE", - "FCMask": "0x07", - "Filter": "ch_mask=0x1f", - "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3", - "MetricName": "LLC_MISSES.PCIE_WRITE", - "PerPkg": "1", - "PortMask": "0x01", - "ScaleUnit": "4Bytes", - "UMask": "0x01", - "Unit": "IIO" - }, { "BriefDescription": "PCI Express bandwidth writing at IIO, part 0", "Counter": "0,1", "EventCode": "0x83", "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0", "FCMask": "0x07", - "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3", - "MetricName": "LLC_MISSES.PCIE_WRITE", "PerPkg": "1", "PortMask": "0x01", "ScaleUnit": "4Bytes", @@ -203,53 +234,18 @@ "Unit": "IIO" }, { - "BriefDescription": "PCI Express bandwidth reading at IIO, part 0", + "BriefDescription": "PCI Express bandwidth writing at IIO. Derived from unc_iio_data_req_of_cpu.mem_write.part0", "Counter": "0,1", "EventCode": "0x83", - "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0", + "EventName": "LLC_MISSES.PCIE_WRITE", "FCMask": "0x07", - "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3", - "MetricName": "LLC_MISSES.PCIE_READ", + "Filter": "ch_mask=0x1f", + "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3", + "MetricName": "LLC_MISSES.PCIE_WRITE", "PerPkg": "1", "PortMask": "0x01", "ScaleUnit": "4Bytes", - "UMask": "0x04", - "Unit": "IIO" - }, - { - "BriefDescription": "PCI Express bandwidth reading at IIO, part 1", - "Counter": "0,1", - "EventCode": "0x83", - "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1", - "FCMask": "0x07", - "PerPkg": "1", - "PortMask": "0x02", - "ScaleUnit": "4Bytes", - "UMask": "0x04", - "Unit": "IIO" - }, - { - "BriefDescription": "PCI Express bandwidth reading at IIO, part 2", - "Counter": "0,1", - "EventCode": "0x83", - "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2", - "FCMask": "0x07", - "PerPkg": "1", - "PortMask": "0x04", - "ScaleUnit": "4Bytes", - "UMask": "0x04", - "Unit": "IIO" - }, - { - "BriefDescription": "PCI Express bandwidth reading at IIO, part 3", - "Counter": "0,1", - "EventCode": "0x83", - "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3", - "FCMask": "0x07", - "PerPkg": "1", - "PortMask": "0x08", - "ScaleUnit": "4Bytes", - "UMask": "0x04", + "UMask": "0x01", "Unit": "IIO" }, { @@ -312,6 +308,16 @@ "UMask": "0x02", "Unit": "CHA" }, + { + "BriefDescription": "FaST wire asserted; Horizontal", + "Counter": "0,1,2,3", + "EventCode": "0xA5", + "EventName": "UNC_CHA_FAST_ASSERTED.HORZ", + "PerPkg": "1", + "PublicDescription": "Counts the number of cycles either the local or incoming distress signals are asserted. Incoming distress includes up, dn and across.", + "UMask": "0x02", + "Unit": "CHA" + }, { "BriefDescription": "Read request from a remote socket which hit in the HitMe Cache to a line In the E state", "Counter": "0,1,2,3", @@ -342,6 +348,46 @@ "UMask": "0x01", "Unit": "CHA" }, + { + "BriefDescription": "Lines Victimized; Lines in E state", + "Counter": "0,1,2,3", + "EventCode": "0x37", + "EventName": "UNC_CHA_LLC_VICTIMS.TOTAL_E", + "PerPkg": "1", + "PublicDescription": "Counts the number of lines that were victimized on a fill. This can be filtered by the state that the line was in.", + "UMask": "0x02", + "Unit": "CHA" + }, + { + "BriefDescription": "Lines Victimized; Lines in F State", + "Counter": "0,1,2,3", + "EventCode": "0x37", + "EventName": "UNC_CHA_LLC_VICTIMS.TOTAL_F", + "PerPkg": "1", + "PublicDescription": "Counts the number of lines that were victimized on a fill. This can be filtered by the state that the line was in.", + "UMask": "0x08", + "Unit": "CHA" + }, + { + "BriefDescription": "Lines Victimized; Lines in M state", + "Counter": "0,1,2,3", + "EventCode": "0x37", + "EventName": "UNC_CHA_LLC_VICTIMS.TOTAL_M", + "PerPkg": "1", + "PublicDescription": "Counts the number of lines that were victimized on a fill. This can be filtered by the state that the line was in.", + "UMask": "0x01", + "Unit": "CHA" + }, + { + "BriefDescription": "Lines Victimized; Lines in S State", + "Counter": "0,1,2,3", + "EventCode": "0x37", + "EventName": "UNC_CHA_LLC_VICTIMS.TOTAL_S", + "PerPkg": "1", + "PublicDescription": "Counts the number of lines that were victimized on a fill. This can be filtered by the state that the line was in.", + "UMask": "0x04", + "Unit": "CHA" + }, { "BriefDescription": "Number of times that an RFO hit in S state.", "Counter": "0,1,2,3", @@ -372,6 +418,65 @@ "UMask": "0x20", "Unit": "CHA" }, + { + "BriefDescription": "Ingress (from CMS) Allocations; IRQ", + "Counter": "0,1,2,3", + "EventCode": "0x13", + "EventName": "UNC_CHA_RxC_INSERTS.IRQ", + "PerPkg": "1", + "PublicDescription": "Counts number of allocations per cycle into the specified Ingress queue.", + "UMask": "0x01", + "Unit": "CHA" + }, + { + "BriefDescription": "Ingress (from CMS) Request Queue Rejects; PhyAddr Match", + "Counter": "0,1,2,3", + "EventCode": "0x19", + "EventName": "UNC_CHA_RxC_IRQ1_REJECT.PA_MATCH", + "PerPkg": "1", + "PublicDescription": "Ingress (from CMS) Request Queue Rejects; PhyAddr Match", + "UMask": "0x80", + "Unit": "CHA" + }, + { + "BriefDescription": "Ingress (from CMS) Occupancy; IRQ", + "EventCode": "0x11", + "EventName": "UNC_CHA_RxC_OCCUPANCY.IRQ", + "PerPkg": "1", + "PublicDescription": "Counts number of entries in the specified Ingress queue in each cycle.", + "UMask": "0x01", + "Unit": "CHA" + }, + { + "BriefDescription": "Snoop filter capacity evictions for E-state entries.", + "Counter": "0,1,2,3", + "EventCode": "0x3D", + "EventName": "UNC_CHA_SF_EVICTION.E_STATE", + "PerPkg": "1", + "PublicDescription": "Counts snoop filter capacity evictions for entries tracking exclusive lines in the cores cache. Snoop filter capacity evictions occur when the snoop filter is full and evicts an existing entry to track a new entry. Does not count clean evictions such as when a cores cache replaces a tracked cacheline with a new cacheline.", + "UMask": "0x02", + "Unit": "CHA" + }, + { + "BriefDescription": "Snoop filter capacity evictions for M-state entries.", + "Counter": "0,1,2,3", + "EventCode": "0x3D", + "EventName": "UNC_CHA_SF_EVICTION.M_STATE", + "PerPkg": "1", + "PublicDescription": "Counts snoop filter capacity evictions for entries tracking modified lines in the cores cache. Snoop filter capacity evictions occur when the snoop filter is full and evicts an existing entry to track a new entry. Does not count clean evictions such as when a cores cache replaces a tracked cacheline with a new cacheline.", + "UMask": "0x01", + "Unit": "CHA" + }, + { + "BriefDescription": "Snoop filter capacity evictions for S-state entries.", + "Counter": "0,1,2,3", + "EventCode": "0x3D", + "EventName": "UNC_CHA_SF_EVICTION.S_STATE", + "PerPkg": "1", + "PublicDescription": "Counts snoop filter capacity evictions for entries tracking shared lines in the cores cache. Snoop filter capacity evictions occur when the snoop filter is full and evicts an existing entry to track a new entry. Does not count clean evictions such as when a cores cache replaces a tracked cacheline with a new cacheline.", + "UMask": "0x04", + "Unit": "CHA" + }, { "BriefDescription": "RspCnflct* Snoop Responses Received", "Counter": "0,1,2,3", @@ -449,7 +554,7 @@ "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x01", - "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part0. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part0. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", "UMask": "0x04", "Unit": "IIO" }, @@ -461,7 +566,7 @@ "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x02", - "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part1. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part1. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", "UMask": "0x04", "Unit": "IIO" }, @@ -473,7 +578,7 @@ "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x04", - "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part2. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part2. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", "UMask": "0x04", "Unit": "IIO" }, @@ -485,7 +590,7 @@ "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x08", - "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part3. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part3. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", "UMask": "0x04", "Unit": "IIO" }, @@ -497,7 +602,7 @@ "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x01", - "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part0 by a unit on the main die (generally a core). In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part0 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", "UMask": "0x01", "Unit": "IIO" }, @@ -509,34 +614,226 @@ "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x02", - "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part1 by a unit on the main die (generally a core). In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part1 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", "UMask": "0x01", "Unit": "IIO" }, { - "BriefDescription": "Write request of 4 bytes made to IIO Part2 by the CPU ", + "BriefDescription": "Write request of 4 bytes made to IIO Part2 by the CPU", "Counter": "2,3", "EventCode": "0xC0", "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART2", "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x04", - "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part2 by a unit on the main die (generally a core). In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part2 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", "UMask": "0x01", "Unit": "IIO" }, { - "BriefDescription": "Write request of 4 bytes made to IIO Part3 by the CPU ", + "BriefDescription": "Write request of 4 bytes made to IIO Part3 by the CPU", "Counter": "2,3", "EventCode": "0xC0", "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART3", "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x08", - "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part3 by a unit on the main die (generally a core). In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part3 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", "UMask": "0x01", "Unit": "IIO" }, + { + "BriefDescription": "Peer to peer read request for 4 bytes made by a different IIO unit to IIO Part0", + "Counter": "2,3", + "EventCode": "0xC0", + "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_READ.PART0", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x01", + "PublicDescription": "Counts ever peer to peer read request for 4 bytes of data made by a different IIO unit to the MMIO space of a card on IIO Part0. Does not include requests made by the same IIO unit. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer read request for 4 bytes made by a different IIO unit to IIO Part1", + "Counter": "2,3", + "EventCode": "0xC0", + "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_READ.PART1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x02", + "PublicDescription": "Counts ever peer to peer read request for 4 bytes of data made by a different IIO unit to the MMIO space of a card on IIO Part1. Does not include requests made by the same IIO unit. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer read request for 4 bytes made by a different IIO unit to IIO Part2", + "Counter": "2,3", + "EventCode": "0xC0", + "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_READ.PART2", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x04", + "PublicDescription": "Counts ever peer to peer read request for 4 bytes of data made by a different IIO unit to the MMIO space of a card on IIO Part2. Does not include requests made by the same IIO unit. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer read request for 4 bytes made by a different IIO unit to IIO Part3", + "Counter": "2,3", + "EventCode": "0xC0", + "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_READ.PART3", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x08", + "PublicDescription": "Counts ever peer to peer read request for 4 bytes of data made by a different IIO unit to the MMIO space of a card on IIO Part3. Does not include requests made by the same IIO unit. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of 4 bytes made to IIO Part0 by a different IIO unit", + "Counter": "2,3", + "EventCode": "0xC0", + "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_WRITE.PART0", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x01", + "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made to the MMIO space of a card on IIO Part0 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of 4 bytes made to IIO Part1 by a different IIO unit", + "Counter": "2,3", + "EventCode": "0xC0", + "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_WRITE.PART1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x02", + "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made to the MMIO space of a card on IIO Part1 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of 4 bytes made to IIO Part2 by a different IIO unit", + "Counter": "2,3", + "EventCode": "0xC0", + "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_WRITE.PART2", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x04", + "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made to the MMIO space of a card on IIO Part2 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of 4 bytes made to IIO Part3 by a different IIO unit", + "Counter": "2,3", + "EventCode": "0xC0", + "EventName": "UNC_IIO_DATA_REQ_BY_CPU.PEER_WRITE.PART3", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x08", + "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made to the MMIO space of a card on IIO Part3 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer read request for 4 bytes made by IIO Part0 to an IIO target", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_READ.PART0", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x01", + "PublicDescription": "Counts every peer to peer read request for 4 bytes of data made by IIO Part0 to the MMIO space of an IIO target. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer read request for 4 bytes made by IIO Part1 to an IIO target", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_READ.PART1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x02", + "PublicDescription": "Counts every peer to peer read request for 4 bytes of data made by IIO Part1 to the MMIO space of an IIO target. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer read request for 4 bytes made by IIO Part2 to an IIO target", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_READ.PART2", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x04", + "PublicDescription": "Counts every peer to peer read request for 4 bytes of data made by IIO Part2 to the MMIO space of an IIO target. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer read request for 4 bytes made by IIO Part3 to an IIO target", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_READ.PART3", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x08", + "PublicDescription": "Counts every peer to peer read request for 4 bytes of data made by IIO Part3 to the MMIO space of an IIO target. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of 4 bytes made by IIO Part0 to an IIO target", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART0", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x01", + "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made by IIO Part0 to the MMIO space of an IIO target. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of 4 bytes made by IIO Part0 to an IIO target", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x02", + "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made by IIO Part1 to the MMIO space of an IIO target. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of 4 bytes made by IIO Part0 to an IIO target", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART2", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x04", + "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made by IIO Part2 to the MMIO space of an IIO target. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of 4 bytes made by IIO Part0 to an IIO target", + "Counter": "0,1", + "EventCode": "0x83", + "EventName": "UNC_IIO_DATA_REQ_OF_CPU.PEER_WRITE.PART3", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x08", + "PublicDescription": "Counts every peer to peer write request of 4 bytes of data made by IIO Part3 to the MMIO space of an IIO target. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, { "BriefDescription": "Read request for up to a 64 byte transaction is made by the CPU to IIO Part0", "Counter": "0,1,2,3", @@ -545,7 +842,7 @@ "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x01", - "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part0. In the general case, part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part0. In the general case, part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", "UMask": "0x04", "Unit": "IIO" }, @@ -557,7 +854,7 @@ "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x02", - "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part1. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part1. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", "UMask": "0x04", "Unit": "IIO" }, @@ -569,7 +866,7 @@ "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x04", - "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part2. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part2. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", "UMask": "0x04", "Unit": "IIO" }, @@ -581,7 +878,7 @@ "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x08", - "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part3. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) or by another IIO unit to the MMIO space of a card on IIO Part3. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", "UMask": "0x04", "Unit": "IIO" }, @@ -593,7 +890,7 @@ "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x01", - "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part0 by a unit on the main die (generally a core). In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part0 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", "UMask": "0x01", "Unit": "IIO" }, @@ -605,34 +902,130 @@ "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x02", - "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part1 by a unit on the main die (generally a core). In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part1 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", "UMask": "0x01", "Unit": "IIO" }, { - "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part2 by the CPU ", + "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part2 by the CPU", "Counter": "0,1,2,3", "EventCode": "0xC1", "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART2", "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x04", - "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part2 by a unit on the main die (generally a core). In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part2 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", "UMask": "0x01", "Unit": "IIO" }, { - "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part3 by the CPU ", + "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part3 by the CPU", "Counter": "0,1,2,3", "EventCode": "0xC1", "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART3", "FCMask": "0x07", "PerPkg": "1", "PortMask": "0x08", - "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part3 by a unit on the main die (generally a core). In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part3 by a unit on the main die (generally a core) or by another IIO unit. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", "UMask": "0x01", "Unit": "IIO" }, + { + "BriefDescription": "Peer to peer read request for up to a 64 byte transaction is made by a different IIO unit to IIO Part0", + "Counter": "0,1,2,3", + "EventCode": "0xC1", + "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_READ.PART0", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x01", + "PublicDescription": "Counts every peer to peer read request for up to a 64 byte transaction of data made by a different IIO unit to the MMIO space of a card on IIO Part0. Does not include requests made by the same IIO unit. In the general case, part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer read request for up to a 64 byte transaction is made by a different IIO unit to IIO Part1", + "Counter": "0,1,2,3", + "EventCode": "0xC1", + "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_READ.PART1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x02", + "PublicDescription": "Counts every peer to peer read request for up to a 64 byte transaction of data made by a different IIO unit to the MMIO space of a card on IIO Part1. Does not include requests made by the same IIO unit. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer read request for up to a 64 byte transaction is made by a different IIO unit to IIO Part2", + "Counter": "0,1,2,3", + "EventCode": "0xC1", + "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_READ.PART2", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x04", + "PublicDescription": "Counts every peer to peer read request for up to a 64 byte transaction of data made by a different IIO unit to the MMIO space of a card on IIO Part2. Does not include requests made by the same IIO unit. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer read request for up to a 64 byte transaction is made by a different IIO unit to IIO Part3", + "Counter": "0,1,2,3", + "EventCode": "0xC1", + "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_READ.PART3", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x08", + "PublicDescription": "Counts every peer to peer read request for up to a 64 byte transaction of data made by a different IIO unit to the MMIO space of a card on IIO Part3. Does not include requests made by the same IIO unit. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made to IIO Part0 by a different IIO unit", + "Counter": "0,1,2,3", + "EventCode": "0xC1", + "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_WRITE.PART0", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x01", + "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part0 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made to IIO Part1 by a different IIO unit", + "Counter": "0,1,2,3", + "EventCode": "0xC1", + "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_WRITE.PART1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x02", + "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part1 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made to IIO Part2 by a different IIO unit", + "Counter": "0,1,2,3", + "EventCode": "0xC1", + "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_WRITE.PART2", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x04", + "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part2 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made to IIO Part3 by a different IIO unit", + "Counter": "0,1,2,3", + "EventCode": "0xC1", + "EventName": "UNC_IIO_TXN_REQ_BY_CPU.PEER_WRITE.PART3", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x08", + "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part3 by a different IIO unit. Does not include requests made by the same IIO unit. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, { "BriefDescription": "Read request for up to a 64 byte transaction is made by IIO Part0 to Memory", "Counter": "0,1,2,3", @@ -729,6 +1122,102 @@ "UMask": "0x01", "Unit": "IIO" }, + { + "BriefDescription": "Peer to peer read request of up to a 64 byte transaction is made by IIO Part0 to an IIO target", + "Counter": "0,1,2,3", + "EventCode": "0x84", + "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_READ.PART0", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x01", + "PublicDescription": "Counts every peer to peer read request of up to a 64 byte transaction made by IIO Part0 to the MMIO space of an IIO target. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer read request of up to a 64 byte transaction is made by IIO Part1 to an IIO target", + "Counter": "0,1,2,3", + "EventCode": "0x84", + "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_READ.PART1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x02", + "PublicDescription": "Counts every peer to peer read request of up to a 64 byte transaction made by IIO Part1 to the MMIO space of an IIO target. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer read request of up to a 64 byte transaction is made by IIO Part2 to an IIO target", + "Counter": "0,1,2,3", + "EventCode": "0x84", + "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_READ.PART2", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x04", + "PublicDescription": "Counts every peer to peer read request of up to a 64 byte transaction made by IIO Part2 to the MMIO space of an IIO target. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer read request of up to a 64 byte transaction is made by IIO Part3 to an IIO target", + "Counter": "0,1,2,3", + "EventCode": "0x84", + "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_READ.PART3", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x08", + "PublicDescription": "Counts every peer to peer read request of up to a 64 byte transaction made by IIO Part3 to the MMIO space of an IIO target. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "UMask": "0x08", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made by IIO Part0 to an IIO target", + "Counter": "0,1,2,3", + "EventCode": "0x84", + "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART0", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x01", + "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made by IIO Part0 to the MMIO space of an IIO target. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made by IIO Part1 to an IIO target", + "Counter": "0,1,2,3", + "EventCode": "0x84", + "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART1", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x02", + "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made by IIO Part1 to the MMIO space of an IIO target.In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made by IIO Part2 to an IIO target", + "Counter": "0,1,2,3", + "EventCode": "0x84", + "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART2", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x04", + "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made by IIO Part2 to the MMIO space of an IIO target. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, + { + "BriefDescription": "Peer to peer write request of up to a 64 byte transaction is made by IIO Part3 to an IIO target", + "Counter": "0,1,2,3", + "EventCode": "0x84", + "EventName": "UNC_IIO_TXN_REQ_OF_CPU.PEER_WRITE.PART3", + "FCMask": "0x07", + "PerPkg": "1", + "PortMask": "0x08", + "PublicDescription": "Counts every peer to peer write request of up to a 64 byte transaction of data made by IIO Part3 to the MMIO space of an IIO target. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.", + "UMask": "0x02", + "Unit": "IIO" + }, { "BriefDescription": "Traffic in which the M2M to iMC Bypass was not taken", "Counter": "0,1,2,3", @@ -813,7 +1302,7 @@ "Unit": "M2M" }, { - "BriefDescription": "Multi-socket cacheline Directory lookups (cacheline found in A state) ", + "BriefDescription": "Multi-socket cacheline Directory lookups (cacheline found in A state)", "Counter": "0,1,2,3", "EventCode": "0x2D", "EventName": "UNC_M2M_DIRECTORY_LOOKUP.STATE_A", @@ -823,7 +1312,7 @@ "Unit": "M2M" }, { - "BriefDescription": "Multi-socket cacheline Directory lookup (cacheline found in I state) ", + "BriefDescription": "Multi-socket cacheline Directory lookup (cacheline found in I state)", "Counter": "0,1,2,3", "EventCode": "0x2D", "EventName": "UNC_M2M_DIRECTORY_LOOKUP.STATE_I", @@ -833,7 +1322,7 @@ "Unit": "M2M" }, { - "BriefDescription": "Multi-socket cacheline Directory lookup (cacheline found in S state) ", + "BriefDescription": "Multi-socket cacheline Directory lookup (cacheline found in S state)", "Counter": "0,1,2,3", "EventCode": "0x2D", "EventName": "UNC_M2M_DIRECTORY_LOOKUP.STATE_S", @@ -863,7 +1352,7 @@ "Unit": "M2M" }, { - "BriefDescription": "Multi-socket cacheline Directory update from/to Any state ", + "BriefDescription": "Multi-socket cacheline Directory update from/to Any state", "Counter": "0,1,2,3", "EventCode": "0x2E", "EventName": "UNC_M2M_DIRECTORY_UPDATE.ANY", @@ -918,7 +1407,7 @@ "EventCode": "0x37", "EventName": "UNC_M2M_IMC_READS.ALL", "PerPkg": "1", - "PublicDescription": "Counts when the M2M (Mesh to Memory) issues reads to the iMC (Memory Controller). ", + "PublicDescription": "Counts when the M2M (Mesh to Memory) issues reads to the iMC (Memory Controller).", "UMask": "0x4", "Unit": "M2M" }, @@ -942,6 +1431,16 @@ "UMask": "0x10", "Unit": "M2M" }, + { + "BriefDescription": "M2M Writes Issued to iMC; All, regardless of priority.", + "Counter": "0,1,2,3", + "EventCode": "0x38", + "EventName": "UNC_M2M_IMC_WRITES.NI", + "PerPkg": "1", + "PublicDescription": "M2M Writes Issued to iMC; All, regardless of priority.", + "UMask": "0x80", + "Unit": "M2M" + }, { "BriefDescription": "Partial Non-Isochronous writes to the iMC", "Counter": "0,1,2,3", @@ -976,12 +1475,77 @@ "EventCode": "0x1", "EventName": "UNC_M2M_RxC_AD_INSERTS", "PerPkg": "1", - "PublicDescription": "Counts when the a new entry is Received(RxC) and then added to the AD (Address Ring) Ingress Queue from the CMS (Common Mesh Stop). This is generally used for reads, and ", + "PublicDescription": "Counts when the a new entry is Received(RxC) and then added to the AD (Address Ring) Ingress Queue from the CMS (Common Mesh Stop). This is generally used for reads, and", + "Unit": "M2M" + }, + { + "BriefDescription": "AD Ingress (from CMS) Occupancy", + "Counter": "0,1,2,3", + "EventCode": "0x2", + "EventName": "UNC_M2M_RxC_AD_OCCUPANCY", + "PerPkg": "1", + "PublicDescription": "AD Ingress (from CMS) Occupancy", + "Unit": "M2M" + }, + { + "BriefDescription": "BL Ingress (from CMS) Allocations", + "Counter": "0,1,2,3", + "EventCode": "0x5", + "EventName": "UNC_M2M_RxC_BL_INSERTS", + "PerPkg": "1", + "PublicDescription": "BL Ingress (from CMS) Allocations", + "Unit": "M2M" + }, + { + "BriefDescription": "BL Ingress (from CMS) Occupancy", + "Counter": "0,1,2,3", + "EventCode": "0x6", + "EventName": "UNC_M2M_RxC_BL_OCCUPANCY", + "PerPkg": "1", + "PublicDescription": "BL Ingress (from CMS) Occupancy", + "Unit": "M2M" + }, + { + "BriefDescription": "AD Egress (to CMS) Allocations", + "Counter": "0,1,2,3", + "EventCode": "0x9", + "EventName": "UNC_M2M_TxC_AD_INSERTS", + "PerPkg": "1", + "PublicDescription": "AD Egress (to CMS) Allocations", + "Unit": "M2M" + }, + { + "BriefDescription": "AD Egress (to CMS) Occupancy", + "Counter": "0,1,2,3", + "EventCode": "0xA", + "EventName": "UNC_M2M_TxC_AD_OCCUPANCY", + "PerPkg": "1", + "PublicDescription": "AD Egress (to CMS) Occupancy", + "Unit": "M2M" + }, + { + "BriefDescription": "BL Egress (to CMS) Allocations; All", + "Counter": "0,1,2,3", + "EventCode": "0x15", + "EventName": "UNC_M2M_TxC_BL_INSERTS.ALL", + "PerPkg": "1", + "PublicDescription": "BL Egress (to CMS) Allocations; All", + "UMask": "0x03", + "Unit": "M2M" + }, + { + "BriefDescription": "BL Egress (to CMS) Occupancy; All", + "Counter": "0,1,2,3", + "EventCode": "0x16", + "EventName": "UNC_M2M_TxC_BL_OCCUPANCY.ALL", + "PerPkg": "1", + "PublicDescription": "BL Egress (to CMS) Occupancy; All", + "UMask": "0x03", "Unit": "M2M" }, { "BriefDescription": "Prefetches generated by the flow control queue of the M3UPI unit.", - "Counter": "0,1,2,3", + "Counter": "0,1,2", "EventCode": "0x29", "EventName": "UNC_M3UPI_UPI_PREFETCH_SPAWN", "PerPkg": "1", @@ -1113,16 +1677,6 @@ "PublicDescription": "Counts incoming FLITs (FLow control unITs) which bypassed the TxL(transmit) FLIT buffer and pass directly out the UPI Link. Generally, when data is transmitted across the Intel Ultra Path Interconnect (UPI), it will bypass the TxQ and pass directly to the link. However, the TxQ will be used in L0p (Low Power) mode and (Link Layer Retry) LLR mode, increasing latency to transfer out to the link.", "Unit": "UPI LL" }, - { - "BriefDescription": "UPI interconnect send bandwidth for payload. Derived from unc_upi_txl_flits.all_data", - "Counter": "0,1,2,3", - "EventCode": "0x2", - "EventName": "UPI_DATA_BANDWIDTH_TX", - "PerPkg": "1", - "ScaleUnit": "7.11E-06Bytes", - "UMask": "0x0F", - "Unit": "UPI LL" - }, { "BriefDescription": "Null FLITs transmitted from any slot", "Counter": "0,1,2,3", @@ -1133,6 +1687,16 @@ "UMask": "0x27", "Unit": "UPI LL" }, + { + "BriefDescription": "Valid Flits Sent; Data", + "Counter": "0,1,2,3", + "EventCode": "0x2", + "EventName": "UNC_UPI_TxL_FLITS.DATA", + "PerPkg": "1", + "PublicDescription": "Shows legal flit time (hides impact of L0p and L0c).; Count Data Flits (which consume all slots), but how much to count is based on Slot0-2 mask, so count can be 0-3 depending on which slots are enabled for counting..", + "UMask": "0x8", + "Unit": "UPI LL" + }, { "BriefDescription": "Idle FLITs transmitted", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json b/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json index 7f466c97e485..bbeee1058096 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json @@ -1,284 +1,284 @@ [ { - "EventCode": "0x08", - "UMask": "0x1", - "BriefDescription": "Load misses in all DTLB levels that cause page walks", + "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.", "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK", - "PublicDescription": "Counts demand data loads that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.STLB_HIT", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x20" }, { - "EventCode": "0x08", - "UMask": "0x2", - "BriefDescription": "Page walk completed due to a demand data load to a 4K page", - "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K", - "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x08", - "UMask": "0x4", - "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page", - "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M", - "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x08", - "UMask": "0x8", - "BriefDescription": "Page walk completed due to a demand data load to a 1G page", - "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G", - "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x08", - "UMask": "0xe", - "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)", - "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", - "PublicDescription": "Counts demand data loads that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x08", - "UMask": "0x10", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.", - "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.WALK_PENDING", - "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake microarchitecture.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x08", - "UMask": "0x10", - "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.", - "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE", - "CounterMask": "1", - "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a load.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x08", - "UMask": "0x20", - "BriefDescription": "Loads that miss the DTLB and hit the STLB.", - "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.STLB_HIT", - "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x49", - "UMask": "0x1", "BriefDescription": "Store misses in all DTLB levels that cause page walks", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK", "PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" }, { - "EventCode": "0x49", - "UMask": "0x2", - "BriefDescription": "Page walk completed due to a demand data store to a 4K page", - "Counter": "0,1,2,3", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K", - "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x49", - "UMask": "0x4", "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M", "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4" }, { - "EventCode": "0x49", - "UMask": "0x8", - "BriefDescription": "Page walk completed due to a demand data store to a 1G page", - "Counter": "0,1,2,3", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G", - "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1G pages. The page walks can end with or without a page fault.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x49", - "UMask": "0xe", - "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)", - "Counter": "0,1,2,3", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", - "PublicDescription": "Counts demand data stores that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x49", - "UMask": "0x10", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.", - "Counter": "0,1,2,3", - "EventName": "DTLB_STORE_MISSES.WALK_PENDING", - "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x49", - "UMask": "0x10", - "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.", - "Counter": "0,1,2,3", - "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE", - "CounterMask": "1", - "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x49", - "UMask": "0x20", - "BriefDescription": "Stores that miss the DTLB and hit the STLB.", - "Counter": "0,1,2,3", - "EventName": "DTLB_STORE_MISSES.STLB_HIT", - "PublicDescription": "Stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x4F", - "UMask": "0x10", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.", - "Counter": "0,1,2,3", - "EventName": "EPT.WALK_PENDING", - "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", - "UMask": "0x1", - "BriefDescription": "Misses at all ITLB levels that cause page walks", - "Counter": "0,1,2,3", - "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK", - "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", - "UMask": "0x2", - "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)", - "Counter": "0,1,2,3", - "EventName": "ITLB_MISSES.WALK_COMPLETED_4K", - "PublicDescription": "Counts completed page walks (4K page size) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", - "UMask": "0x4", - "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)", - "Counter": "0,1,2,3", - "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M", - "PublicDescription": "Counts code misses in all ITLB levels that caused a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", - "UMask": "0x8", - "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)", - "Counter": "0,1,2,3", - "EventName": "ITLB_MISSES.WALK_COMPLETED_1G", - "PublicDescription": "Counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", - "UMask": "0xe", - "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)", - "Counter": "0,1,2,3", - "EventName": "ITLB_MISSES.WALK_COMPLETED", - "PublicDescription": "Counts completed page walks (2M and 4M page sizes) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", - "UMask": "0x10", "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", "EventName": "ITLB_MISSES.WALK_PENDING", "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x10" }, { - "EventCode": "0x85", - "UMask": "0x10", - "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.", + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)", "Counter": "0,1,2,3", - "EventName": "ITLB_MISSES.WALK_ACTIVE", - "CounterMask": "1", - "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { + "CounterHTOff": "0,1,2,3,4,5,6,7", "EventCode": "0x85", - "UMask": "0x20", - "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.", - "Counter": "0,1,2,3", - "EventName": "ITLB_MISSES.STLB_HIT", + "EventName": "ITLB_MISSES.WALK_COMPLETED_4K", + "PublicDescription": "Counts completed page walks (4K page size) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.", "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2" }, { - "EventCode": "0xAE", - "UMask": "0x1", "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xAE", "EventName": "ITLB.ITLB_FLUSH", "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).", "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" + }, + { + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_ACTIVE", + "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.", + "SampleAfterValue": "100003", + "UMask": "0x10" + }, + { + "BriefDescription": "Loads that miss the DTLB and hit the STLB.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.STLB_HIT", + "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).", + "SampleAfterValue": "2000003", + "UMask": "0x20" + }, + { + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_PENDING", + "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.", + "SampleAfterValue": "2000003", + "UMask": "0x10" }, { - "EventCode": "0xBD", - "UMask": "0x1", "BriefDescription": "DTLB flush attempts of the thread-specific entries", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xBD", "EventName": "TLB_FLUSH.DTLB_THREAD", "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.", "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1" + }, + { + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_PENDING", + "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake microarchitecture.", + "SampleAfterValue": "2000003", + "UMask": "0x10" + }, + { + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE", + "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.", + "SampleAfterValue": "100003", + "UMask": "0x10" + }, + { + "BriefDescription": "Misses at all ITLB levels that cause page walks", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK", + "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Stores that miss the DTLB and hit the STLB.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.STLB_HIT", + "PublicDescription": "Stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).", + "SampleAfterValue": "100003", + "UMask": "0x20" + }, + { + "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts demand data stores that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0xe" + }, + { + "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts demand data loads that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0xe" + }, + { + "BriefDescription": "Page walk completed due to a demand data store to a 4K page", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K", + "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", + "SampleAfterValue": "100003", + "UMask": "0x2" + }, + { + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED_1G", + "PublicDescription": "Counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0x8" + }, + { + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED", + "PublicDescription": "Counts completed page walks (2M and 4M page sizes) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0xe" + }, + { + "BriefDescription": "Page walk completed due to a demand data load to a 4K page", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K", + "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", + "SampleAfterValue": "2000003", + "UMask": "0x2" + }, + { + "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M", + "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.", + "SampleAfterValue": "2000003", + "UMask": "0x4" + }, + { + "BriefDescription": "Load misses in all DTLB levels that cause page walks", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK", + "PublicDescription": "Counts demand data loads that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.", + "SampleAfterValue": "100003", + "UMask": "0x1" + }, + { + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x4F", + "EventName": "EPT.WALK_PENDING", + "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.", + "SampleAfterValue": "2000003", + "UMask": "0x10" }, { - "EventCode": "0xBD", - "UMask": "0x20", "BriefDescription": "STLB flush attempts", "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0xBD", "EventName": "TLB_FLUSH.STLB_ANY", "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).", "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x20" + }, + { + "BriefDescription": "Page walk completed due to a demand data load to a 1G page", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G", + "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", + "SampleAfterValue": "2000003", + "UMask": "0x8" + }, + { + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "CounterMask": "1", + "EventCode": "0x08", + "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE", + "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a load.", + "SampleAfterValue": "100003", + "UMask": "0x10" + }, + { + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x85", + "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M", + "PublicDescription": "Counts code misses in all ITLB levels that caused a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.", + "SampleAfterValue": "100003", + "UMask": "0x4" + }, + { + "BriefDescription": "Page walk completed due to a demand data store to a 1G page", + "Counter": "0,1,2,3", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "EventCode": "0x49", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G", + "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1G pages. The page walks can end with or without a page fault.", + "SampleAfterValue": "100003", + "UMask": "0x8" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index fc9c158bfa13..e47644cab3fa 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -48,11 +48,40 @@ #include #include "jsmn.h" #include "json.h" -#include "jevents.h" +#include "pmu-events.h" int verbose; char *prog; +struct json_event { + char *name; + char *event; + char *desc; + char *long_desc; + char *pmu; + char *unit; + char *perpkg; + char *aggr_mode; + char *metric_expr; + char *metric_name; + char *metric_group; + char *deprecated; + char *metric_constraint; +}; + +enum aggr_mode_class convert(const char *aggr_mode) +{ + if (!strcmp(aggr_mode, "PerCore")) + return PerCore; + else if (!strcmp(aggr_mode, "PerChip")) + return PerChip; + + pr_err("%s: Wrong AggregationMode value '%s'\n", prog, aggr_mode); + return -1; +} + +typedef int (*func)(void *data, struct json_event *je); + int eprintf(int level, int var, const char *fmt, ...) { @@ -71,11 +100,6 @@ int eprintf(int level, int var, const char *fmt, ...) return ret; } -__attribute__((weak)) char *get_cpu_str(void) -{ - return NULL; -} - static void addfield(char *map, char **dst, const char *sep, const char *a, jsmntok_t *bt) { @@ -240,6 +264,7 @@ static struct map { { "hisi_sccl,hha", "hisi_sccl,hha" }, { "hisi_sccl,l3c", "hisi_sccl,l3c" }, { "L3PMC", "amd_l3" }, + { "DFPMC", "amd_df" }, {} }; @@ -318,12 +343,7 @@ static void print_events_table_prefix(FILE *fp, const char *tblname) close_table = 1; } -static int print_events_table_entry(void *data, char *name, char *event, - char *desc, char *long_desc, - char *pmu, char *unit, char *perpkg, - char *metric_expr, - char *metric_name, char *metric_group, - char *deprecated, char *metric_constraint) +static int print_events_table_entry(void *data, struct json_event *je) { struct perf_entry_data *pd = data; FILE *outfp = pd->outfp; @@ -335,30 +355,32 @@ static int print_events_table_entry(void *data, char *name, char *event, */ fprintf(outfp, "{\n"); - if (name) - fprintf(outfp, "\t.name = \"%s\",\n", name); - if (event) - fprintf(outfp, "\t.event = \"%s\",\n", event); - fprintf(outfp, "\t.desc = \"%s\",\n", desc); + if (je->name) + fprintf(outfp, "\t.name = \"%s\",\n", je->name); + if (je->event) + fprintf(outfp, "\t.event = \"%s\",\n", je->event); + fprintf(outfp, "\t.desc = \"%s\",\n", je->desc); fprintf(outfp, "\t.topic = \"%s\",\n", topic); - if (long_desc && long_desc[0]) - fprintf(outfp, "\t.long_desc = \"%s\",\n", long_desc); - if (pmu) - fprintf(outfp, "\t.pmu = \"%s\",\n", pmu); - if (unit) - fprintf(outfp, "\t.unit = \"%s\",\n", unit); - if (perpkg) - fprintf(outfp, "\t.perpkg = \"%s\",\n", perpkg); - if (metric_expr) - fprintf(outfp, "\t.metric_expr = \"%s\",\n", metric_expr); - if (metric_name) - fprintf(outfp, "\t.metric_name = \"%s\",\n", metric_name); - if (metric_group) - fprintf(outfp, "\t.metric_group = \"%s\",\n", metric_group); - if (deprecated) - fprintf(outfp, "\t.deprecated = \"%s\",\n", deprecated); - if (metric_constraint) - fprintf(outfp, "\t.metric_constraint = \"%s\",\n", metric_constraint); + if (je->long_desc && je->long_desc[0]) + fprintf(outfp, "\t.long_desc = \"%s\",\n", je->long_desc); + if (je->pmu) + fprintf(outfp, "\t.pmu = \"%s\",\n", je->pmu); + if (je->unit) + fprintf(outfp, "\t.unit = \"%s\",\n", je->unit); + if (je->perpkg) + fprintf(outfp, "\t.perpkg = \"%s\",\n", je->perpkg); + if (je->aggr_mode) + fprintf(outfp, "\t.aggr_mode = \"%d\",\n", convert(je->aggr_mode)); + if (je->metric_expr) + fprintf(outfp, "\t.metric_expr = \"%s\",\n", je->metric_expr); + if (je->metric_name) + fprintf(outfp, "\t.metric_name = \"%s\",\n", je->metric_name); + if (je->metric_group) + fprintf(outfp, "\t.metric_group = \"%s\",\n", je->metric_group); + if (je->deprecated) + fprintf(outfp, "\t.deprecated = \"%s\",\n", je->deprecated); + if (je->metric_constraint) + fprintf(outfp, "\t.metric_constraint = \"%s\",\n", je->metric_constraint); fprintf(outfp, "},\n"); return 0; @@ -373,6 +395,7 @@ struct event_struct { char *pmu; char *unit; char *perpkg; + char *aggr_mode; char *metric_expr; char *metric_name; char *metric_group; @@ -380,17 +403,17 @@ struct event_struct { char *metric_constraint; }; -#define ADD_EVENT_FIELD(field) do { if (field) { \ - es->field = strdup(field); \ +#define ADD_EVENT_FIELD(field) do { if (je->field) { \ + es->field = strdup(je->field); \ if (!es->field) \ goto out_free; \ } } while (0) #define FREE_EVENT_FIELD(field) free(es->field) -#define TRY_FIXUP_FIELD(field) do { if (es->field && !*field) {\ - *field = strdup(es->field); \ - if (!*field) \ +#define TRY_FIXUP_FIELD(field) do { if (es->field && !je->field) {\ + je->field = strdup(es->field); \ + if (!je->field) \ return -ENOMEM; \ } } while (0) @@ -402,6 +425,7 @@ struct event_struct { op(pmu); \ op(unit); \ op(perpkg); \ + op(aggr_mode); \ op(metric_expr); \ op(metric_name); \ op(metric_group); \ @@ -421,11 +445,7 @@ static void free_arch_std_events(void) } } -static int save_arch_std_events(void *data, char *name, char *event, - char *desc, char *long_desc, char *pmu, - char *unit, char *perpkg, char *metric_expr, - char *metric_name, char *metric_group, - char *deprecated, char *metric_constraint) +static int save_arch_std_events(void *data, struct json_event *je) { struct event_struct *es; @@ -485,23 +505,15 @@ static char *real_event(const char *name, char *event) } static int -try_fixup(const char *fn, char *arch_std, char **event, char **desc, - char **name, char **long_desc, char **pmu, char **filter, - char **perpkg, char **unit, char **metric_expr, char **metric_name, - char **metric_group, unsigned long long eventcode, - char **deprecated, char **metric_constraint) +try_fixup(const char *fn, char *arch_std, struct json_event *je, char **event) { /* try to find matching event from arch standard values */ struct event_struct *es; list_for_each_entry(es, &arch_std_events, list) { if (!strcmp(arch_std, es->name)) { - if (!eventcode && es->event) { - /* allow EventCode to be overridden */ - free(*event); - *event = NULL; - } FOR_ALL_EVENT_STRUCT_FIELDS(TRY_FIXUP_FIELD); + *event = je->event; return 0; } } @@ -512,14 +524,9 @@ try_fixup(const char *fn, char *arch_std, char **event, char **desc, } /* Call func with each event in the json file */ -int json_events(const char *fn, - int (*func)(void *data, char *name, char *event, char *desc, - char *long_desc, - char *pmu, char *unit, char *perpkg, - char *metric_expr, - char *metric_name, char *metric_group, - char *deprecated, char *metric_constraint), - void *data) +static int json_events(const char *fn, + int (*func)(void *data, struct json_event *je), + void *data) { int err; size_t size; @@ -537,18 +544,10 @@ int json_events(const char *fn, EXPECT(tokens->type == JSMN_ARRAY, tokens, "expected top level array"); tok = tokens + 1; for (i = 0; i < tokens->size; i++) { - char *event = NULL, *desc = NULL, *name = NULL; - char *long_desc = NULL; + char *event = NULL; char *extra_desc = NULL; - char *pmu = NULL; char *filter = NULL; - char *perpkg = NULL; - char *unit = NULL; - char *metric_expr = NULL; - char *metric_name = NULL; - char *metric_group = NULL; - char *deprecated = NULL; - char *metric_constraint = NULL; + struct json_event je = {}; char *arch_std = NULL; unsigned long long eventcode = 0; struct msrmap *msr = NULL; @@ -583,14 +582,14 @@ int json_events(const char *fn, eventcode |= strtoul(code, NULL, 0) << 21; free(code); } else if (json_streq(map, field, "EventName")) { - addfield(map, &name, "", "", val); + addfield(map, &je.name, "", "", val); } else if (json_streq(map, field, "BriefDescription")) { - addfield(map, &desc, "", "", val); - fixdesc(desc); + addfield(map, &je.desc, "", "", val); + fixdesc(je.desc); } else if (json_streq(map, field, "PublicDescription")) { - addfield(map, &long_desc, "", "", val); - fixdesc(long_desc); + addfield(map, &je.long_desc, "", "", val); + fixdesc(je.long_desc); } else if (json_streq(map, field, "PEBS") && nz) { precise = val; } else if (json_streq(map, field, "MSRIndex") && nz) { @@ -610,34 +609,36 @@ int json_events(const char *fn, ppmu = field_to_perf(unit_to_pmu, map, val); if (ppmu) { - pmu = strdup(ppmu); + je.pmu = strdup(ppmu); } else { - if (!pmu) - pmu = strdup("uncore_"); - addfield(map, &pmu, "", "", val); - for (s = pmu; *s; s++) + if (!je.pmu) + je.pmu = strdup("uncore_"); + addfield(map, &je.pmu, "", "", val); + for (s = je.pmu; *s; s++) *s = tolower(*s); } - addfield(map, &desc, ". ", "Unit: ", NULL); - addfield(map, &desc, "", pmu, NULL); - addfield(map, &desc, "", " ", NULL); + addfield(map, &je.desc, ". ", "Unit: ", NULL); + addfield(map, &je.desc, "", je.pmu, NULL); + addfield(map, &je.desc, "", " ", NULL); } else if (json_streq(map, field, "Filter")) { addfield(map, &filter, "", "", val); } else if (json_streq(map, field, "ScaleUnit")) { - addfield(map, &unit, "", "", val); + addfield(map, &je.unit, "", "", val); } else if (json_streq(map, field, "PerPkg")) { - addfield(map, &perpkg, "", "", val); + addfield(map, &je.perpkg, "", "", val); + } else if (json_streq(map, field, "AggregationMode")) { + addfield(map, &je.aggr_mode, "", "", val); } else if (json_streq(map, field, "Deprecated")) { - addfield(map, &deprecated, "", "", val); + addfield(map, &je.deprecated, "", "", val); } else if (json_streq(map, field, "MetricName")) { - addfield(map, &metric_name, "", "", val); + addfield(map, &je.metric_name, "", "", val); } else if (json_streq(map, field, "MetricGroup")) { - addfield(map, &metric_group, "", "", val); + addfield(map, &je.metric_group, "", "", val); } else if (json_streq(map, field, "MetricConstraint")) { - addfield(map, &metric_constraint, "", "", val); + addfield(map, &je.metric_constraint, "", "", val); } else if (json_streq(map, field, "MetricExpr")) { - addfield(map, &metric_expr, "", "", val); - for (s = metric_expr; *s; s++) + addfield(map, &je.metric_expr, "", "", val); + for (s = je.metric_expr; *s; s++) *s = tolower(*s); } else if (json_streq(map, field, "ArchStdEvent")) { addfield(map, &arch_std, "", "", val); @@ -646,7 +647,7 @@ int json_events(const char *fn, } /* ignore unknown fields */ } - if (precise && desc && !strstr(desc, "(Precise Event)")) { + if (precise && je.desc && !strstr(je.desc, "(Precise Event)")) { if (json_streq(map, precise, "2")) addfield(map, &extra_desc, " ", "(Must be precise)", NULL); @@ -656,48 +657,44 @@ int json_events(const char *fn, } snprintf(buf, sizeof buf, "event=%#llx", eventcode); addfield(map, &event, ",", buf, NULL); - if (desc && extra_desc) - addfield(map, &desc, " ", extra_desc, NULL); - if (long_desc && extra_desc) - addfield(map, &long_desc, " ", extra_desc, NULL); + if (je.desc && extra_desc) + addfield(map, &je.desc, " ", extra_desc, NULL); + if (je.long_desc && extra_desc) + addfield(map, &je.long_desc, " ", extra_desc, NULL); if (filter) addfield(map, &event, ",", filter, NULL); if (msr != NULL) addfield(map, &event, ",", msr->pname, msrval); - if (name) - fixname(name); + if (je.name) + fixname(je.name); if (arch_std) { /* * An arch standard event is referenced, so try to * fixup any unassigned values. */ - err = try_fixup(fn, arch_std, &event, &desc, &name, - &long_desc, &pmu, &filter, &perpkg, - &unit, &metric_expr, &metric_name, - &metric_group, eventcode, - &deprecated, &metric_constraint); + err = try_fixup(fn, arch_std, &je, &event); if (err) goto free_strings; } - err = func(data, name, real_event(name, event), desc, long_desc, - pmu, unit, perpkg, metric_expr, metric_name, - metric_group, deprecated, metric_constraint); + je.event = real_event(je.name, event); + err = func(data, &je); free_strings: free(event); - free(desc); - free(name); - free(long_desc); + free(je.desc); + free(je.name); + free(je.long_desc); free(extra_desc); - free(pmu); + free(je.pmu); free(filter); - free(perpkg); - free(deprecated); - free(unit); - free(metric_expr); - free(metric_name); - free(metric_group); - free(metric_constraint); + free(je.perpkg); + free(je.aggr_mode); + free(je.deprecated); + free(je.unit); + free(je.metric_expr); + free(je.metric_name); + free(je.metric_group); + free(je.metric_constraint); free(arch_std); if (err) diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h deleted file mode 100644 index 2afc8304529e..000000000000 --- a/tools/perf/pmu-events/jevents.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef JEVENTS_H -#define JEVENTS_H 1 - -int json_events(const char *fn, - int (*func)(void *data, char *name, char *event, char *desc, - char *long_desc, - char *pmu, - char *unit, char *perpkg, char *metric_expr, - char *metric_name, char *metric_group, - char *deprecated, char *metric_constraint), - void *data); -char *get_cpu_str(void); - -#ifndef min -#define min(x, y) ({ \ - typeof(x) _min1 = (x); \ - typeof(y) _min2 = (y); \ - (void) (&_min1 == &_min2); \ - _min1 < _min2 ? _min1 : _min2; }) -#endif - -#endif diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index c8f306b572f4..7da1a3743b77 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -2,6 +2,11 @@ #ifndef PMU_EVENTS_H #define PMU_EVENTS_H +enum aggr_mode_class { + PerChip = 1, + PerCore +}; + /* * Describe each PMU event. Each CPU has a table of PMU events. */ @@ -14,6 +19,7 @@ struct pmu_event { const char *pmu; const char *unit; const char *perpkg; + const char *aggr_mode; const char *metric_expr; const char *metric_name; const char *metric_group; diff --git a/tools/perf/scripts/python/futex-contention.py b/tools/perf/scripts/python/futex-contention.py index 0c4841acf75d..7e884d46f920 100644 --- a/tools/perf/scripts/python/futex-contention.py +++ b/tools/perf/scripts/python/futex-contention.py @@ -12,41 +12,46 @@ from __future__ import print_function -import os, sys -sys.path.append(os.environ['PERF_EXEC_PATH'] + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') +import os +import sys +sys.path.append(os.environ['PERF_EXEC_PATH'] + + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') from Util import * process_names = {} thread_thislock = {} thread_blocktime = {} -lock_waits = {} # long-lived stats on (tid,lock) blockage elapsed time -process_names = {} # long-lived pid-to-execname mapping +lock_waits = {} # long-lived stats on (tid,lock) blockage elapsed time +process_names = {} # long-lived pid-to-execname mapping + def syscalls__sys_enter_futex(event, ctxt, cpu, s, ns, tid, comm, callchain, - nr, uaddr, op, val, utime, uaddr2, val3): - cmd = op & FUTEX_CMD_MASK - if cmd != FUTEX_WAIT: - return # we don't care about originators of WAKE events + nr, uaddr, op, val, utime, uaddr2, val3): + cmd = op & FUTEX_CMD_MASK + if cmd != FUTEX_WAIT: + return # we don't care about originators of WAKE events + + process_names[tid] = comm + thread_thislock[tid] = uaddr + thread_blocktime[tid] = nsecs(s, ns) - process_names[tid] = comm - thread_thislock[tid] = uaddr - thread_blocktime[tid] = nsecs(s, ns) def syscalls__sys_exit_futex(event, ctxt, cpu, s, ns, tid, comm, callchain, - nr, ret): - if tid in thread_blocktime: - elapsed = nsecs(s, ns) - thread_blocktime[tid] - add_stats(lock_waits, (tid, thread_thislock[tid]), elapsed) - del thread_blocktime[tid] - del thread_thislock[tid] + nr, ret): + if tid in thread_blocktime: + elapsed = nsecs(s, ns) - thread_blocktime[tid] + add_stats(lock_waits, (tid, thread_thislock[tid]), elapsed) + del thread_blocktime[tid] + del thread_thislock[tid] + def trace_begin(): - print("Press control+C to stop and show the summary") + print("Press control+C to stop and show the summary") + def trace_end(): - for (tid, lock) in lock_waits: - min, max, avg, count = lock_waits[tid, lock] - print("%s[%d] lock %x contended %d times, %d avg ns" % - (process_names[tid], tid, lock, count, avg)) - + for (tid, lock) in lock_waits: + min, max, avg, count = lock_waits[tid, lock] + print("%s[%d] lock %x contended %d times, %d avg ns [max: %d ns, min %d ns]" % + (process_names[tid], tid, lock, count, avg, max, min)) diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 84352fc49a20..4d15bf6041fb 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -60,6 +60,8 @@ perf-y += api-io.o perf-y += demangle-java-test.o perf-y += pfm.o perf-y += parse-metric.o +perf-y += pe-file-parsing.o +perf-y += expand-cgroup.o $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build $(call rule_mkdir) diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index a9599ab8c471..ec972e0892ab 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -30,9 +30,9 @@ #include #include #include -#include "../perf-sys.h" #include #include "event.h" +#include "util.h" #include "tests.h" #define ENV "PERF_TEST_ATTR" diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index d328caaba45d..132bdb3e6c31 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -341,6 +341,14 @@ static struct test generic_tests[] = { .desc = "Parse and process metrics", .func = test__parse_metric, }, + { + .desc = "PE file support", + .func = test__pe_file_parsing, + }, + { + .desc = "Event expansion for cgroups", + .func = test__expand_cgroup_events, + }, { .func = NULL, }, diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c new file mode 100644 index 000000000000..d5771e4d094f --- /dev/null +++ b/tools/perf/tests/expand-cgroup.c @@ -0,0 +1,241 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "tests.h" +#include "debug.h" +#include "evlist.h" +#include "cgroup.h" +#include "rblist.h" +#include "metricgroup.h" +#include "parse-events.h" +#include "pmu-events/pmu-events.h" +#include "pfm.h" +#include +#include +#include +#include + +static int test_expand_events(struct evlist *evlist, + struct rblist *metric_events) +{ + int i, ret = TEST_FAIL; + int nr_events; + bool was_group_event; + int nr_members; /* for the first evsel only */ + const char cgrp_str[] = "A,B,C"; + const char *cgrp_name[] = { "A", "B", "C" }; + int nr_cgrps = ARRAY_SIZE(cgrp_name); + char **ev_name; + struct evsel *evsel; + + TEST_ASSERT_VAL("evlist is empty", !perf_evlist__empty(evlist)); + + nr_events = evlist->core.nr_entries; + ev_name = calloc(nr_events, sizeof(*ev_name)); + if (ev_name == NULL) { + pr_debug("memory allocation failure\n"); + return TEST_FAIL; + } + i = 0; + evlist__for_each_entry(evlist, evsel) { + ev_name[i] = strdup(evsel->name); + if (ev_name[i] == NULL) { + pr_debug("memory allocation failure\n"); + goto out; + } + i++; + } + /* remember grouping info */ + was_group_event = evsel__is_group_event(evlist__first(evlist)); + nr_members = evlist__first(evlist)->core.nr_members; + + ret = evlist__expand_cgroup(evlist, cgrp_str, metric_events, false); + if (ret < 0) { + pr_debug("failed to expand events for cgroups\n"); + goto out; + } + + ret = TEST_FAIL; + if (evlist->core.nr_entries != nr_events * nr_cgrps) { + pr_debug("event count doesn't match\n"); + goto out; + } + + i = 0; + evlist__for_each_entry(evlist, evsel) { + if (strcmp(evsel->name, ev_name[i % nr_events])) { + pr_debug("event name doesn't match:\n"); + pr_debug(" evsel[%d]: %s\n expected: %s\n", + i, evsel->name, ev_name[i % nr_events]); + goto out; + } + if (strcmp(evsel->cgrp->name, cgrp_name[i / nr_events])) { + pr_debug("cgroup name doesn't match:\n"); + pr_debug(" evsel[%d]: %s\n expected: %s\n", + i, evsel->cgrp->name, cgrp_name[i / nr_events]); + goto out; + } + + if ((i % nr_events) == 0) { + if (evsel__is_group_event(evsel) != was_group_event) { + pr_debug("event group doesn't match: got %s, expect %s\n", + evsel__is_group_event(evsel) ? "true" : "false", + was_group_event ? "true" : "false"); + goto out; + } + if (evsel->core.nr_members != nr_members) { + pr_debug("event group member doesn't match: %d vs %d\n", + evsel->core.nr_members, nr_members); + goto out; + } + } + i++; + } + ret = TEST_OK; + +out: for (i = 0; i < nr_events; i++) + free(ev_name[i]); + free(ev_name); + return ret; +} + +static int expand_default_events(void) +{ + int ret; + struct evlist *evlist; + struct rblist metric_events; + + evlist = perf_evlist__new_default(); + TEST_ASSERT_VAL("failed to get evlist", evlist); + + rblist__init(&metric_events); + ret = test_expand_events(evlist, &metric_events); + evlist__delete(evlist); + return ret; +} + +static int expand_group_events(void) +{ + int ret; + struct evlist *evlist; + struct rblist metric_events; + struct parse_events_error err; + const char event_str[] = "{cycles,instructions}"; + + symbol_conf.event_group = true; + + evlist = evlist__new(); + TEST_ASSERT_VAL("failed to get evlist", evlist); + + ret = parse_events(evlist, event_str, &err); + if (ret < 0) { + pr_debug("failed to parse event '%s', err %d, str '%s'\n", + event_str, ret, err.str); + parse_events_print_error(&err, event_str); + goto out; + } + + rblist__init(&metric_events); + ret = test_expand_events(evlist, &metric_events); +out: + evlist__delete(evlist); + return ret; +} + +static int expand_libpfm_events(void) +{ + int ret; + struct evlist *evlist; + struct rblist metric_events; + const char event_str[] = "UNHALTED_CORE_CYCLES"; + struct option opt = { + .value = &evlist, + }; + + symbol_conf.event_group = true; + + evlist = evlist__new(); + TEST_ASSERT_VAL("failed to get evlist", evlist); + + ret = parse_libpfm_events_option(&opt, event_str, 0); + if (ret < 0) { + pr_debug("failed to parse libpfm event '%s', err %d\n", + event_str, ret); + goto out; + } + if (perf_evlist__empty(evlist)) { + pr_debug("libpfm was not enabled\n"); + goto out; + } + + rblist__init(&metric_events); + ret = test_expand_events(evlist, &metric_events); +out: + evlist__delete(evlist); + return ret; +} + +static int expand_metric_events(void) +{ + int ret; + struct evlist *evlist; + struct rblist metric_events; + const char metric_str[] = "CPI"; + + struct pmu_event pme_test[] = { + { + .metric_expr = "instructions / cycles", + .metric_name = "IPC", + }, + { + .metric_expr = "1 / IPC", + .metric_name = "CPI", + }, + { + .metric_expr = NULL, + .metric_name = NULL, + }, + }; + struct pmu_events_map ev_map = { + .cpuid = "test", + .version = "1", + .type = "core", + .table = pme_test, + }; + + evlist = evlist__new(); + TEST_ASSERT_VAL("failed to get evlist", evlist); + + rblist__init(&metric_events); + ret = metricgroup__parse_groups_test(evlist, &ev_map, metric_str, + false, false, &metric_events); + if (ret < 0) { + pr_debug("failed to parse '%s' metric\n", metric_str); + goto out; + } + + ret = test_expand_events(evlist, &metric_events); + +out: + metricgroup__rblist_exit(&metric_events); + evlist__delete(evlist); + return ret; +} + +int test__expand_cgroup_events(struct test *test __maybe_unused, + int subtest __maybe_unused) +{ + int ret; + + ret = expand_default_events(); + TEST_ASSERT_EQUAL("failed to expand default events", ret, 0); + + ret = expand_group_events(); + TEST_ASSERT_EQUAL("failed to expand event group", ret, 0); + + ret = expand_libpfm_events(); + TEST_ASSERT_EQUAL("failed to expand event group", ret, 0); + + ret = expand_metric_events(); + TEST_ASSERT_EQUAL("failed to expand metric events", ret, 0); + + return ret; +} diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 9b651dfe0a6b..a90fa043c066 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -91,6 +91,7 @@ make_no_sdt := NO_SDT=1 make_no_syscall_tbl := NO_SYSCALL_TABLE=1 make_with_clangllvm := LIBCLANGLLVM=1 make_with_libpfm4 := LIBPFM4=1 +make_with_gtk2 := GTK2=1 make_tags := tags make_cscope := cscope make_help := help @@ -154,6 +155,7 @@ run += make_no_syscall_tbl run += make_with_babeltrace run += make_with_clangllvm run += make_with_libpfm4 +run += make_with_gtk2 run += make_help run += make_doc run += make_perf_o diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index aae0fd9045c1..611512f22b34 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -557,6 +557,7 @@ static int test__checkevent_pmu_events(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned); + TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive); return 0; } @@ -575,6 +576,7 @@ static int test__checkevent_pmu_events_mix(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned); + TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive); /* cpu/pmu-event/u*/ evsel = evsel__next(evsel); @@ -587,6 +589,7 @@ static int test__checkevent_pmu_events_mix(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned); + TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.pinned); return 0; } @@ -1277,6 +1280,49 @@ static int test__pinned_group(struct evlist *evlist) return 0; } +static int test__checkevent_exclusive_modifier(struct evlist *evlist) +{ + struct evsel *evsel = evlist__first(evlist); + + TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip); + TEST_ASSERT_VAL("wrong exclusive", evsel->core.attr.exclusive); + + return test__checkevent_symbolic_name(evlist); +} + +static int test__exclusive_group(struct evlist *evlist) +{ + struct evsel *evsel, *leader; + + TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->core.nr_entries); + + /* cycles - group leader */ + evsel = leader = evlist__first(evlist); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong group name", !evsel->group_name); + TEST_ASSERT_VAL("wrong leader", evsel->leader == leader); + TEST_ASSERT_VAL("wrong exclusive", evsel->core.attr.exclusive); + + /* cache-misses - can not be pinned, but will go on with the leader */ + evsel = evsel__next(evsel); + TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive); + + /* branch-misses - ditto */ + evsel = evsel__next(evsel); + TEST_ASSERT_VAL("wrong config", + PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config); + TEST_ASSERT_VAL("wrong exclusive", !evsel->core.attr.exclusive); + + return 0; +} static int test__checkevent_breakpoint_len(struct evlist *evlist) { struct evsel *evsel = evlist__first(evlist); @@ -1765,7 +1811,17 @@ static struct evlist_test test__events[] = { .name = "cycles:k", .check = test__sym_event_dc, .id = 55, - } + }, + { + .name = "instructions:uep", + .check = test__checkevent_exclusive_modifier, + .id = 56, + }, + { + .name = "{cycles,cache-misses,branch-misses}:e", + .check = test__exclusive_group, + .id = 57, + }, }; static struct evlist_test test__events_pmu[] = { diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index cd7331aac3bd..7c1bde01cb50 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -11,8 +11,6 @@ #include "debug.h" #include "expr.h" #include "stat.h" -#include -#include static struct pmu_event pme_test[] = { { @@ -159,6 +157,7 @@ static int __compute_metric(const char *name, struct value *vals, } perf_evlist__set_maps(&evlist->core, cpus, NULL); + runtime_stat__init(&st); /* Parse the metric into metric_events list. */ err = metricgroup__parse_groups_test(evlist, &map, name, @@ -172,7 +171,6 @@ static int __compute_metric(const char *name, struct value *vals, goto out; /* Load the runtime stats with given numbers for events. */ - runtime_stat__init(&st); load_runtime_stat(&st, evlist, vals); /* And execute the metric */ diff --git a/tools/perf/tests/pe-file-parsing.c b/tools/perf/tests/pe-file-parsing.c new file mode 100644 index 000000000000..58b90c42eb38 --- /dev/null +++ b/tools/perf/tests/pe-file-parsing.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "debug.h" +#include "util/build-id.h" +#include "util/symbol.h" +#include "util/dso.h" + +#include "tests.h" + +#ifdef HAVE_LIBBFD_SUPPORT + +static int run_dir(const char *d) +{ + char filename[PATH_MAX]; + char debugfile[PATH_MAX]; + struct build_id bid; + char debuglink[PATH_MAX]; + char expect_build_id[] = { + 0x5a, 0x0f, 0xd8, 0x82, 0xb5, 0x30, 0x84, 0x22, + 0x4b, 0xa4, 0x7b, 0x62, 0x4c, 0x55, 0xa4, 0x69, + }; + char expect_debuglink[PATH_MAX] = "pe-file.exe.debug"; + struct dso *dso; + struct symbol *sym; + int ret; + + scnprintf(filename, PATH_MAX, "%s/pe-file.exe", d); + ret = filename__read_build_id(filename, &bid); + TEST_ASSERT_VAL("Failed to read build_id", + ret == sizeof(expect_build_id)); + TEST_ASSERT_VAL("Wrong build_id", !memcmp(bid.data, expect_build_id, + sizeof(expect_build_id))); + + ret = filename__read_debuglink(filename, debuglink, PATH_MAX); + TEST_ASSERT_VAL("Failed to read debuglink", ret == 0); + TEST_ASSERT_VAL("Wrong debuglink", + !strcmp(debuglink, expect_debuglink)); + + scnprintf(debugfile, PATH_MAX, "%s/%s", d, debuglink); + ret = filename__read_build_id(debugfile, &bid); + TEST_ASSERT_VAL("Failed to read debug file build_id", + ret == sizeof(expect_build_id)); + TEST_ASSERT_VAL("Wrong build_id", !memcmp(bid.data, expect_build_id, + sizeof(expect_build_id))); + + dso = dso__new(filename); + TEST_ASSERT_VAL("Failed to get dso", dso); + + ret = dso__load_bfd_symbols(dso, debugfile); + TEST_ASSERT_VAL("Failed to load symbols", ret == 0); + + dso__sort_by_name(dso); + sym = dso__find_symbol_by_name(dso, "main"); + TEST_ASSERT_VAL("Failed to find main", sym); + dso__delete(dso); + + return TEST_OK; +} + +int test__pe_file_parsing(struct test *test __maybe_unused, + int subtest __maybe_unused) +{ + struct stat st; + char path_dir[PATH_MAX]; + + /* First try development tree tests. */ + if (!lstat("./tests", &st)) + return run_dir("./tests"); + + /* Then installed path. */ + snprintf(path_dir, PATH_MAX, "%s/tests", get_argv_exec_path()); + + if (!lstat(path_dir, &st)) + return run_dir(path_dir); + + return TEST_SKIP; +} + +#else + +int test__pe_file_parsing(struct test *test __maybe_unused, + int subtest __maybe_unused) +{ + return TEST_SKIP; +} + +#endif diff --git a/tools/perf/tests/pe-file.c b/tools/perf/tests/pe-file.c new file mode 100644 index 000000000000..eb3df5e9886f --- /dev/null +++ b/tools/perf/tests/pe-file.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 + +// pe-file.exe and pe-file.exe.debug built with; +// x86_64-w64-mingw32-gcc -o pe-file.exe pe-file.c +// -Wl,--file-alignment,4096 -Wl,--build-id +// x86_64-w64-mingw32-objcopy --only-keep-debug +// --compress-debug-sections pe-file.exe pe-file.exe.debug +// x86_64-w64-mingw32-objcopy --strip-debug +// --add-gnu-debuglink=pe-file.exe.debug pe-file.exe + +int main(int argc, char const *argv[]) +{ + return 0; +} diff --git a/tools/perf/tests/pe-file.exe b/tools/perf/tests/pe-file.exe new file mode 100644 index 000000000000..838a46dae724 Binary files /dev/null and b/tools/perf/tests/pe-file.exe differ diff --git a/tools/perf/tests/pe-file.exe.debug b/tools/perf/tests/pe-file.exe.debug new file mode 100644 index 000000000000..287d6718d6c9 Binary files /dev/null and b/tools/perf/tests/pe-file.exe.debug differ diff --git a/tools/perf/tests/python-use.c b/tools/perf/tests/python-use.c index 40ab72149ce1..98c6d474aa6f 100644 --- a/tools/perf/tests/python-use.c +++ b/tools/perf/tests/python-use.c @@ -18,6 +18,7 @@ int test__python_use(struct test *test __maybe_unused, int subtest __maybe_unuse PYTHONPATH, PYTHON, verbose > 0 ? "" : "2> /dev/null") < 0) return -1; + pr_debug("python usage test: \"%s\"\n", cmd); ret = system(cmd) ? -1 : 0; free(cmd); return ret; diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c index 60f0e9ee04fb..ed76c693f65e 100644 --- a/tools/perf/tests/sdt.c +++ b/tools/perf/tests/sdt.c @@ -28,16 +28,16 @@ static int target_function(void) static int build_id_cache__add_file(const char *filename) { char sbuild_id[SBUILD_ID_SIZE]; - u8 build_id[BUILD_ID_SIZE]; + struct build_id bid; int err; - err = filename__read_build_id(filename, &build_id, sizeof(build_id)); + err = filename__read_build_id(filename, &bid); if (err < 0) { pr_debug("Failed to read build id of %s\n", filename); return err; } - build_id__sprintf(build_id, sizeof(build_id), sbuild_id); + build_id__sprintf(&bid, sbuild_id); err = build_id_cache__add_s(sbuild_id, filename, NULL, false, false); if (err < 0) pr_debug("Failed to add build id cache of %s\n", filename); diff --git a/tools/perf/tests/shell/buildid.sh b/tools/perf/tests/shell/buildid.sh new file mode 100755 index 000000000000..4861a20edee2 --- /dev/null +++ b/tools/perf/tests/shell/buildid.sh @@ -0,0 +1,101 @@ +#!/bin/sh +# build id cache operations +# SPDX-License-Identifier: GPL-2.0 + +# skip if there's no readelf +if ! [ -x "$(command -v readelf)" ]; then + echo "failed: no readelf, install binutils" + exit 2 +fi + +# skip if there's no compiler +if ! [ -x "$(command -v cc)" ]; then + echo "failed: no compiler, install gcc" + exit 2 +fi + +ex_md5=$(mktemp /tmp/perf.ex.MD5.XXX) +ex_sha1=$(mktemp /tmp/perf.ex.SHA1.XXX) + +echo 'int main(void) { return 0; }' | cc -Wl,--build-id=sha1 -o ${ex_sha1} -x c - +echo 'int main(void) { return 0; }' | cc -Wl,--build-id=md5 -o ${ex_md5} -x c - + +echo "test binaries: ${ex_sha1} ${ex_md5}" + +check() +{ + id=`readelf -n ${1} 2>/dev/null | grep 'Build ID' | awk '{print $3}'` + + echo "build id: ${id}" + + link=${build_id_dir}/.build-id/${id:0:2}/${id:2} + echo "link: ${link}" + + if [ ! -h $link ]; then + echo "failed: link ${link} does not exist" + exit 1 + fi + + file=${build_id_dir}/.build-id/${id:0:2}/`readlink ${link}`/elf + echo "file: ${file}" + + if [ ! -x $file ]; then + echo "failed: file ${file} does not exist" + exit 1 + fi + + diff ${file} ${1} + if [ $? -ne 0 ]; then + echo "failed: ${file} do not match" + exit 1 + fi + + echo "OK for ${1}" +} + +test_add() +{ + build_id_dir=$(mktemp -d /tmp/perf.debug.XXX) + perf="perf --buildid-dir ${build_id_dir}" + + ${perf} buildid-cache -v -a ${1} + if [ $? -ne 0 ]; then + echo "failed: add ${1} to build id cache" + exit 1 + fi + + check ${1} + + rm -rf ${build_id_dir} +} + +test_record() +{ + data=$(mktemp /tmp/perf.data.XXX) + build_id_dir=$(mktemp -d /tmp/perf.debug.XXX) + perf="perf --buildid-dir ${build_id_dir}" + + ${perf} record --buildid-all -o ${data} ${1} + if [ $? -ne 0 ]; then + echo "failed: record ${1}" + exit 1 + fi + + check ${1} + + rm -rf ${build_id_dir} + rm -rf ${data} +} + +# add binaries manual via perf buildid-cache -a +test_add ${ex_sha1} +test_add ${ex_md5} + +# add binaries via perf record post processing +test_record ${ex_sha1} +test_record ${ex_md5} + +# cleanup +rm ${ex_sha1} ${ex_md5} + +exit ${err} diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh new file mode 100755 index 000000000000..8d84fdbed6a6 --- /dev/null +++ b/tools/perf/tests/shell/test_arm_coresight.sh @@ -0,0 +1,183 @@ +#!/bin/sh +# Check Arm CoreSight trace data recording and synthesized samples + +# Uses the 'perf record' to record trace data with Arm CoreSight sinks; +# then verify if there have any branch samples and instruction samples +# are generated by CoreSight with 'perf script' and 'perf report' +# commands. + +# SPDX-License-Identifier: GPL-2.0 +# Leo Yan , 2020 + +perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +file=$(mktemp /tmp/temporary_file.XXXXX) + +skip_if_no_cs_etm_event() { + perf list | grep -q 'cs_etm//' && return 0 + + # cs_etm event doesn't exist + return 2 +} + +skip_if_no_cs_etm_event || exit 2 + +cleanup_files() +{ + rm -f ${perfdata} + rm -f ${file} +} + +trap cleanup_files exit + +record_touch_file() { + echo "Recording trace (only user mode) with path: CPU$2 => $1" + rm -f $file + perf record -o ${perfdata} -e cs_etm/@$1/u --per-thread \ + -- taskset -c $2 touch $file +} + +perf_script_branch_samples() { + echo "Looking at perf.data file for dumping branch samples:" + + # Below is an example of the branch samples dumping: + # touch 6512 1 branches:u: ffffb220824c strcmp+0xc (/lib/aarch64-linux-gnu/ld-2.27.so) + # touch 6512 1 branches:u: ffffb22082e0 strcmp+0xa0 (/lib/aarch64-linux-gnu/ld-2.27.so) + # touch 6512 1 branches:u: ffffb2208320 strcmp+0xe0 (/lib/aarch64-linux-gnu/ld-2.27.so) + perf script -F,-time -i ${perfdata} | \ + egrep " +$1 +[0-9]+ .* +branches:([u|k]:)? +" +} + +perf_report_branch_samples() { + echo "Looking at perf.data file for reporting branch samples:" + + # Below is an example of the branch samples reporting: + # 73.04% 73.04% touch libc-2.27.so [.] _dl_addr + # 7.71% 7.71% touch libc-2.27.so [.] getenv + # 2.59% 2.59% touch ld-2.27.so [.] strcmp + perf report --stdio -i ${perfdata} | \ + egrep " +[0-9]+\.[0-9]+% +[0-9]+\.[0-9]+% +$1 " +} + +perf_report_instruction_samples() { + echo "Looking at perf.data file for instruction samples:" + + # Below is an example of the instruction samples reporting: + # 68.12% touch libc-2.27.so [.] _dl_addr + # 5.80% touch libc-2.27.so [.] getenv + # 4.35% touch ld-2.27.so [.] _dl_fixup + perf report --itrace=i1000i --stdio -i ${perfdata} | \ + egrep " +[0-9]+\.[0-9]+% +$1" +} + +is_device_sink() { + # If the node of "enable_sink" is existed under the device path, this + # means the device is a sink device. Need to exclude 'tpiu' since it + # cannot support perf PMU. + echo "$1" | egrep -q -v "tpiu" + + if [ $? -eq 0 -a -e "$1/enable_sink" ]; then + + pmu_dev="/sys/bus/event_source/devices/cs_etm/sinks/$2" + + # Warn if the device is not supported by PMU + if ! [ -f $pmu_dev ]; then + echo "PMU doesn't support $pmu_dev" + fi + + return 0 + fi + + # Otherwise, it's not a sink device + return 1 +} + +arm_cs_iterate_devices() { + for dev in $1/connections/out\:*; do + + # Skip testing if it's not a directory + ! [ -d $dev ] && continue; + + # Read out its symbol link file name + path=`readlink -f $dev` + + # Extract device name from path, e.g. + # path = '/sys/devices/platform/20010000.etf/tmc_etf0' + # `> device_name = 'tmc_etf0' + device_name=$(basename $path) + + if is_device_sink $path $devce_name; then + + record_touch_file $device_name $2 && + perf_script_branch_samples touch && + perf_report_branch_samples touch && + perf_report_instruction_samples touch + + err=$? + + # Exit when find failure + [ $err != 0 ] && exit $err + fi + + arm_cs_iterate_devices $dev $2 + done +} + +arm_cs_etm_traverse_path_test() { + # Iterate for every ETM device + for dev in /sys/bus/coresight/devices/etm*; do + + # Find the ETM device belonging to which CPU + cpu=`cat $dev/cpu` + + echo $dev + echo $cpu + + # Use depth-first search (DFS) to iterate outputs + arm_cs_iterate_devices $dev $cpu + done +} + +arm_cs_etm_system_wide_test() { + echo "Recording trace with system wide mode" + perf record -o ${perfdata} -e cs_etm// -a -- ls + + perf_script_branch_samples perf && + perf_report_branch_samples perf && + perf_report_instruction_samples perf + + err=$? + + # Exit when find failure + [ $err != 0 ] && exit $err +} + +arm_cs_etm_snapshot_test() { + echo "Recording trace with snapshot mode" + perf record -o ${perfdata} -e cs_etm// -S \ + -- dd if=/dev/zero of=/dev/null & + PERFPID=$! + + # Wait for perf program + sleep 1 + + # Send signal to snapshot trace data + kill -USR2 $PERFPID + + # Stop perf program + kill $PERFPID + wait $PERFPID + + perf_script_branch_samples dd && + perf_report_branch_samples dd && + perf_report_instruction_samples dd + + err=$? + + # Exit when find failure + [ $err != 0 ] && exit $err +} + +arm_cs_etm_traverse_path_test +arm_cs_etm_system_wide_test +arm_cs_etm_snapshot_test +exit 0 diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 4447a516c689..c85a2c08e407 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -122,6 +122,8 @@ int test__pfm(struct test *test, int subtest); const char *test__pfm_subtest_get_desc(int subtest); int test__pfm_subtest_get_nr(void); int test__parse_metric(struct test *test, int subtest); +int test__pe_file_parsing(struct test *test, int subtest); +int test__expand_cgroup_events(struct test *test, int subtest); bool test__bp_signal_is_supported(void); bool test__bp_account_is_supported(void); diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c index 862c8331dded..3c5e97b93dd5 100644 --- a/tools/perf/trace/beauty/mmap.c +++ b/tools/perf/trace/beauty/mmap.c @@ -1,40 +1,28 @@ // SPDX-License-Identifier: LGPL-2.1 -#include #include -static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, - struct syscall_arg *arg) +#include "trace/beauty/generated/mmap_prot_array.c" +static DEFINE_STRARRAY(mmap_prot, "PROT_"); + +static size_t mmap__scnprintf_prot(unsigned long prot, char *bf, size_t size, bool show_prefix) { - const char *prot_prefix = "PROT_"; - int printed = 0, prot = arg->val; - bool show_prefix = arg->show_string_prefix; + return strarray__scnprintf_flags(&strarray__mmap_prot, bf, size, show_prefix, prot); +} - if (prot == PROT_NONE) - return scnprintf(bf, size, "%sNONE", show_prefix ? prot_prefix : ""); -#define P_MMAP_PROT(n) \ - if (prot & PROT_##n) { \ - printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prot_prefix :"", #n); \ - prot &= ~PROT_##n; \ - } +static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, struct syscall_arg *arg) +{ + unsigned long prot = arg->val; - P_MMAP_PROT(READ); - P_MMAP_PROT(WRITE); - P_MMAP_PROT(EXEC); - P_MMAP_PROT(SEM); - P_MMAP_PROT(GROWSDOWN); - P_MMAP_PROT(GROWSUP); -#undef P_MMAP_PROT + if (prot == 0) + return scnprintf(bf, size, "%sNONE", arg->show_string_prefix ? strarray__mmap_prot.prefix : ""); - if (prot) - printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot); - - return printed; + return mmap__scnprintf_prot(prot, bf, size, arg->show_string_prefix); } #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot #include "trace/beauty/generated/mmap_flags_array.c" - static DEFINE_STRARRAY(mmap_flags, "MAP_"); +static DEFINE_STRARRAY(mmap_flags, "MAP_"); static size_t mmap__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix) { @@ -54,28 +42,22 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags -static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size, - struct syscall_arg *arg) +#include "trace/beauty/generated/mremap_flags_array.c" +static DEFINE_STRARRAY(mremap_flags, "MREMAP_"); + +static size_t mremap__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix) { - const char *flags_prefix = "MREMAP_"; - bool show_prefix = arg->show_string_prefix; - int printed = 0, flags = arg->val; + return strarray__scnprintf_flags(&strarray__mremap_flags, bf, size, show_prefix, flags); +} -#define P_MREMAP_FLAG(n) \ - if (flags & MREMAP_##n) { \ - printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? flags_prefix : "", #n); \ - flags &= ~MREMAP_##n; \ - } +static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size, struct syscall_arg *arg) +{ + unsigned long flags = arg->val; - P_MREMAP_FLAG(MAYMOVE); - P_MREMAP_FLAG(FIXED); - P_MREMAP_FLAG(DONTUNMAP); -#undef P_MREMAP_FLAG + if (!(flags & MREMAP_FIXED)) + arg->mask |= (1 << 5); /* Mask 5th ('new_address') args, ignored */ - if (flags) - printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); - - return printed; + return mremap__scnprintf_flags(flags, bf, size, arg->show_string_prefix); } #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags diff --git a/tools/perf/trace/beauty/mmap_flags.sh b/tools/perf/trace/beauty/mmap_flags.sh index 5f5eefcb3c74..39eb2595983b 100755 --- a/tools/perf/trace/beauty/mmap_flags.sh +++ b/tools/perf/trace/beauty/mmap_flags.sh @@ -21,20 +21,20 @@ printf "static const char *mmap_flags[] = {\n" regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MAP_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' egrep -q $regex ${arch_mman} && \ (egrep $regex ${arch_mman} | \ - sed -r "s/$regex/\2 \1/g" | \ - xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n") + sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \ + xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n") egrep -q $regex ${linux_mman} && \ (egrep $regex ${linux_mman} | \ egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \ - sed -r "s/$regex/\2 \1/g" | \ - xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n") + sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \ + xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n") ([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+.*' ${arch_mman}) && (egrep $regex ${header_dir}/mman.h | \ - sed -r "s/$regex/\2 \1/g" | \ - xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n") + sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \ + xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n") printf "};\n" diff --git a/tools/perf/trace/beauty/mmap_prot.sh b/tools/perf/trace/beauty/mmap_prot.sh new file mode 100755 index 000000000000..28f638f8d216 --- /dev/null +++ b/tools/perf/trace/beauty/mmap_prot.sh @@ -0,0 +1,30 @@ +#!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 + +if [ $# -ne 2 ] ; then + [ $# -eq 1 ] && hostarch=$1 || hostarch=`uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/` + asm_header_dir=tools/include/uapi/asm-generic + arch_header_dir=tools/arch/${hostarch}/include/uapi/asm +else + asm_header_dir=$1 + arch_header_dir=$2 +fi + +common_mman=${asm_header_dir}/mman-common.h +arch_mman=${arch_header_dir}/mman.h + +prefix="PROT" + +printf "static const char *mmap_prot[] = {\n" +regex=`printf '^[[:space:]]*#[[:space:]]*define[[:space:]]+%s_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' ${prefix}` +([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+has_build_id) { - build_id__sprintf(dso->build_id, - sizeof(dso->build_id), bf + 15); + build_id__sprintf(&dso->bid, bf + 15); build_id_msg = bf; } scnprintf(buf, buflen, @@ -3127,6 +3126,8 @@ static int annotation__config(const char *var, const char *value, void *data) value); } else if (!strcmp(var, "annotate.use_offset")) { opt->use_offset = perf_config_bool("use_offset", value); + } else if (!strcmp(var, "annotate.disassembler_style")) { + opt->disassembler_style = value; } else { pr_debug("%s variable unknown, ignoring...", var); } diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 31207b6e2066..8763772f1095 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -37,6 +37,7 @@ #include #include +#include static bool no_buildid_cache; @@ -95,13 +96,13 @@ struct perf_tool build_id__mark_dso_hit_ops = { .ordered_events = true, }; -int build_id__sprintf(const u8 *build_id, int len, char *bf) +int build_id__sprintf(const struct build_id *build_id, char *bf) { char *bid = bf; - const u8 *raw = build_id; - int i; + const u8 *raw = build_id->data; + size_t i; - for (i = 0; i < len; ++i) { + for (i = 0; i < build_id->size; ++i) { sprintf(bid, "%02x", *raw); ++raw; bid += 2; @@ -113,7 +114,7 @@ int build_id__sprintf(const u8 *build_id, int len, char *bf) int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id) { char notes[PATH_MAX]; - u8 build_id[BUILD_ID_SIZE]; + struct build_id bid; int ret; if (!root_dir) @@ -121,25 +122,23 @@ int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id) scnprintf(notes, sizeof(notes), "%s/sys/kernel/notes", root_dir); - ret = sysfs__read_build_id(notes, build_id, sizeof(build_id)); + ret = sysfs__read_build_id(notes, &bid); if (ret < 0) return ret; - return build_id__sprintf(build_id, sizeof(build_id), sbuild_id); + return build_id__sprintf(&bid, sbuild_id); } int filename__sprintf_build_id(const char *pathname, char *sbuild_id) { - u8 build_id[BUILD_ID_SIZE]; + struct build_id bid; int ret; - ret = filename__read_build_id(pathname, build_id, sizeof(build_id)); + ret = filename__read_build_id(pathname, &bid); if (ret < 0) return ret; - else if (ret != sizeof(build_id)) - return -EINVAL; - return build_id__sprintf(build_id, sizeof(build_id), sbuild_id); + return build_id__sprintf(&bid, sbuild_id); } /* asnprintf consolidates asprintf and snprintf */ @@ -272,7 +271,7 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size, if (!dso->has_build_id) return NULL; - build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); + build_id__sprintf(&dso->bid, sbuild_id); linkname = build_id_cache__linkname(sbuild_id, NULL, 0); if (!linkname) return NULL; @@ -297,7 +296,7 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size, continue; \ else -static int write_buildid(const char *name, size_t name_len, u8 *build_id, +static int write_buildid(const char *name, size_t name_len, struct build_id *bid, pid_t pid, u16 misc, struct feat_fd *fd) { int err; @@ -308,7 +307,9 @@ static int write_buildid(const char *name, size_t name_len, u8 *build_id, len = PERF_ALIGN(len, NAME_ALIGN); memset(&b, 0, sizeof(b)); - memcpy(&b.build_id, build_id, BUILD_ID_SIZE); + memcpy(&b.data, bid->data, bid->size); + b.size = (u8) bid->size; + misc |= PERF_RECORD_MISC_BUILD_ID_SIZE; b.pid = pid; b.header.misc = misc; b.header.size = sizeof(b) + len; @@ -355,7 +356,7 @@ static int machine__write_buildid_table(struct machine *machine, in_kernel = pos->kernel || is_kernel_module(name, PERF_RECORD_MISC_CPUMODE_UNKNOWN); - err = write_buildid(name, name_len, pos->build_id, machine->pid, + err = write_buildid(name, name_len, &pos->bid, machine->pid, in_kernel ? kmisc : umisc, fd); if (err) break; @@ -769,13 +770,13 @@ out_free: return err; } -static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size, +static int build_id_cache__add_b(const struct build_id *bid, const char *name, struct nsinfo *nsi, bool is_kallsyms, bool is_vdso) { char sbuild_id[SBUILD_ID_SIZE]; - build_id__sprintf(build_id, build_id_size, sbuild_id); + build_id__sprintf(bid, sbuild_id); return build_id_cache__add_s(sbuild_id, name, nsi, is_kallsyms, is_vdso); @@ -841,8 +842,8 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine) is_kallsyms = true; name = machine->mmap_name; } - return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name, - dso->nsinfo, is_kallsyms, is_vdso); + return build_id_cache__add_b(&dso->bid, name, dso->nsinfo, + is_kallsyms, is_vdso); } static int __dsos__cache_build_ids(struct list_head *head, @@ -902,3 +903,10 @@ bool perf_session__read_build_ids(struct perf_session *session, bool with_hits) return ret; } + +void build_id__init(struct build_id *bid, const u8 *data, size_t size) +{ + WARN_ON(size > BUILD_ID_SIZE); + memcpy(bid->data, data, size); + bid->size = size; +} diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index aad419bb165c..f293f99d5dba 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -8,13 +8,19 @@ #include "tool.h" #include +struct build_id { + u8 data[BUILD_ID_SIZE]; + size_t size; +}; + struct nsinfo; extern struct perf_tool build_id__mark_dso_hit_ops; struct dso; struct feat_fd; -int build_id__sprintf(const u8 *build_id, int len, char *bf); +void build_id__init(struct build_id *bid, const u8 *data, size_t size); +int build_id__sprintf(const struct build_id *build_id, char *bf); int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id); int filename__sprintf_build_id(const char *pathname, char *sbuild_id); char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf, @@ -29,6 +35,10 @@ int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event, int dsos__hit_all(struct perf_session *session); +int perf_event__inject_buildid(struct perf_tool *tool, union perf_event *event, + struct perf_sample *sample, struct evsel *evsel, + struct machine *machine); + bool perf_session__read_build_ids(struct perf_session *session, bool with_hits); int perf_session__write_buildid_table(struct perf_session *session, struct feat_fd *fd); diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 2775b752f2fa..1b60985690bb 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1613,3 +1613,102 @@ void callchain_param_setup(u64 sample_type) callchain_param.record_mode = CALLCHAIN_FP; } } + +static bool chain_match(struct callchain_list *base_chain, + struct callchain_list *pair_chain) +{ + enum match_result match; + + match = match_chain_strings(base_chain->srcline, + pair_chain->srcline); + if (match != MATCH_ERROR) + return match == MATCH_EQ; + + match = match_chain_dso_addresses(base_chain->ms.map, + base_chain->ip, + pair_chain->ms.map, + pair_chain->ip); + + return match == MATCH_EQ; +} + +bool callchain_cnode_matched(struct callchain_node *base_cnode, + struct callchain_node *pair_cnode) +{ + struct callchain_list *base_chain, *pair_chain; + bool match = false; + + pair_chain = list_first_entry(&pair_cnode->val, + struct callchain_list, + list); + + list_for_each_entry(base_chain, &base_cnode->val, list) { + if (&pair_chain->list == &pair_cnode->val) + return false; + + if (!base_chain->srcline || !pair_chain->srcline) { + pair_chain = list_next_entry(pair_chain, list); + continue; + } + + match = chain_match(base_chain, pair_chain); + if (!match) + return false; + + pair_chain = list_next_entry(pair_chain, list); + } + + /* + * Say chain1 is ABC, chain2 is ABCD, we consider they are + * not fully matched. + */ + if (pair_chain && (&pair_chain->list != &pair_cnode->val)) + return false; + + return match; +} + +static u64 count_callchain_hits(struct hist_entry *he) +{ + struct rb_root *root = &he->sorted_chain; + struct rb_node *rb_node = rb_first(root); + struct callchain_node *node; + u64 chain_hits = 0; + + while (rb_node) { + node = rb_entry(rb_node, struct callchain_node, rb_node); + chain_hits += node->hit; + rb_node = rb_next(rb_node); + } + + return chain_hits; +} + +u64 callchain_total_hits(struct hists *hists) +{ + struct rb_node *next = rb_first_cached(&hists->entries); + u64 chain_hits = 0; + + while (next) { + struct hist_entry *he = rb_entry(next, struct hist_entry, + rb_node); + + chain_hits += count_callchain_hits(he); + next = rb_next(&he->rb_node); + } + + return chain_hits; +} + +s64 callchain_avg_cycles(struct callchain_node *cnode) +{ + struct callchain_list *chain; + s64 cycles = 0; + + list_for_each_entry(chain, &cnode->val, list) { + if (chain->srcline && chain->branch_count) + cycles += chain->cycles_count / chain->branch_count; + } + + return cycles; +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index fe36a9e5ccd1..5824134f983b 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -13,6 +13,7 @@ struct ip_callchain; struct map; struct perf_sample; struct thread; +struct hists; #define HELP_PAD "\t\t\t\t" @@ -298,4 +299,12 @@ int callchain_branch_counts(struct callchain_root *root, u64 *abort_count, u64 *cycles_count); void callchain_param_setup(u64 sample_type); + +bool callchain_cnode_matched(struct callchain_node *base_cnode, + struct callchain_node *pair_cnode); + +u64 callchain_total_hits(struct hists *hists); + +s64 callchain_avg_cycles(struct callchain_node *cnode); + #endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 050dea9f1e88..b81324a13a2b 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -3,6 +3,9 @@ #include "evsel.h" #include "cgroup.h" #include "evlist.h" +#include "rblist.h" +#include "metricgroup.h" +#include "stat.h" #include #include #include @@ -48,7 +51,7 @@ static struct cgroup *evlist__find_cgroup(struct evlist *evlist, const char *str return NULL; } -static struct cgroup *cgroup__new(const char *name) +static struct cgroup *cgroup__new(const char *name, bool do_open) { struct cgroup *cgroup = zalloc(sizeof(*cgroup)); @@ -58,9 +61,14 @@ static struct cgroup *cgroup__new(const char *name) cgroup->name = strdup(name); if (!cgroup->name) goto out_err; - cgroup->fd = open_cgroup(name); - if (cgroup->fd == -1) - goto out_free_name; + + if (do_open) { + cgroup->fd = open_cgroup(name); + if (cgroup->fd == -1) + goto out_free_name; + } else { + cgroup->fd = -1; + } } return cgroup; @@ -76,7 +84,7 @@ struct cgroup *evlist__findnew_cgroup(struct evlist *evlist, const char *name) { struct cgroup *cgroup = evlist__find_cgroup(evlist, name); - return cgroup ?: cgroup__new(name); + return cgroup ?: cgroup__new(name, true); } static int add_cgroup(struct evlist *evlist, const char *str) @@ -193,6 +201,103 @@ int parse_cgroups(const struct option *opt, const char *str, return 0; } +int evlist__expand_cgroup(struct evlist *evlist, const char *str, + struct rblist *metric_events, bool open_cgroup) +{ + struct evlist *orig_list, *tmp_list; + struct evsel *pos, *evsel, *leader; + struct rblist orig_metric_events; + struct cgroup *cgrp = NULL; + const char *p, *e, *eos = str + strlen(str); + int ret = -1; + + if (evlist->core.nr_entries == 0) { + fprintf(stderr, "must define events before cgroups\n"); + return -EINVAL; + } + + orig_list = evlist__new(); + tmp_list = evlist__new(); + if (orig_list == NULL || tmp_list == NULL) { + fprintf(stderr, "memory allocation failed\n"); + return -ENOMEM; + } + + /* save original events and init evlist */ + perf_evlist__splice_list_tail(orig_list, &evlist->core.entries); + evlist->core.nr_entries = 0; + + if (metric_events) { + orig_metric_events = *metric_events; + rblist__init(metric_events); + } else { + rblist__init(&orig_metric_events); + } + + for (;;) { + p = strchr(str, ','); + e = p ? p : eos; + + /* allow empty cgroups, i.e., skip */ + if (e - str) { + /* termination added */ + char *name = strndup(str, e - str); + if (!name) + goto out_err; + + cgrp = cgroup__new(name, open_cgroup); + free(name); + if (cgrp == NULL) + goto out_err; + } else { + cgrp = NULL; + } + + leader = NULL; + evlist__for_each_entry(orig_list, pos) { + evsel = evsel__clone(pos); + if (evsel == NULL) + goto out_err; + + cgroup__put(evsel->cgrp); + evsel->cgrp = cgroup__get(cgrp); + + if (evsel__is_group_leader(pos)) + leader = evsel; + evsel->leader = leader; + + evlist__add(tmp_list, evsel); + } + /* cgroup__new() has a refcount, release it here */ + cgroup__put(cgrp); + nr_cgroups++; + + if (metric_events) { + perf_stat__collect_metric_expr(tmp_list); + if (metricgroup__copy_metric_events(tmp_list, cgrp, + metric_events, + &orig_metric_events) < 0) + break; + } + + perf_evlist__splice_list_tail(evlist, &tmp_list->core.entries); + tmp_list->core.nr_entries = 0; + + if (!p) { + ret = 0; + break; + } + str = p+1; + } + +out_err: + evlist__delete(orig_list); + evlist__delete(tmp_list); + rblist__exit(&orig_metric_events); + + return ret; +} + static struct cgroup *__cgroup__findnew(struct rb_root *root, uint64_t id, bool create, const char *path) { diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h index e98d5975fe55..162906f3412a 100644 --- a/tools/perf/util/cgroup.h +++ b/tools/perf/util/cgroup.h @@ -22,8 +22,11 @@ struct cgroup *cgroup__get(struct cgroup *cgroup); void cgroup__put(struct cgroup *cgroup); struct evlist; +struct rblist; struct cgroup *evlist__findnew_cgroup(struct evlist *evlist, const char *name); +int evlist__expand_cgroup(struct evlist *evlist, const char *cgroups, + struct rblist *metric_events, bool open_cgroup); void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup); diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 20be0504fb95..6969f82843ee 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -489,7 +489,7 @@ int perf_default_config(const char *var, const char *value, return 0; } -static int perf_config_from_file(config_fn_t fn, const char *filename, void *data) +int perf_config_from_file(config_fn_t fn, const char *filename, void *data) { int ret; FILE *f = fopen(filename, "r"); diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h index c10b66dde2f3..8c881e3a3ec3 100644 --- a/tools/perf/util/config.h +++ b/tools/perf/util/config.h @@ -26,6 +26,8 @@ struct perf_config_set { extern const char *config_exclusive_filename; typedef int (*config_fn_t)(const char *, const char *, void *); + +int perf_config_from_file(config_fn_t fn, const char *filename, void *data); int perf_default_config(const char *, const char *, void *); int perf_config(config_fn_t fn, void *); int perf_config_int(int *dest, const char *, const char *); diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 5a3b4755f0b3..55c11e854fe4 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -172,9 +172,7 @@ int dso__read_binary_type_filename(const struct dso *dso, break; } - build_id__sprintf(dso->build_id, - sizeof(dso->build_id), - build_id_hex); + build_id__sprintf(&dso->bid, build_id_hex); len = __symbol__join_symfs(filename, size, "/usr/lib/debug/.build-id/"); snprintf(filename + len, size - len, "%.2s/%s.debug", build_id_hex, build_id_hex + 2); @@ -1328,15 +1326,16 @@ void dso__put(struct dso *dso) dso__delete(dso); } -void dso__set_build_id(struct dso *dso, void *build_id) +void dso__set_build_id(struct dso *dso, struct build_id *bid) { - memcpy(dso->build_id, build_id, sizeof(dso->build_id)); + dso->bid = *bid; dso->has_build_id = 1; } -bool dso__build_id_equal(const struct dso *dso, u8 *build_id) +bool dso__build_id_equal(const struct dso *dso, struct build_id *bid) { - return memcmp(dso->build_id, build_id, sizeof(dso->build_id)) == 0; + return dso->bid.size == bid->size && + memcmp(dso->bid.data, bid->data, dso->bid.size) == 0; } void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine) @@ -1346,8 +1345,7 @@ void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine) if (machine__is_default_guest(machine)) return; sprintf(path, "%s/sys/kernel/notes", machine->root_dir); - if (sysfs__read_build_id(path, dso->build_id, - sizeof(dso->build_id)) == 0) + if (sysfs__read_build_id(path, &dso->bid) == 0) dso->has_build_id = true; } @@ -1365,18 +1363,17 @@ int dso__kernel_module_get_build_id(struct dso *dso, "%s/sys/module/%.*s/notes/.note.gnu.build-id", root_dir, (int)strlen(name) - 1, name); - if (sysfs__read_build_id(filename, dso->build_id, - sizeof(dso->build_id)) == 0) + if (sysfs__read_build_id(filename, &dso->bid) == 0) dso->has_build_id = true; return 0; } -size_t dso__fprintf_buildid(struct dso *dso, FILE *fp) +static size_t dso__fprintf_buildid(struct dso *dso, FILE *fp) { char sbuild_id[SBUILD_ID_SIZE]; - build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); + build_id__sprintf(&dso->bid, sbuild_id); return fprintf(fp, "%s", sbuild_id); } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 8ad17f395a19..d8cb4f5680a4 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -176,7 +176,7 @@ struct dso { bool sorted_by_name; bool loaded; u8 rel; - u8 build_id[BUILD_ID_SIZE]; + struct build_id bid; u64 text_offset; const char *short_name; const char *long_name; @@ -260,8 +260,8 @@ bool dso__sorted_by_name(const struct dso *dso); void dso__set_sorted_by_name(struct dso *dso); void dso__sort_by_name(struct dso *dso); -void dso__set_build_id(struct dso *dso, void *build_id); -bool dso__build_id_equal(const struct dso *dso, u8 *build_id); +void dso__set_build_id(struct dso *dso, struct build_id *bid); +bool dso__build_id_equal(const struct dso *dso, struct build_id *bid); void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine); int dso__kernel_module_get_build_id(struct dso *dso, const char *root_dir); @@ -362,7 +362,6 @@ struct dso *machine__findnew_kernel(struct machine *machine, const char *name, void dso__reset_find_symbol_cache(struct dso *dso); -size_t dso__fprintf_buildid(struct dso *dso, FILE *fp); size_t dso__fprintf_symbols_by_name(struct dso *dso, FILE *fp); size_t dso__fprintf(struct dso *dso, FILE *fp); diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index 939471731ea6..183a81d5b2f9 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -73,8 +73,7 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits) continue; } nsinfo__mountns_enter(pos->nsinfo, &nsc); - if (filename__read_build_id(pos->long_name, pos->build_id, - sizeof(pos->build_id)) > 0) { + if (filename__read_build_id(pos->long_name, &pos->bid) > 0) { have_build_id = true; pos->has_build_id = true; } @@ -288,10 +287,12 @@ size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, size_t ret = 0; list_for_each_entry(pos, head, node) { + char sbuild_id[SBUILD_ID_SIZE]; + if (skip && skip(pos, parm)) continue; - ret += dso__fprintf_buildid(pos, fp); - ret += fprintf(fp, " %s\n", pos->long_name); + build_id__sprintf(&pos->bid, sbuild_id); + ret += fprintf(fp, "%-40s %s\n", sbuild_id, pos->long_name); } return ret; } diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 317a26571845..05616d4138a9 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -398,7 +398,7 @@ size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp) if (event->header.type == PERF_RECORD_SWITCH) return fprintf(fp, " %s\n", in_out); - return fprintf(fp, " %s %s pid/tid: %5u/%-5u\n", + return fprintf(fp, " %s %s pid/tid: %5d/%-5d\n", in_out, out ? "next" : "prev", event->context_switch.next_prev_pid, event->context_switch.next_prev_tid); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index c0768c61eb43..8bdf3d2c907c 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1732,6 +1732,91 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list, return leader; } +static int evlist__parse_control_fifo(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close) +{ + char *s, *p; + int ret = 0, fd; + + if (strncmp(str, "fifo:", 5)) + return -EINVAL; + + str += 5; + if (!*str || *str == ',') + return -EINVAL; + + s = strdup(str); + if (!s) + return -ENOMEM; + + p = strchr(s, ','); + if (p) + *p = '\0'; + + /* + * O_RDWR avoids POLLHUPs which is necessary to allow the other + * end of a FIFO to be repeatedly opened and closed. + */ + fd = open(s, O_RDWR | O_NONBLOCK | O_CLOEXEC); + if (fd < 0) { + pr_err("Failed to open '%s'\n", s); + ret = -errno; + goto out_free; + } + *ctl_fd = fd; + *ctl_fd_close = true; + + if (p && *++p) { + /* O_RDWR | O_NONBLOCK means the other end need not be open */ + fd = open(p, O_RDWR | O_NONBLOCK | O_CLOEXEC); + if (fd < 0) { + pr_err("Failed to open '%s'\n", p); + ret = -errno; + goto out_free; + } + *ctl_fd_ack = fd; + } + +out_free: + free(s); + return ret; +} + +int evlist__parse_control(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close) +{ + char *comma = NULL, *endptr = NULL; + + *ctl_fd_close = false; + + if (strncmp(str, "fd:", 3)) + return evlist__parse_control_fifo(str, ctl_fd, ctl_fd_ack, ctl_fd_close); + + *ctl_fd = strtoul(&str[3], &endptr, 0); + if (endptr == &str[3]) + return -EINVAL; + + comma = strchr(str, ','); + if (comma) { + if (endptr != comma) + return -EINVAL; + + *ctl_fd_ack = strtoul(comma + 1, &endptr, 0); + if (endptr == comma + 1 || *endptr != '\0') + return -EINVAL; + } + + return 0; +} + +void evlist__close_control(int ctl_fd, int ctl_fd_ack, bool *ctl_fd_close) +{ + if (*ctl_fd_close) { + *ctl_fd_close = false; + close(ctl_fd); + if (ctl_fd_ack >= 0) + close(ctl_fd_ack); + } +} + int evlist__initialize_ctlfd(struct evlist *evlist, int fd, int ack) { if (fd == -1) { @@ -1783,6 +1868,7 @@ static int evlist__ctlfd_recv(struct evlist *evlist, enum evlist_ctl_cmd *cmd, char c; size_t bytes_read = 0; + *cmd = EVLIST_CTL_CMD_UNSUPPORTED; memset(cmd_data, 0, data_size); data_size--; @@ -1794,30 +1880,39 @@ static int evlist__ctlfd_recv(struct evlist *evlist, enum evlist_ctl_cmd *cmd, cmd_data[bytes_read++] = c; if (bytes_read == data_size) break; - } else { - if (err == -1) + continue; + } else if (err == -1) { + if (errno == EINTR) + continue; + if (errno == EAGAIN || errno == EWOULDBLOCK) + err = 0; + else pr_err("Failed to read from ctlfd %d: %m\n", evlist->ctl_fd.fd); - break; } + break; } while (1); pr_debug("Message from ctl_fd: \"%s%s\"\n", cmd_data, bytes_read == data_size ? "" : c == '\n' ? "\\n" : "\\0"); - if (err > 0) { + if (bytes_read > 0) { if (!strncmp(cmd_data, EVLIST_CTL_CMD_ENABLE_TAG, (sizeof(EVLIST_CTL_CMD_ENABLE_TAG)-1))) { *cmd = EVLIST_CTL_CMD_ENABLE; } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_DISABLE_TAG, (sizeof(EVLIST_CTL_CMD_DISABLE_TAG)-1))) { *cmd = EVLIST_CTL_CMD_DISABLE; + } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_SNAPSHOT_TAG, + (sizeof(EVLIST_CTL_CMD_SNAPSHOT_TAG)-1))) { + *cmd = EVLIST_CTL_CMD_SNAPSHOT; + pr_debug("is snapshot\n"); } } - return err; + return bytes_read ? (int)bytes_read : err; } -static int evlist__ctlfd_ack(struct evlist *evlist) +int evlist__ctlfd_ack(struct evlist *evlist) { int err; @@ -1853,13 +1948,16 @@ int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd) case EVLIST_CTL_CMD_DISABLE: evlist__disable(evlist); break; + case EVLIST_CTL_CMD_SNAPSHOT: + break; case EVLIST_CTL_CMD_ACK: case EVLIST_CTL_CMD_UNSUPPORTED: default: pr_debug("ctlfd: unsupported %d\n", *cmd); break; } - if (!(*cmd == EVLIST_CTL_CMD_ACK || *cmd == EVLIST_CTL_CMD_UNSUPPORTED)) + if (!(*cmd == EVLIST_CTL_CMD_ACK || *cmd == EVLIST_CTL_CMD_UNSUPPORTED || + *cmd == EVLIST_CTL_CMD_SNAPSHOT)) evlist__ctlfd_ack(evlist); } } @@ -1871,3 +1969,14 @@ int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd) return err; } + +struct evsel *evlist__find_evsel(struct evlist *evlist, int idx) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->idx == idx) + return evsel; + } + return NULL; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index c73f7f7f120b..e1a450322bc5 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -363,6 +363,7 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evlist, #define EVLIST_CTL_CMD_ENABLE_TAG "enable" #define EVLIST_CTL_CMD_DISABLE_TAG "disable" #define EVLIST_CTL_CMD_ACK_TAG "ack\n" +#define EVLIST_CTL_CMD_SNAPSHOT_TAG "snapshot" #define EVLIST_CTL_CMD_MAX_LEN 64 @@ -370,15 +371,20 @@ enum evlist_ctl_cmd { EVLIST_CTL_CMD_UNSUPPORTED = 0, EVLIST_CTL_CMD_ENABLE, EVLIST_CTL_CMD_DISABLE, - EVLIST_CTL_CMD_ACK + EVLIST_CTL_CMD_ACK, + EVLIST_CTL_CMD_SNAPSHOT, }; +int evlist__parse_control(const char *str, int *ctl_fd, int *ctl_fd_ack, bool *ctl_fd_close); +void evlist__close_control(int ctl_fd, int ctl_fd_ack, bool *ctl_fd_close); int evlist__initialize_ctlfd(struct evlist *evlist, int ctl_fd, int ctl_fd_ack); int evlist__finalize_ctlfd(struct evlist *evlist); bool evlist__ctlfd_initialized(struct evlist *evlist); int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd); +int evlist__ctlfd_ack(struct evlist *evlist); #define EVLIST_ENABLED_MSG "Events enabled\n" #define EVLIST_DISABLED_MSG "Events disabled\n" +struct evsel *evlist__find_evsel(struct evlist *evlist, int idx); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 459b51e90063..1cad6051d8b0 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -331,6 +331,110 @@ error_free: goto out; } +static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src) +{ + struct evsel_config_term *pos, *tmp; + + list_for_each_entry(pos, &src->config_terms, list) { + tmp = malloc(sizeof(*tmp)); + if (tmp == NULL) + return -ENOMEM; + + *tmp = *pos; + if (tmp->free_str) { + tmp->val.str = strdup(pos->val.str); + if (tmp->val.str == NULL) { + free(tmp); + return -ENOMEM; + } + } + list_add_tail(&tmp->list, &dst->config_terms); + } + return 0; +} + +/** + * evsel__clone - create a new evsel copied from @orig + * @orig: original evsel + * + * The assumption is that @orig is not configured nor opened yet. + * So we only care about the attributes that can be set while it's parsed. + */ +struct evsel *evsel__clone(struct evsel *orig) +{ + struct evsel *evsel; + + BUG_ON(orig->core.fd); + BUG_ON(orig->counts); + BUG_ON(orig->priv); + BUG_ON(orig->per_pkg_mask); + + /* cannot handle BPF objects for now */ + if (orig->bpf_obj) + return NULL; + + evsel = evsel__new(&orig->core.attr); + if (evsel == NULL) + return NULL; + + evsel->core.cpus = perf_cpu_map__get(orig->core.cpus); + evsel->core.own_cpus = perf_cpu_map__get(orig->core.own_cpus); + evsel->core.threads = perf_thread_map__get(orig->core.threads); + evsel->core.nr_members = orig->core.nr_members; + evsel->core.system_wide = orig->core.system_wide; + + if (orig->name) { + evsel->name = strdup(orig->name); + if (evsel->name == NULL) + goto out_err; + } + if (orig->group_name) { + evsel->group_name = strdup(orig->group_name); + if (evsel->group_name == NULL) + goto out_err; + } + if (orig->pmu_name) { + evsel->pmu_name = strdup(orig->pmu_name); + if (evsel->pmu_name == NULL) + goto out_err; + } + if (orig->filter) { + evsel->filter = strdup(orig->filter); + if (evsel->filter == NULL) + goto out_err; + } + evsel->cgrp = cgroup__get(orig->cgrp); + evsel->tp_format = orig->tp_format; + evsel->handler = orig->handler; + evsel->leader = orig->leader; + + evsel->max_events = orig->max_events; + evsel->tool_event = orig->tool_event; + evsel->unit = orig->unit; + evsel->scale = orig->scale; + evsel->snapshot = orig->snapshot; + evsel->per_pkg = orig->per_pkg; + evsel->percore = orig->percore; + evsel->precise_max = orig->precise_max; + evsel->use_uncore_alias = orig->use_uncore_alias; + evsel->is_libpfm_event = orig->is_libpfm_event; + + evsel->exclude_GH = orig->exclude_GH; + evsel->sample_read = orig->sample_read; + evsel->auto_merge_stats = orig->auto_merge_stats; + evsel->collect_stat = orig->collect_stat; + evsel->weak_group = orig->weak_group; + + if (evsel__copy_config_terms(evsel, orig) < 0) + goto out_err; + + return evsel; + +out_err: + evsel__delete(evsel); + return NULL; +} + /* * Returns pointer with encoded error via interface. */ @@ -1684,6 +1788,11 @@ retry_open: FD(evsel, cpu, thread) = fd; + if (unlikely(test_attr__enabled)) { + test_attr__open(&evsel->core.attr, pid, cpus->map[cpu], + fd, group_fd, flags); + } + if (fd < 0) { err = -errno; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 35e3f6d66085..79a860d8e3ee 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -42,65 +42,79 @@ enum perf_tool_event { */ struct evsel { struct perf_evsel core; - struct evlist *evlist; - char *filter; + struct evlist *evlist; + off_t id_offset; + int idx; + int id_pos; + int is_pos; + unsigned int sample_size; + + /* + * These fields can be set in the parse-events code or similar. + * Please check evsel__clone() to copy them properly so that + * they can be released properly. + */ + struct { + char *name; + char *group_name; + const char *pmu_name; + struct tep_event *tp_format; + char *filter; + unsigned long max_events; + double scale; + const char *unit; + struct cgroup *cgrp; + enum perf_tool_event tool_event; + /* parse modifier helper */ + int exclude_GH; + int sample_read; + bool snapshot; + bool per_pkg; + bool percore; + bool precise_max; + bool use_uncore_alias; + bool is_libpfm_event; + bool auto_merge_stats; + bool collect_stat; + bool weak_group; + int bpf_fd; + struct bpf_object *bpf_obj; + }; + + /* + * metric fields are similar, but needs more care as they can have + * references to other metric (evsel). + */ + const char * metric_expr; + const char * metric_name; + struct evsel **metric_events; + struct evsel *metric_leader; + + void *handler; struct perf_counts *counts; struct perf_counts *prev_raw_counts; - int idx; - unsigned long max_events; unsigned long nr_events_printed; - char *name; - double scale; - const char *unit; - struct tep_event *tp_format; - off_t id_offset; struct perf_stat_evsel *stats; void *priv; u64 db_id; - struct cgroup *cgrp; - void *handler; - unsigned int sample_size; - int id_pos; - int is_pos; - enum perf_tool_event tool_event; bool uniquified_name; - bool snapshot; bool supported; bool needs_swap; bool disabled; bool no_aux_samples; bool immediate; bool tracking; - bool per_pkg; - bool precise_max; bool ignore_missing_thread; bool forced_leader; - bool use_uncore_alias; - bool is_libpfm_event; - /* parse modifier helper */ - int exclude_GH; - int sample_read; - unsigned long *per_pkg_mask; - struct evsel *leader; - char *group_name; bool cmdline_group_boundary; - struct list_head config_terms; - struct bpf_object *bpf_obj; - int bpf_fd; - int err; - bool auto_merge_stats; bool merged_stat; - const char * metric_expr; - const char * metric_name; - struct evsel **metric_events; - struct evsel *metric_leader; - bool collect_stat; - bool weak_group; bool reset_group; bool errored; - bool percore; + unsigned long *per_pkg_mask; + struct evsel *leader; + struct list_head config_terms; + int err; int cpu_iter; - const char *pmu_name; struct { evsel__sb_cb_t *cb; void *data; @@ -169,6 +183,7 @@ static inline struct evsel *evsel__new(struct perf_event_attr *attr) return evsel__new_idx(attr, 0); } +struct evsel *evsel__clone(struct evsel *orig); struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx); /* diff --git a/tools/perf/util/group.h b/tools/perf/util/group.h deleted file mode 100644 index f36c7e31780a..000000000000 --- a/tools/perf/util/group.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef GROUP_H -#define GROUP_H 1 - -bool arch_topdown_check_group(bool *warn); -void arch_topdown_group_warn(void); - -#endif diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 9cf4efdcbbbd..be850e9f8852 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2082,8 +2082,14 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev, dso = machine__findnew_dso(machine, filename); if (dso != NULL) { char sbuild_id[SBUILD_ID_SIZE]; + struct build_id bid; + size_t size = BUILD_ID_SIZE; - dso__set_build_id(dso, &bev->build_id); + if (bev->header.misc & PERF_RECORD_MISC_BUILD_ID_SIZE) + size = bev->size; + + build_id__init(&bid, bev->data, size); + dso__set_build_id(dso, &bid); if (dso_space != DSO_SPACE__USER) { struct kmod_path m = { .name = NULL, }; @@ -2095,10 +2101,9 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev, free(m.name); } - build_id__sprintf(dso->build_id, sizeof(dso->build_id), - sbuild_id); - pr_debug("build id event received for %s: %s\n", - dso->long_name, sbuild_id); + build_id__sprintf(&dso->bid, sbuild_id); + pr_debug("build id event received for %s: %s [%zu]\n", + dso->long_name, sbuild_id, size); dso__put(dso); } diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 0af4e81c46e2..3a0348caec7d 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1101,6 +1101,8 @@ static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt, if (queue->tid == -1 || pt->have_sched_switch) { ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu); + if (ptq->tid == -1) + ptq->pid = -1; thread__zput(ptq->thread); } @@ -2603,10 +2605,8 @@ static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event, tid = sample->tid; } - if (tid == -1) { - pr_err("context_switch event has no tid\n"); - return -EINVAL; - } + if (tid == -1) + intel_pt_log("context_switch event has no tid\n"); ret = intel_pt_sync_switch(pt, cpu, tid, sample->time); if (ret <= 0) diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index 0804308ef285..055bab7a92b3 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -374,11 +374,15 @@ static uint64_t convert_timestamp(struct jit_buf_desc *jd, uint64_t timestamp) if (!jd->use_arch_timestamp) return timestamp; - tc.time_shift = jd->session->time_conv.time_shift; - tc.time_mult = jd->session->time_conv.time_mult; - tc.time_zero = jd->session->time_conv.time_zero; + tc.time_shift = jd->session->time_conv.time_shift; + tc.time_mult = jd->session->time_conv.time_mult; + tc.time_zero = jd->session->time_conv.time_zero; + tc.time_cycles = jd->session->time_conv.time_cycles; + tc.time_mask = jd->session->time_conv.time_mask; + tc.cap_user_time_zero = jd->session->time_conv.cap_user_time_zero; + tc.cap_user_time_short = jd->session->time_conv.cap_user_time_short; - if (!tc.time_mult) + if (!tc.cap_user_time_zero) return 0; return tsc_to_perf_time(timestamp, &tc); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 85587de027a5..7d4194ffc5b0 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -3100,3 +3100,15 @@ char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, ch *addrp = map->unmap_ip(map, sym->start); return sym->name; } + +int machine__for_each_dso(struct machine *machine, machine__dso_t fn, void *priv) +{ + struct dso *pos; + int err = 0; + + list_for_each_entry(pos, &machine->dsos.head, node) { + if (fn(pos, machine, priv)) + err = -1; + } + return err; +} diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 062c36a8433c..26368d3c1754 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -250,6 +250,10 @@ void machines__destroy_kernel_maps(struct machines *machines); size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp); +typedef int (*machine__dso_t)(struct dso *dso, struct machine *machine, void *priv); + +int machine__for_each_dso(struct machine *machine, machine__dso_t fn, + void *priv); int machine__for_each_thread(struct machine *machine, int (*fn)(struct thread *thread, void *p), void *priv); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index cc0faf8f1321..f44ede437dc7 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -27,21 +27,6 @@ static void __maps__insert(struct maps *maps, struct map *map); -static inline int is_anon_memory(const char *filename, u32 flags) -{ - return flags & MAP_HUGETLB || - !strcmp(filename, "//anon") || - !strncmp(filename, "/dev/zero", sizeof("/dev/zero") - 1) || - !strncmp(filename, "/anon_hugepage", sizeof("/anon_hugepage") - 1); -} - -static inline int is_no_dso_memory(const char *filename) -{ - return !strncmp(filename, "[stack", 6) || - !strncmp(filename, "/SYSV",5) || - !strcmp(filename, "[heap]"); -} - static inline int is_android_lib(const char *filename) { return strstarts(filename, "/data/app-lib/") || @@ -158,7 +143,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, int anon, no_dso, vdso, android; android = is_android_lib(filename); - anon = is_anon_memory(filename, flags); + anon = is_anon_memory(filename) || flags & MAP_HUGETLB; vdso = is_vdso_map(filename); no_dso = is_no_dso_memory(filename); map->prot = prot; @@ -346,9 +331,7 @@ int map__load(struct map *map) if (map->dso->has_build_id) { char sbuild_id[SBUILD_ID_SIZE]; - build_id__sprintf(map->dso->build_id, - sizeof(map->dso->build_id), - sbuild_id); + build_id__sprintf(&map->dso->bid, sbuild_id); pr_debug("%s with build id %s not found", name, sbuild_id); } else pr_debug("Failed to open %s", name); diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index c2f5d28fe73a..b1c0686db1b7 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -171,4 +171,18 @@ static inline bool is_bpf_image(const char *name) return strncmp(name, "bpf_trampoline_", sizeof("bpf_trampoline_") - 1) == 0 || strncmp(name, "bpf_dispatcher_", sizeof("bpf_dispatcher_") - 1) == 0; } + +static inline int is_anon_memory(const char *filename) +{ + return !strcmp(filename, "//anon") || + !strncmp(filename, "/dev/zero", sizeof("/dev/zero") - 1) || + !strncmp(filename, "/anon_hugepage", sizeof("/anon_hugepage") - 1); +} + +static inline int is_no_dso_memory(const char *filename) +{ + return !strncmp(filename, "[stack", 6) || + !strncmp(filename, "/SYSV", 5) || + !strcmp(filename, "[heap]"); +} #endif /* __PERF_MAP_H */ diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index ab5030fcfed4..060454a17293 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -15,7 +15,6 @@ #include "rblist.h" #include #include -#include "pmu-events/pmu-events.h" #include "strlist.h" #include #include @@ -25,6 +24,7 @@ #include #include "util.h" #include +#include "cgroup.h" struct metric_event *metricgroup__lookup(struct rblist *metric_events, struct evsel *evsel, @@ -150,8 +150,20 @@ static void expr_ids__exit(struct expr_ids *ids) free(ids->id[i].id); } +static bool contains_event(struct evsel **metric_events, int num_events, + const char *event_name) +{ + int i; + + for (i = 0; i < num_events; i++) { + if (!strcmp(metric_events[i]->name, event_name)) + return true; + } + return false; +} + /** - * Find a group of events in perf_evlist that correpond to those from a parsed + * Find a group of events in perf_evlist that correspond to those from a parsed * metric expression. Note, as find_evsel_group is called in the same order as * perf_evlist was constructed, metric_no_merge doesn't need to test for * underfilling a group. @@ -180,7 +192,11 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, int i = 0, matched_events = 0, events_to_match; const int idnum = (int)hashmap__size(&pctx->ids); - /* duration_time is grouped separately. */ + /* + * duration_time is always grouped separately, when events are grouped + * (ie has_constraint is false) then ignore it in the matching loop and + * add it to metric_events at the end. + */ if (!has_constraint && hashmap__find(&pctx->ids, "duration_time", (void **)&val_ptr)) events_to_match = idnum - 1; @@ -207,23 +223,20 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, sizeof(struct evsel *) * idnum); current_leader = ev->leader; } - if (hashmap__find(&pctx->ids, ev->name, (void **)&val_ptr)) { - if (has_constraint) { - /* - * Events aren't grouped, ensure the same event - * isn't matched from two groups. - */ - for (i = 0; i < matched_events; i++) { - if (!strcmp(ev->name, - metric_events[i]->name)) { - break; - } - } - if (i != matched_events) - continue; - } + /* + * Check for duplicate events with the same name. For example, + * uncore_imc/cas_count_read/ will turn into 6 events per socket + * on skylakex. Only the first such event is placed in + * metric_events. If events aren't grouped then this also + * ensures that the same event in different sibling groups + * aren't both added to metric_events. + */ + if (contains_event(metric_events, matched_events, ev->name)) + continue; + /* Does this event belong to the parse context? */ + if (hashmap__find(&pctx->ids, ev->name, (void **)&val_ptr)) metric_events[matched_events++] = ev; - } + if (matched_events == events_to_match) break; } @@ -239,7 +252,7 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, } if (matched_events != idnum) { - /* Not whole match */ + /* Not a whole match */ return NULL; } @@ -247,8 +260,32 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist, for (i = 0; i < idnum; i++) { ev = metric_events[i]; - ev->metric_leader = ev; + /* Don't free the used events. */ set_bit(ev->idx, evlist_used); + /* + * The metric leader points to the identically named event in + * metric_events. + */ + ev->metric_leader = ev; + /* + * Mark two events with identical names in the same group (or + * globally) as being in use as uncore events may be duplicated + * for each pmu. Set the metric leader of such events to be the + * event that appears in metric_events. + */ + evlist__for_each_entry_continue(perf_evlist, ev) { + /* + * If events are grouped then the search can terminate + * when then group is left. + */ + if (!has_constraint && + ev->leader != metric_events[i]->leader) + break; + if (!strcmp(metric_events[i]->name, ev->name)) { + set_bit(ev->idx, evlist_used); + ev->metric_leader = metric_events[i]; + } + } } return metric_events[0]; @@ -540,10 +577,12 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, } } - if (metricgroups && !raw) - printf("\nMetric Groups:\n\n"); - else if (metrics && !raw) - printf("\nMetrics:\n\n"); + if (!filter || !rblist__empty(&groups)) { + if (metricgroups && !raw) + printf("\nMetric Groups:\n\n"); + else if (metrics && !raw) + printf("\nMetrics:\n\n"); + } for (node = rb_first_cached(&groups.entries); node; node = next) { struct mep *me = container_of(node, struct mep, nd); @@ -639,7 +678,7 @@ static bool metricgroup__has_constraint(struct pmu_event *pe) return false; } -int __weak arch_get_runtimeparam(void) +int __weak arch_get_runtimeparam(struct pmu_event *pe __maybe_unused) { return 1; } @@ -910,7 +949,7 @@ static int add_metric(struct list_head *metric_list, } else { int j, count; - count = arch_get_runtimeparam(); + count = arch_get_runtimeparam(pe); /* This loop is added to create multiple * events depend on count value and add @@ -1119,3 +1158,87 @@ bool metricgroup__has_metric(const char *metric) } return false; } + +int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, + struct rblist *new_metric_events, + struct rblist *old_metric_events) +{ + unsigned i; + + for (i = 0; i < rblist__nr_entries(old_metric_events); i++) { + struct rb_node *nd; + struct metric_event *old_me, *new_me; + struct metric_expr *old_expr, *new_expr; + struct evsel *evsel; + size_t alloc_size; + int idx, nr; + + nd = rblist__entry(old_metric_events, i); + old_me = container_of(nd, struct metric_event, nd); + + evsel = evlist__find_evsel(evlist, old_me->evsel->idx); + if (!evsel) + return -EINVAL; + new_me = metricgroup__lookup(new_metric_events, evsel, true); + if (!new_me) + return -ENOMEM; + + pr_debug("copying metric event for cgroup '%s': %s (idx=%d)\n", + cgrp ? cgrp->name : "root", evsel->name, evsel->idx); + + list_for_each_entry(old_expr, &old_me->head, nd) { + new_expr = malloc(sizeof(*new_expr)); + if (!new_expr) + return -ENOMEM; + + new_expr->metric_expr = old_expr->metric_expr; + new_expr->metric_name = old_expr->metric_name; + new_expr->metric_unit = old_expr->metric_unit; + new_expr->runtime = old_expr->runtime; + + if (old_expr->metric_refs) { + /* calculate number of metric_events */ + for (nr = 0; old_expr->metric_refs[nr].metric_name; nr++) + continue; + alloc_size = sizeof(*new_expr->metric_refs); + new_expr->metric_refs = calloc(nr + 1, alloc_size); + if (!new_expr->metric_refs) { + free(new_expr); + return -ENOMEM; + } + + memcpy(new_expr->metric_refs, old_expr->metric_refs, + nr * alloc_size); + } else { + new_expr->metric_refs = NULL; + } + + /* calculate number of metric_events */ + for (nr = 0; old_expr->metric_events[nr]; nr++) + continue; + alloc_size = sizeof(*new_expr->metric_events); + new_expr->metric_events = calloc(nr + 1, alloc_size); + if (!new_expr->metric_events) { + free(new_expr->metric_refs); + free(new_expr); + return -ENOMEM; + } + + /* copy evsel in the same position */ + for (idx = 0; idx < nr; idx++) { + evsel = old_expr->metric_events[idx]; + evsel = evlist__find_evsel(evlist, evsel->idx); + if (evsel == NULL) { + free(new_expr->metric_events); + free(new_expr->metric_refs); + free(new_expr); + return -EINVAL; + } + new_expr->metric_events[idx] = evsel; + } + + list_add(&new_expr->nd, &new_me->head); + } + } + return 0; +} diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 62623a39cbec..ed1b9392e624 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -5,12 +5,15 @@ #include #include #include +#include "pmu-events/pmu-events.h" +struct evlist; struct evsel; struct evlist; struct option; struct rblist; struct pmu_events_map; +struct cgroup; struct metric_event { struct rb_node nd; @@ -52,6 +55,10 @@ int metricgroup__parse_groups_test(struct evlist *evlist, void metricgroup__print(bool metrics, bool groups, char *filter, bool raw, bool details); bool metricgroup__has_metric(const char *metric); -int arch_get_runtimeparam(void); +int arch_get_runtimeparam(struct pmu_event *pe __maybe_unused); void metricgroup__rblist_exit(struct rblist *metric_events); + +int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, + struct rblist *new_metric_events, + struct rblist *old_metric_events); #endif diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 667cbca1547a..3b273580fb84 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -353,18 +353,20 @@ __add_event(struct list_head *list, int *idx, const char *cpu_list) { struct evsel *evsel; - struct perf_cpu_map *cpus = pmu ? pmu->cpus : + struct perf_cpu_map *cpus = pmu ? perf_cpu_map__get(pmu->cpus) : cpu_list ? perf_cpu_map__new(cpu_list) : NULL; if (init_attr) event_attr_init(attr); evsel = evsel__new_idx(attr, *idx); - if (!evsel) + if (!evsel) { + perf_cpu_map__put(cpus); return NULL; + } (*idx)++; - evsel->core.cpus = perf_cpu_map__get(cpus); + evsel->core.cpus = cpus; evsel->core.own_cpus = perf_cpu_map__get(cpus); evsel->core.system_wide = pmu ? pmu->is_uncore : false; evsel->auto_merge_stats = auto_merge_stats; @@ -940,12 +942,12 @@ do { \ } int parse_events_add_breakpoint(struct list_head *list, int *idx, - void *ptr, char *type, u64 len) + u64 addr, char *type, u64 len) { struct perf_event_attr attr; memset(&attr, 0, sizeof(attr)); - attr.bp_addr = (unsigned long) ptr; + attr.bp_addr = addr; if (parse_breakpoint_type(type, &attr)) return -EINVAL; @@ -1773,6 +1775,7 @@ struct event_modifier { int sample_read; int pinned; int weak; + int exclusive; }; static int get_event_modifier(struct event_modifier *mod, char *str, @@ -1788,6 +1791,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str, int precise_max = 0; int sample_read = 0; int pinned = evsel ? evsel->core.attr.pinned : 0; + int exclusive = evsel ? evsel->core.attr.exclusive : 0; int exclude = eu | ek | eh; int exclude_GH = evsel ? evsel->exclude_GH : 0; @@ -1831,6 +1835,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str, sample_read = 1; } else if (*str == 'D') { pinned = 1; + } else if (*str == 'e') { + exclusive = 1; } else if (*str == 'W') { weak = 1; } else @@ -1864,6 +1870,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str, mod->sample_read = sample_read; mod->pinned = pinned; mod->weak = weak; + mod->exclusive = exclusive; return 0; } @@ -1877,7 +1884,7 @@ static int check_modifier(char *str) char *p = str; /* The sizeof includes 0 byte as well. */ - if (strlen(str) > (sizeof("ukhGHpppPSDIW") - 1)) + if (strlen(str) > (sizeof("ukhGHpppPSDIWe") - 1)) return -1; while (*p) { @@ -1919,8 +1926,10 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add) evsel->precise_max = mod.precise_max; evsel->weak_group = mod.weak; - if (evsel__is_group_leader(evsel)) + if (evsel__is_group_leader(evsel)) { evsel->core.attr.pinned = mod.pinned; + evsel->core.attr.exclusive = mod.exclusive; + } } return 0; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 00cde7d2e30c..e80c9b74f2f2 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -190,7 +190,7 @@ int parse_events_add_cache(struct list_head *list, int *idx, struct parse_events_error *error, struct list_head *head_config); int parse_events_add_breakpoint(struct list_head *list, int *idx, - void *ptr, char *type, u64 len); + u64 addr, char *type, u64 len); int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, char *name, struct list_head *head_config, diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 3ca5fd2829ca..9db5097317f4 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -210,7 +210,7 @@ name_tag [\'][a-zA-Z_*?\[\]][a-zA-Z0-9_*?\-,\.\[\]:=]*[\'] name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]* drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)? /* If you add a modifier you need to update check_modifier() */ -modifier_event [ukhpPGHSDIW]+ +modifier_event [ukhpPGHSDIWe]+ modifier_bp [rwx]{1,3} %% diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 645bf4f1859f..d5b6aff82f21 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -511,7 +511,7 @@ PE_PREFIX_MEM PE_VALUE '/' PE_VALUE ':' PE_MODIFIER_BP sep_dc list = alloc_list(); ABORT_ON(!list); err = parse_events_add_breakpoint(list, &parse_state->idx, - (void *)(uintptr_t) $2, $6, $4); + $2, $6, $4); free($6); if (err) { free(list); @@ -528,7 +528,7 @@ PE_PREFIX_MEM PE_VALUE '/' PE_VALUE sep_dc list = alloc_list(); ABORT_ON(!list); if (parse_events_add_breakpoint(list, &parse_state->idx, - (void *)(uintptr_t) $2, NULL, $4)) { + $2, NULL, $4)) { free(list); YYABORT; } @@ -544,7 +544,7 @@ PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc list = alloc_list(); ABORT_ON(!list); err = parse_events_add_breakpoint(list, &parse_state->idx, - (void *)(uintptr_t) $2, $4, 0); + $2, $4, 0); free($4); if (err) { free(list); @@ -561,7 +561,7 @@ PE_PREFIX_MEM PE_VALUE sep_dc list = alloc_list(); ABORT_ON(!list); if (parse_events_add_breakpoint(list, &parse_state->idx, - (void *)(uintptr_t) $2, NULL, 0)) { + $2, NULL, 0)) { free(list); YYABORT; } diff --git a/tools/perf/util/print_binary.c b/tools/perf/util/print_binary.c index 599a1543871d..13fdc51c61d9 100644 --- a/tools/perf/util/print_binary.c +++ b/tools/perf/util/print_binary.c @@ -50,7 +50,7 @@ int is_printable_array(char *p, unsigned int len) len--; - for (i = 0; i < len; i++) { + for (i = 0; i < len && p[i]; i++) { if (!isprint(p[i]) && !isspace(p[i])) return 0; } diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 99d36ac77c08..8eae2afff71a 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -43,6 +43,10 @@ #include #include +#ifdef HAVE_DEBUGINFOD_SUPPORT +#include +#endif + #define PERFPROBE_GROUP "probe" bool probe_event_dry_run; /* Dry run flag */ @@ -129,9 +133,10 @@ static int kernel_get_symbol_address_by_name(const char *name, u64 *addr, struct map *map; /* ref_reloc_sym is just a label. Need a special fix*/ - reloc_sym = kernel_get_ref_reloc_sym(NULL); + reloc_sym = kernel_get_ref_reloc_sym(&map); if (reloc_sym && strcmp(name, reloc_sym->name) == 0) - *addr = (reloc) ? reloc_sym->addr : reloc_sym->unrelocated_addr; + *addr = (!map->reloc || reloc) ? reloc_sym->addr : + reloc_sym->unrelocated_addr; else { sym = machine__find_kernel_symbol_by_name(host_machine, name, &map); if (!sym) @@ -337,6 +342,8 @@ static int kernel_get_module_dso(const char *module, struct dso **pdso) map = machine__kernel_map(host_machine); dso = map->dso; + if (!dso->has_build_id) + dso__read_running_kernel_build_id(dso, host_machine); vmlinux_name = symbol_conf.vmlinux_name; dso->load_errno = 0; @@ -452,6 +459,49 @@ static int get_alternative_line_range(struct debuginfo *dinfo, return ret; } +#ifdef HAVE_DEBUGINFOD_SUPPORT +static struct debuginfo *open_from_debuginfod(struct dso *dso, struct nsinfo *nsi, + bool silent) +{ + debuginfod_client *c = debuginfod_begin(); + char sbuild_id[SBUILD_ID_SIZE + 1]; + struct debuginfo *ret = NULL; + struct nscookie nsc; + char *path; + int fd; + + if (!c) + return NULL; + + build_id__sprintf(&dso->bid, sbuild_id); + fd = debuginfod_find_debuginfo(c, (const unsigned char *)sbuild_id, + 0, &path); + if (fd >= 0) + close(fd); + debuginfod_end(c); + if (fd < 0) { + if (!silent) + pr_debug("Failed to find debuginfo in debuginfod.\n"); + return NULL; + } + if (!silent) + pr_debug("Load debuginfo from debuginfod (%s)\n", path); + + nsinfo__mountns_enter(nsi, &nsc); + ret = debuginfo__new((const char *)path); + nsinfo__mountns_exit(&nsc); + return ret; +} +#else +static inline +struct debuginfo *open_from_debuginfod(struct dso *dso __maybe_unused, + struct nsinfo *nsi __maybe_unused, + bool silent __maybe_unused) +{ + return NULL; +} +#endif + /* Open new debuginfo of given module */ static struct debuginfo *open_debuginfo(const char *module, struct nsinfo *nsi, bool silent) @@ -471,6 +521,10 @@ static struct debuginfo *open_debuginfo(const char *module, struct nsinfo *nsi, strcpy(reason, "(unknown)"); } else dso__strerror_load(dso, reason, STRERR_BUFSIZE); + if (dso) + ret = open_from_debuginfod(dso, nsi, silent); + if (ret) + return ret; if (!silent) { if (module) pr_err("Module %s is not loaded, please specify its full path name.\n", module); @@ -795,7 +849,8 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs, free(tevs[i].point.symbol); tevs[i].point.symbol = tmp; tevs[i].point.offset = tevs[i].point.address - - reloc_sym->unrelocated_addr; + (map->reloc ? reloc_sym->unrelocated_addr : + reloc_sym->addr); } return skipped; } @@ -950,6 +1005,7 @@ static int _show_one_line(FILE *fp, int l, bool skip, bool show_num) static int __show_line_range(struct line_range *lr, const char *module, bool user) { + struct build_id bid; int l = 1; struct int_node *ln; struct debuginfo *dinfo; @@ -957,6 +1013,7 @@ static int __show_line_range(struct line_range *lr, const char *module, int ret; char *tmp; char sbuf[STRERR_BUFSIZE]; + char sbuild_id[SBUILD_ID_SIZE] = ""; /* Search a line range */ dinfo = open_debuginfo(module, NULL, false); @@ -969,6 +1026,10 @@ static int __show_line_range(struct line_range *lr, const char *module, if (!ret) ret = debuginfo__find_line_range(dinfo, lr); } + if (dinfo->build_id) { + build_id__init(&bid, dinfo->build_id, BUILD_ID_SIZE); + build_id__sprintf(&bid, sbuild_id); + } debuginfo__delete(dinfo); if (ret == 0 || ret == -ENOENT) { pr_warning("Specified source line is not found.\n"); @@ -980,7 +1041,7 @@ static int __show_line_range(struct line_range *lr, const char *module, /* Convert source file path */ tmp = lr->path; - ret = get_real_path(tmp, lr->comp_dir, &lr->path); + ret = find_source_path(tmp, sbuild_id, lr->comp_dir, &lr->path); /* Free old path when new path is assigned */ if (tmp != lr->path) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 659024342e9a..2c4061035f77 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -31,6 +31,10 @@ #include "probe-file.h" #include "string2.h" +#ifdef HAVE_DEBUGINFOD_SUPPORT +#include +#endif + /* Kprobe tracer basic type is up to u64 */ #define MAX_BASIC_TYPE_BITS 64 @@ -51,6 +55,7 @@ static const Dwfl_Callbacks offline_callbacks = { static int debuginfo__init_offline_dwarf(struct debuginfo *dbg, const char *path) { + GElf_Addr dummy; int fd; fd = open(path, O_RDONLY); @@ -70,6 +75,8 @@ static int debuginfo__init_offline_dwarf(struct debuginfo *dbg, if (!dbg->dbg) goto error; + dwfl_module_build_id(dbg->mod, &dbg->build_id, &dummy); + dwfl_report_end(dbg->dwfl, NULL, NULL); return 0; @@ -942,6 +949,8 @@ static int probe_point_lazy_walker(const char *fname, int lineno, /* Find probe points from lazy pattern */ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) { + struct build_id bid; + char sbuild_id[SBUILD_ID_SIZE] = ""; int ret = 0; char *fpath; @@ -949,7 +958,11 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) const char *comp_dir; comp_dir = cu_get_comp_dir(&pf->cu_die); - ret = get_real_path(pf->fname, comp_dir, &fpath); + if (pf->dbg->build_id) { + build_id__init(&bid, pf->dbg->build_id, BUILD_ID_SIZE); + build_id__sprintf(&bid, sbuild_id); + } + ret = find_source_path(pf->fname, sbuild_id, comp_dir, &fpath); if (ret < 0) { pr_warning("Failed to find source file path.\n"); return ret; @@ -1448,7 +1461,7 @@ int debuginfo__find_trace_events(struct debuginfo *dbg, struct probe_trace_event **tevs) { struct trace_event_finder tf = { - .pf = {.pev = pev, .callback = add_probe_trace_event}, + .pf = {.pev = pev, .dbg = dbg, .callback = add_probe_trace_event}, .max_tevs = probe_conf.max_probes, .mod = dbg->mod}; int ret, i; @@ -1618,7 +1631,7 @@ int debuginfo__find_available_vars_at(struct debuginfo *dbg, struct variable_list **vls) { struct available_var_finder af = { - .pf = {.pev = pev, .callback = add_available_vars}, + .pf = {.pev = pev, .dbg = dbg, .callback = add_available_vars}, .mod = dbg->mod, .max_vls = probe_conf.max_probes}; int ret; @@ -1973,17 +1986,57 @@ found: return (ret < 0) ? ret : lf.found; } +#ifdef HAVE_DEBUGINFOD_SUPPORT +/* debuginfod doesn't require the comp_dir but buildid is required */ +static int get_source_from_debuginfod(const char *raw_path, + const char *sbuild_id, char **new_path) +{ + debuginfod_client *c = debuginfod_begin(); + const char *p = raw_path; + int fd; + + if (!c) + return -ENOMEM; + + fd = debuginfod_find_source(c, (const unsigned char *)sbuild_id, + 0, p, new_path); + pr_debug("Search %s from debuginfod -> %d\n", p, fd); + if (fd >= 0) + close(fd); + debuginfod_end(c); + if (fd < 0) { + pr_debug("Failed to find %s in debuginfod (%s)\n", + raw_path, sbuild_id); + return -ENOENT; + } + pr_debug("Got a source %s\n", *new_path); + + return 0; +} +#else +static inline int get_source_from_debuginfod(const char *raw_path __maybe_unused, + const char *sbuild_id __maybe_unused, + char **new_path __maybe_unused) +{ + return -ENOTSUP; +} +#endif /* * Find a src file from a DWARF tag path. Prepend optional source path prefix * and chop off leading directories that do not exist. Result is passed back as * a newly allocated path on success. * Return 0 if file was found and readable, -errno otherwise. */ -int get_real_path(const char *raw_path, const char *comp_dir, - char **new_path) +int find_source_path(const char *raw_path, const char *sbuild_id, + const char *comp_dir, char **new_path) { const char *prefix = symbol_conf.source_prefix; + if (sbuild_id && !prefix) { + if (!get_source_from_debuginfod(raw_path, sbuild_id, new_path)) + return 0; + } + if (!prefix) { if (raw_path[0] != '/' && comp_dir) /* If not an absolute path, try to use comp_dir */ diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 11be10080613..2febb5875678 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -4,6 +4,7 @@ #include #include "intlist.h" +#include "build-id.h" #include "probe-event.h" #include @@ -32,6 +33,7 @@ struct debuginfo { Dwfl_Module *mod; Dwfl *dwfl; Dwarf_Addr bias; + const unsigned char *build_id; }; /* This also tries to open distro debuginfo */ @@ -59,11 +61,12 @@ int debuginfo__find_available_vars_at(struct debuginfo *dbg, struct variable_list **vls); /* Find a src file from a DWARF tag path */ -int get_real_path(const char *raw_path, const char *comp_dir, - char **new_path); +int find_source_path(const char *raw_path, const char *sbuild_id, + const char *comp_dir, char **new_path); struct probe_finder { struct perf_probe_event *pev; /* Target probe event */ + struct debuginfo *dbg; /* Callback when a probe point is found */ int (*callback)(Dwarf_Die *sc_die, struct probe_finder *pf); diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 75a9b1d62bba..ae8edde7c50e 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -15,9 +15,11 @@ #include "thread_map.h" #include "trace-event.h" #include "mmap.h" +#include "stat.h" +#include "metricgroup.h" #include "util/env.h" #include -#include "../perf-sys.h" +#include "util.h" #if PY_MAJOR_VERSION < 3 #define _PyUnicode_FromString(arg) \ @@ -60,6 +62,23 @@ int parse_callchain_record(const char *arg __maybe_unused, */ struct perf_env perf_env; +/* + * Add this one here not to drag util/stat-shadow.c + */ +void perf_stat__collect_metric_expr(struct evlist *evsel_list) +{ +} + +/* + * Add this one here not to drag util/metricgroup.c + */ +int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, + struct rblist *new_metric_events, + struct rblist *old_metric_events) +{ + return 0; +} + /* * Support debug printing even though util/debug.c is not linked. That means * implementing 'verbose' and 'eprintf'. diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index ea9aa1d7cf50..07e4b96a6625 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -14,6 +14,7 @@ #include "util/perf_api_probe.h" #include "record.h" #include "../perf-sys.h" +#include "topdown.h" /* * evsel__config_leader_sampling() uses special rules for leader sampling. @@ -24,7 +25,7 @@ static struct evsel *evsel__read_sampler(struct evsel *evsel, struct evlist *evl { struct evsel *leader = evsel->leader; - if (evsel__is_aux_event(leader)) { + if (evsel__is_aux_event(leader) || arch_topdown_sample_read(leader)) { evlist__for_each_entry(evlist, evsel) { if (evsel->leader == leader && evsel != evsel->leader) return evsel; diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index 03678ff25539..266760ac9143 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -73,6 +73,7 @@ struct record_opts { unsigned int nr_threads_synthesize; int ctl_fd; int ctl_fd_ack; + bool ctl_fd_close; }; extern const char * const *record_usage; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 739516fdf6e3..7cbd024e3e63 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1064,7 +1064,7 @@ static int python_export_dso(struct db_export *dbe, struct dso *dso, char sbuild_id[SBUILD_ID_SIZE]; PyObject *t; - build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); + build_id__sprintf(&dso->bid, sbuild_id); t = tuple_new(5); diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 493ec372fdec..4b57c0c07632 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -946,7 +946,6 @@ static void print_metric_headers(struct perf_stat_config *config, out.print_metric = print_metric_header; out.new_line = new_line_metric; out.force_header = true; - os.evsel = counter; perf_stat__print_shadow_stats(config, counter, 0, 0, &out, diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 924b54d15d54..901265127e36 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -241,6 +241,18 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count, else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu, count); + else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING)) + update_runtime_stat(st, STAT_TOPDOWN_RETIRING, + ctx, cpu, count); + else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC)) + update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC, + ctx, cpu, count); + else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND)) + update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND, + ctx, cpu, count); + else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND)) + update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND, + ctx, cpu, count); else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT, ctx, cpu, count); @@ -705,6 +717,47 @@ static double td_be_bound(int ctx, int cpu, struct runtime_stat *st) return sanitize_val(1.0 - sum); } +/* + * Kernel reports metrics multiplied with slots. To get back + * the ratios we need to recreate the sum. + */ + +static double td_metric_ratio(int ctx, int cpu, + enum stat_type type, + struct runtime_stat *stat) +{ + double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) + + runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) + + runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) + + runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu); + double d = runtime_stat_avg(stat, type, ctx, cpu); + + if (sum) + return d / sum; + return 0; +} + +/* + * ... but only if most of the values are actually available. + * We allow two missing. + */ + +static bool full_td(int ctx, int cpu, + struct runtime_stat *stat) +{ + int c = 0; + + if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) > 0) + c++; + if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) > 0) + c++; + if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) > 0) + c++; + if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu) > 0) + c++; + return c >= 2; +} + static void print_smi_cost(struct perf_stat_config *config, int cpu, struct evsel *evsel, struct perf_stat_output_ctx *out, @@ -1073,6 +1126,42 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, be_bound * 100.); else print_metric(config, ctxp, NULL, NULL, name, 0); + } else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) && + full_td(ctx, cpu, st)) { + double retiring = td_metric_ratio(ctx, cpu, + STAT_TOPDOWN_RETIRING, st); + + if (retiring > 0.7) + color = PERF_COLOR_GREEN; + print_metric(config, ctxp, color, "%8.1f%%", "retiring", + retiring * 100.); + } else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) && + full_td(ctx, cpu, st)) { + double fe_bound = td_metric_ratio(ctx, cpu, + STAT_TOPDOWN_FE_BOUND, st); + + if (fe_bound > 0.2) + color = PERF_COLOR_RED; + print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", + fe_bound * 100.); + } else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) && + full_td(ctx, cpu, st)) { + double be_bound = td_metric_ratio(ctx, cpu, + STAT_TOPDOWN_BE_BOUND, st); + + if (be_bound > 0.2) + color = PERF_COLOR_RED; + print_metric(config, ctxp, color, "%8.1f%%", "backend bound", + be_bound * 100.); + } else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) && + full_td(ctx, cpu, st)) { + double bad_spec = td_metric_ratio(ctx, cpu, + STAT_TOPDOWN_BAD_SPEC, st); + + if (bad_spec > 0.1) + color = PERF_COLOR_RED; + print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", + bad_spec * 100.); } else if (evsel->metric_expr) { generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL, evsel->name, evsel->metric_name, NULL, 1, cpu, out, st); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index cdb154381a87..bd0decd6d753 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -95,6 +95,10 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = { ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired), ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles), ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles), + ID(TOPDOWN_RETIRING, topdown-retiring), + ID(TOPDOWN_BAD_SPEC, topdown-bad-spec), + ID(TOPDOWN_FE_BOUND, topdown-fe-bound), + ID(TOPDOWN_BE_BOUND, topdown-be-bound), ID(SMI_NUM, msr/smi/), ID(APERF, msr/aperf/), }; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index aa3bed48511b..487010c624be 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -28,6 +28,10 @@ enum perf_stat_evsel_id { PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED, PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES, PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES, + PERF_STAT_EVSEL_ID__TOPDOWN_RETIRING, + PERF_STAT_EVSEL_ID__TOPDOWN_BAD_SPEC, + PERF_STAT_EVSEL_ID__TOPDOWN_FE_BOUND, + PERF_STAT_EVSEL_ID__TOPDOWN_BE_BOUND, PERF_STAT_EVSEL_ID__SMI_NUM, PERF_STAT_EVSEL_ID__APERF, PERF_STAT_EVSEL_ID__MAX, @@ -82,6 +86,10 @@ enum stat_type { STAT_TOPDOWN_SLOTS_RETIRED, STAT_TOPDOWN_FETCH_BUBBLES, STAT_TOPDOWN_RECOVERY_BUBBLES, + STAT_TOPDOWN_RETIRING, + STAT_TOPDOWN_BAD_SPEC, + STAT_TOPDOWN_FE_BOUND, + STAT_TOPDOWN_BE_BOUND, STAT_SMI_NUM, STAT_APERF, STAT_MAX @@ -136,6 +144,8 @@ struct perf_stat_config { struct rblist metric_events; int ctl_fd; int ctl_fd_ack; + bool ctl_fd_close; + const char *cgroup_list; }; void perf_stat__set_big_num(int set); diff --git a/tools/perf/util/stream.c b/tools/perf/util/stream.c new file mode 100644 index 000000000000..4bd5e5a00aa5 --- /dev/null +++ b/tools/perf/util/stream.c @@ -0,0 +1,342 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Compare and figure out the top N hottest streams + * Copyright (c) 2020, Intel Corporation. + * Author: Jin Yao + */ + +#include +#include +#include +#include "debug.h" +#include "hist.h" +#include "sort.h" +#include "stream.h" +#include "evlist.h" + +static void evsel_streams__delete(struct evsel_streams *es, int nr_evsel) +{ + for (int i = 0; i < nr_evsel; i++) + zfree(&es[i].streams); + + free(es); +} + +void evlist_streams__delete(struct evlist_streams *els) +{ + evsel_streams__delete(els->ev_streams, els->nr_evsel); + free(els); +} + +static struct evlist_streams *evlist_streams__new(int nr_evsel, + int nr_streams_max) +{ + struct evlist_streams *els; + struct evsel_streams *es; + + els = zalloc(sizeof(*els)); + if (!els) + return NULL; + + es = calloc(nr_evsel, sizeof(struct evsel_streams)); + if (!es) { + free(els); + return NULL; + } + + for (int i = 0; i < nr_evsel; i++) { + struct evsel_streams *s = &es[i]; + + s->streams = calloc(nr_streams_max, sizeof(struct stream)); + if (!s->streams) + goto err; + + s->nr_streams_max = nr_streams_max; + s->evsel_idx = -1; + } + + els->ev_streams = es; + els->nr_evsel = nr_evsel; + return els; + +err: + evsel_streams__delete(es, nr_evsel); + return NULL; +} + +/* + * The cnodes with high hit number are hot callchains. + */ +static void evsel_streams__set_hot_cnode(struct evsel_streams *es, + struct callchain_node *cnode) +{ + int i, idx = 0; + u64 hit; + + if (es->nr_streams < es->nr_streams_max) { + i = es->nr_streams; + es->streams[i].cnode = cnode; + es->nr_streams++; + return; + } + + /* + * Considering a few number of hot streams, only use simple + * way to find the cnode with smallest hit number and replace. + */ + hit = (es->streams[0].cnode)->hit; + for (i = 1; i < es->nr_streams; i++) { + if ((es->streams[i].cnode)->hit < hit) { + hit = (es->streams[i].cnode)->hit; + idx = i; + } + } + + if (cnode->hit > hit) + es->streams[idx].cnode = cnode; +} + +static void update_hot_callchain(struct hist_entry *he, + struct evsel_streams *es) +{ + struct rb_root *root = &he->sorted_chain; + struct rb_node *rb_node = rb_first(root); + struct callchain_node *cnode; + + while (rb_node) { + cnode = rb_entry(rb_node, struct callchain_node, rb_node); + evsel_streams__set_hot_cnode(es, cnode); + rb_node = rb_next(rb_node); + } +} + +static void init_hot_callchain(struct hists *hists, struct evsel_streams *es) +{ + struct rb_node *next = rb_first_cached(&hists->entries); + + while (next) { + struct hist_entry *he; + + he = rb_entry(next, struct hist_entry, rb_node); + update_hot_callchain(he, es); + next = rb_next(&he->rb_node); + } + + es->streams_hits = callchain_total_hits(hists); +} + +static int evlist__init_callchain_streams(struct evlist *evlist, + struct evlist_streams *els) +{ + struct evsel_streams *es = els->ev_streams; + struct evsel *pos; + int i = 0; + + BUG_ON(els->nr_evsel < evlist->core.nr_entries); + + evlist__for_each_entry(evlist, pos) { + struct hists *hists = evsel__hists(pos); + + hists__output_resort(hists, NULL); + init_hot_callchain(hists, &es[i]); + es[i].evsel_idx = pos->idx; + i++; + } + + return 0; +} + +struct evlist_streams *evlist__create_streams(struct evlist *evlist, + int nr_streams_max) +{ + int nr_evsel = evlist->core.nr_entries, ret = -1; + struct evlist_streams *els = evlist_streams__new(nr_evsel, + nr_streams_max); + + if (!els) + return NULL; + + ret = evlist__init_callchain_streams(evlist, els); + if (ret) { + evlist_streams__delete(els); + return NULL; + } + + return els; +} + +struct evsel_streams *evsel_streams__entry(struct evlist_streams *els, + int evsel_idx) +{ + struct evsel_streams *es = els->ev_streams; + + for (int i = 0; i < els->nr_evsel; i++) { + if (es[i].evsel_idx == evsel_idx) + return &es[i]; + } + + return NULL; +} + +static struct stream *stream__callchain_match(struct stream *base_stream, + struct evsel_streams *es_pair) +{ + for (int i = 0; i < es_pair->nr_streams; i++) { + struct stream *pair_stream = &es_pair->streams[i]; + + if (callchain_cnode_matched(base_stream->cnode, + pair_stream->cnode)) { + return pair_stream; + } + } + + return NULL; +} + +static struct stream *stream__match(struct stream *base_stream, + struct evsel_streams *es_pair) +{ + return stream__callchain_match(base_stream, es_pair); +} + +static void stream__link(struct stream *base_stream, struct stream *pair_stream) +{ + base_stream->pair_cnode = pair_stream->cnode; + pair_stream->pair_cnode = base_stream->cnode; +} + +void evsel_streams__match(struct evsel_streams *es_base, + struct evsel_streams *es_pair) +{ + for (int i = 0; i < es_base->nr_streams; i++) { + struct stream *base_stream = &es_base->streams[i]; + struct stream *pair_stream; + + pair_stream = stream__match(base_stream, es_pair); + if (pair_stream) + stream__link(base_stream, pair_stream); + } +} + +static void print_callchain_pair(struct stream *base_stream, int idx, + struct evsel_streams *es_base, + struct evsel_streams *es_pair) +{ + struct callchain_node *base_cnode = base_stream->cnode; + struct callchain_node *pair_cnode = base_stream->pair_cnode; + struct callchain_list *base_chain, *pair_chain; + char buf1[512], buf2[512], cbuf1[256], cbuf2[256]; + char *s1, *s2; + double pct; + + printf("\nhot chain pair %d:\n", idx); + + pct = (double)base_cnode->hit / (double)es_base->streams_hits; + scnprintf(buf1, sizeof(buf1), "cycles: %ld, hits: %.2f%%", + callchain_avg_cycles(base_cnode), pct * 100.0); + + pct = (double)pair_cnode->hit / (double)es_pair->streams_hits; + scnprintf(buf2, sizeof(buf2), "cycles: %ld, hits: %.2f%%", + callchain_avg_cycles(pair_cnode), pct * 100.0); + + printf("%35s\t%35s\n", buf1, buf2); + + printf("%35s\t%35s\n", + "---------------------------", + "--------------------------"); + + pair_chain = list_first_entry(&pair_cnode->val, + struct callchain_list, + list); + + list_for_each_entry(base_chain, &base_cnode->val, list) { + if (&pair_chain->list == &pair_cnode->val) + return; + + s1 = callchain_list__sym_name(base_chain, cbuf1, sizeof(cbuf1), + false); + s2 = callchain_list__sym_name(pair_chain, cbuf2, sizeof(cbuf2), + false); + + scnprintf(buf1, sizeof(buf1), "%35s\t%35s", s1, s2); + printf("%s\n", buf1); + pair_chain = list_next_entry(pair_chain, list); + } +} + +static void print_stream_callchain(struct stream *stream, int idx, + struct evsel_streams *es, bool pair) +{ + struct callchain_node *cnode = stream->cnode; + struct callchain_list *chain; + char buf[512], cbuf[256], *s; + double pct; + + printf("\nhot chain %d:\n", idx); + + pct = (double)cnode->hit / (double)es->streams_hits; + scnprintf(buf, sizeof(buf), "cycles: %ld, hits: %.2f%%", + callchain_avg_cycles(cnode), pct * 100.0); + + if (pair) { + printf("%35s\t%35s\n", "", buf); + printf("%35s\t%35s\n", + "", "--------------------------"); + } else { + printf("%35s\n", buf); + printf("%35s\n", "--------------------------"); + } + + list_for_each_entry(chain, &cnode->val, list) { + s = callchain_list__sym_name(chain, cbuf, sizeof(cbuf), false); + + if (pair) + scnprintf(buf, sizeof(buf), "%35s\t%35s", "", s); + else + scnprintf(buf, sizeof(buf), "%35s", s); + + printf("%s\n", buf); + } +} + +static void callchain_streams_report(struct evsel_streams *es_base, + struct evsel_streams *es_pair) +{ + struct stream *base_stream; + int i, idx = 0; + + printf("[ Matched hot streams ]\n"); + for (i = 0; i < es_base->nr_streams; i++) { + base_stream = &es_base->streams[i]; + if (base_stream->pair_cnode) { + print_callchain_pair(base_stream, ++idx, + es_base, es_pair); + } + } + + idx = 0; + printf("\n[ Hot streams in old perf data only ]\n"); + for (i = 0; i < es_base->nr_streams; i++) { + base_stream = &es_base->streams[i]; + if (!base_stream->pair_cnode) { + print_stream_callchain(base_stream, ++idx, + es_base, false); + } + } + + idx = 0; + printf("\n[ Hot streams in new perf data only ]\n"); + for (i = 0; i < es_pair->nr_streams; i++) { + base_stream = &es_pair->streams[i]; + if (!base_stream->pair_cnode) { + print_stream_callchain(base_stream, ++idx, + es_pair, true); + } + } +} + +void evsel_streams__report(struct evsel_streams *es_base, + struct evsel_streams *es_pair) +{ + return callchain_streams_report(es_base, es_pair); +} diff --git a/tools/perf/util/stream.h b/tools/perf/util/stream.h new file mode 100644 index 000000000000..bee768874fea --- /dev/null +++ b/tools/perf/util/stream.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_STREAM_H +#define __PERF_STREAM_H + +#include "callchain.h" + +struct stream { + struct callchain_node *cnode; + struct callchain_node *pair_cnode; +}; + +struct evsel_streams { + struct stream *streams; + int nr_streams_max; + int nr_streams; + int evsel_idx; + u64 streams_hits; +}; + +struct evlist_streams { + struct evsel_streams *ev_streams; + int nr_evsel; +}; + +struct evlist; + +void evlist_streams__delete(struct evlist_streams *els); + +struct evlist_streams *evlist__create_streams(struct evlist *evlist, + int nr_streams_max); + +struct evsel_streams *evsel_streams__entry(struct evlist_streams *els, + int evsel_idx); + +void evsel_streams__match(struct evsel_streams *es_base, + struct evsel_streams *es_pair); + +void evsel_streams__report(struct evsel_streams *es_base, + struct evsel_streams *es_pair); + +#endif /* __PERF_STREAM_H */ diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 8cc4b0059fb0..44dd86a4f25f 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -50,6 +50,10 @@ typedef Elf64_Nhdr GElf_Nhdr; #define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ #endif +#ifdef HAVE_LIBBFD_SUPPORT +#define PACKAGE 'perf' +#include +#else #ifdef HAVE_CPLUS_DEMANGLE_SUPPORT extern char *cplus_demangle(const char *, int); @@ -65,9 +69,7 @@ static inline char *bfd_demangle(void __maybe_unused *v, { return NULL; } -#else -#define PACKAGE 'perf' -#include +#endif #endif #endif @@ -530,8 +532,40 @@ out: return err; } -int filename__read_build_id(const char *filename, void *bf, size_t size) +#ifdef HAVE_LIBBFD_BUILDID_SUPPORT + +int filename__read_build_id(const char *filename, struct build_id *bid) { + size_t size = sizeof(bid->data); + int err = -1; + bfd *abfd; + + abfd = bfd_openr(filename, NULL); + if (!abfd) + return -1; + + if (!bfd_check_format(abfd, bfd_object)) { + pr_debug2("%s: cannot read %s bfd file.\n", __func__, filename); + goto out_close; + } + + if (!abfd->build_id || abfd->build_id->size > size) + goto out_close; + + memcpy(bid->data, abfd->build_id->data, abfd->build_id->size); + memset(bid->data + abfd->build_id->size, 0, size - abfd->build_id->size); + err = bid->size = abfd->build_id->size; + +out_close: + bfd_close(abfd); + return err; +} + +#else // HAVE_LIBBFD_BUILDID_SUPPORT + +int filename__read_build_id(const char *filename, struct build_id *bid) +{ + size_t size = sizeof(bid->data); int fd, err = -1; Elf *elf; @@ -548,7 +582,9 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) goto out_close; } - err = elf_read_build_id(elf, bf, size); + err = elf_read_build_id(elf, bid->data, size); + if (err > 0) + bid->size = err; elf_end(elf); out_close: @@ -557,12 +593,12 @@ out: return err; } -int sysfs__read_build_id(const char *filename, void *build_id, size_t size) -{ - int fd, err = -1; +#endif // HAVE_LIBBFD_BUILDID_SUPPORT - if (size < BUILD_ID_SIZE) - goto out; +int sysfs__read_build_id(const char *filename, struct build_id *bid) +{ + size_t size = sizeof(bid->data); + int fd, err = -1; fd = open(filename, O_RDONLY); if (fd < 0) @@ -584,8 +620,9 @@ int sysfs__read_build_id(const char *filename, void *build_id, size_t size) break; if (memcmp(bf, "GNU", sizeof("GNU")) == 0) { size_t sz = min(descsz, size); - if (read(fd, build_id, sz) == (ssize_t)sz) { - memset(build_id + sz, 0, size - sz); + if (read(fd, bid->data, sz) == (ssize_t)sz) { + memset(bid->data + sz, 0, size - sz); + bid->size = sz; err = 0; break; } @@ -608,6 +645,44 @@ out: return err; } +#ifdef HAVE_LIBBFD_SUPPORT + +int filename__read_debuglink(const char *filename, char *debuglink, + size_t size) +{ + int err = -1; + asection *section; + bfd *abfd; + + abfd = bfd_openr(filename, NULL); + if (!abfd) + return -1; + + if (!bfd_check_format(abfd, bfd_object)) { + pr_debug2("%s: cannot read %s bfd file.\n", __func__, filename); + goto out_close; + } + + section = bfd_get_section_by_name(abfd, ".gnu_debuglink"); + if (!section) + goto out_close; + + if (section->size > size) + goto out_close; + + if (!bfd_get_section_contents(abfd, section, debuglink, 0, + section->size)) + goto out_close; + + err = 0; + +out_close: + bfd_close(abfd); + return err; +} + +#else + int filename__read_debuglink(const char *filename, char *debuglink, size_t size) { @@ -660,6 +735,8 @@ out: return err; } +#endif + static int dso__swap_init(struct dso *dso, unsigned char eidata) { static unsigned int const endian = 1; @@ -757,13 +834,17 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, /* Always reject images with a mismatched build-id: */ if (dso->has_build_id && !symbol_conf.ignore_vmlinux_buildid) { u8 build_id[BUILD_ID_SIZE]; + struct build_id bid; + int size; - if (elf_read_build_id(elf, build_id, BUILD_ID_SIZE) < 0) { + size = elf_read_build_id(elf, build_id, BUILD_ID_SIZE); + if (size <= 0) { dso->load_errno = DSO_LOAD_ERRNO__CANNOT_READ_BUILDID; goto out_elf_end; } - if (!dso__build_id_equal(dso, build_id)) { + build_id__init(&bid, build_id, size); + if (!dso__build_id_equal(dso, &bid)) { pr_debug("%s: build id mismatch for %s.\n", __func__, name); dso->load_errno = DSO_LOAD_ERRNO__MISMATCHING_BUILDID; goto out_elf_end; diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index d6e99af263ec..f9eb0bee7f15 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -31,9 +31,10 @@ static bool check_need_swap(int file_endian) #define NT_GNU_BUILD_ID 3 -static int read_build_id(void *note_data, size_t note_len, void *bf, - size_t size, bool need_swap) +static int read_build_id(void *note_data, size_t note_len, struct build_id *bid, + bool need_swap) { + size_t size = sizeof(bid->data); struct { u32 n_namesz; u32 n_descsz; @@ -63,8 +64,9 @@ static int read_build_id(void *note_data, size_t note_len, void *bf, nhdr->n_namesz == sizeof("GNU")) { if (memcmp(name, "GNU", sizeof("GNU")) == 0) { size_t sz = min(size, descsz); - memcpy(bf, ptr, sz); - memset(bf + sz, 0, size - sz); + memcpy(bid->data, ptr, sz); + memset(bid->data + sz, 0, size - sz); + bid->size = sz; return 0; } } @@ -84,7 +86,7 @@ int filename__read_debuglink(const char *filename __maybe_unused, /* * Just try PT_NOTE header otherwise fails */ -int filename__read_build_id(const char *filename, void *bf, size_t size) +int filename__read_build_id(const char *filename, struct build_id *bid) { FILE *fp; int ret = -1; @@ -156,9 +158,9 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) if (fread(buf, buf_size, 1, fp) != 1) goto out_free; - ret = read_build_id(buf, buf_size, bf, size, need_swap); + ret = read_build_id(buf, buf_size, bid, need_swap); if (ret == 0) - ret = size; + ret = bid->size; break; } } else { @@ -207,9 +209,9 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) if (fread(buf, buf_size, 1, fp) != 1) goto out_free; - ret = read_build_id(buf, buf_size, bf, size, need_swap); + ret = read_build_id(buf, buf_size, bid, need_swap); if (ret == 0) - ret = size; + ret = bid->size; break; } } @@ -220,7 +222,7 @@ out: return ret; } -int sysfs__read_build_id(const char *filename, void *build_id, size_t size) +int sysfs__read_build_id(const char *filename, struct build_id *bid) { int fd; int ret = -1; @@ -243,7 +245,7 @@ int sysfs__read_build_id(const char *filename, void *build_id, size_t size) if (read(fd, buf, buf_size) != (ssize_t) buf_size) goto out_free; - ret = read_build_id(buf, buf_size, build_id, size, false); + ret = read_build_id(buf, buf_size, bid, false); out_free: free(buf); out: @@ -339,16 +341,15 @@ int dso__load_sym(struct dso *dso, struct map *map __maybe_unused, struct symsrc *runtime_ss __maybe_unused, int kmodule __maybe_unused) { - unsigned char build_id[BUILD_ID_SIZE]; + struct build_id bid; int ret; ret = fd__is_64_bit(ss->fd); if (ret >= 0) dso->is_64_bit = ret; - if (filename__read_build_id(ss->name, build_id, BUILD_ID_SIZE) > 0) { - dso__set_build_id(dso, build_id); - } + if (filename__read_build_id(ss->name, &bid) > 0) + dso__set_build_id(dso, &bid); return 0; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 5151a8c0b791..6138866665df 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1526,6 +1526,138 @@ out_failure: return -1; } +#ifdef HAVE_LIBBFD_SUPPORT +#define PACKAGE 'perf' +#include + +static int bfd_symbols__cmpvalue(const void *a, const void *b) +{ + const asymbol *as = *(const asymbol **)a, *bs = *(const asymbol **)b; + + if (bfd_asymbol_value(as) != bfd_asymbol_value(bs)) + return bfd_asymbol_value(as) - bfd_asymbol_value(bs); + + return bfd_asymbol_name(as)[0] - bfd_asymbol_name(bs)[0]; +} + +static int bfd2elf_binding(asymbol *symbol) +{ + if (symbol->flags & BSF_WEAK) + return STB_WEAK; + if (symbol->flags & BSF_GLOBAL) + return STB_GLOBAL; + if (symbol->flags & BSF_LOCAL) + return STB_LOCAL; + return -1; +} + +int dso__load_bfd_symbols(struct dso *dso, const char *debugfile) +{ + int err = -1; + long symbols_size, symbols_count; + asection *section; + asymbol **symbols, *sym; + struct symbol *symbol; + bfd *abfd; + u_int i; + u64 start, len; + + abfd = bfd_openr(dso->long_name, NULL); + if (!abfd) + return -1; + + if (!bfd_check_format(abfd, bfd_object)) { + pr_debug2("%s: cannot read %s bfd file.\n", __func__, + dso->long_name); + goto out_close; + } + + if (bfd_get_flavour(abfd) == bfd_target_elf_flavour) + goto out_close; + + section = bfd_get_section_by_name(abfd, ".text"); + if (section) + dso->text_offset = section->vma - section->filepos; + + bfd_close(abfd); + + abfd = bfd_openr(debugfile, NULL); + if (!abfd) + return -1; + + if (!bfd_check_format(abfd, bfd_object)) { + pr_debug2("%s: cannot read %s bfd file.\n", __func__, + debugfile); + goto out_close; + } + + if (bfd_get_flavour(abfd) == bfd_target_elf_flavour) + goto out_close; + + symbols_size = bfd_get_symtab_upper_bound(abfd); + if (symbols_size == 0) { + bfd_close(abfd); + return 0; + } + + if (symbols_size < 0) + goto out_close; + + symbols = malloc(symbols_size); + if (!symbols) + goto out_close; + + symbols_count = bfd_canonicalize_symtab(abfd, symbols); + if (symbols_count < 0) + goto out_free; + + qsort(symbols, symbols_count, sizeof(asymbol *), bfd_symbols__cmpvalue); + +#ifdef bfd_get_section +#define bfd_asymbol_section bfd_get_section +#endif + for (i = 0; i < symbols_count; ++i) { + sym = symbols[i]; + section = bfd_asymbol_section(sym); + if (bfd2elf_binding(sym) < 0) + continue; + + while (i + 1 < symbols_count && + bfd_asymbol_section(symbols[i + 1]) == section && + bfd2elf_binding(symbols[i + 1]) < 0) + i++; + + if (i + 1 < symbols_count && + bfd_asymbol_section(symbols[i + 1]) == section) + len = symbols[i + 1]->value - sym->value; + else + len = section->size - sym->value; + + start = bfd_asymbol_value(sym) - dso->text_offset; + symbol = symbol__new(start, len, bfd2elf_binding(sym), STT_FUNC, + bfd_asymbol_name(sym)); + if (!symbol) + goto out_free; + + symbols__insert(&dso->symbols, symbol); + } +#ifdef bfd_get_section +#undef bfd_asymbol_section +#endif + + symbols__fixup_end(&dso->symbols); + symbols__fixup_duplicate(&dso->symbols); + dso->adjust_symbols = 1; + + err = 0; +out_free: + free(symbols); +out_close: + bfd_close(abfd); + return err; +} +#endif + static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, enum dso_binary_type type) { @@ -1623,7 +1755,7 @@ int dso__load(struct dso *dso, struct map *map) struct symsrc *syms_ss = NULL, *runtime_ss = NULL; bool kmod; bool perfmap; - unsigned char build_id[BUILD_ID_SIZE]; + struct build_id bid; struct nscookie nsc; char newmapname[PATH_MAX]; const char *map_path = dso->long_name; @@ -1685,8 +1817,8 @@ int dso__load(struct dso *dso, struct map *map) if (!dso->has_build_id && is_regular_file(dso->long_name)) { __symbol__join_symfs(name, PATH_MAX, dso->long_name); - if (filename__read_build_id(name, build_id, BUILD_ID_SIZE) > 0) - dso__set_build_id(dso, build_id); + if (filename__read_build_id(name, &bid) > 0) + dso__set_build_id(dso, &bid); } /* @@ -1699,6 +1831,7 @@ int dso__load(struct dso *dso, struct map *map) bool next_slot = false; bool is_reg; bool nsexit; + int bfdrc = -1; int sirc = -1; enum dso_binary_type symtab_type = binary_type_symtab[i]; @@ -1717,12 +1850,19 @@ int dso__load(struct dso *dso, struct map *map) nsinfo__mountns_exit(&nsc); is_reg = is_regular_file(name); +#ifdef HAVE_LIBBFD_SUPPORT if (is_reg) + bfdrc = dso__load_bfd_symbols(dso, name); +#endif + if (is_reg && bfdrc < 0) sirc = symsrc__init(ss, dso, name, symtab_type); if (nsexit) nsinfo__mountns_enter(dso->nsinfo, &nsc); + if (bfdrc == 0) + break; + if (!is_reg || sirc < 0) continue; @@ -1982,7 +2122,7 @@ static bool filename__readable(const char *file) static char *dso__find_kallsyms(struct dso *dso, struct map *map) { - u8 host_build_id[BUILD_ID_SIZE]; + struct build_id bid; char sbuild_id[SBUILD_ID_SIZE]; bool is_host = false; char path[PATH_MAX]; @@ -1995,9 +2135,8 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map) goto proc_kallsyms; } - if (sysfs__read_build_id("/sys/kernel/notes", host_build_id, - sizeof(host_build_id)) == 0) - is_host = dso__build_id_equal(dso, host_build_id); + if (sysfs__read_build_id("/sys/kernel/notes", &bid) == 0) + is_host = dso__build_id_equal(dso, &bid); /* Try a fast path for /proc/kallsyms if possible */ if (is_host) { @@ -2013,7 +2152,7 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map) goto proc_kallsyms; } - build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); + build_id__sprintf(&dso->bid, sbuild_id); /* Find kallsyms in build-id cache with kcore */ scnprintf(path, sizeof(path), "%s/%s/%s", diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 03e264a27cd3..f4801c488def 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -23,6 +23,7 @@ struct dso; struct map; struct maps; struct option; +struct build_id; /* * libelf 0.8.x and earlier do not support ELF_C_READ_MMAP; @@ -142,8 +143,8 @@ struct symbol *dso__next_symbol(struct symbol *sym); enum dso_type dso__type_fd(int fd); -int filename__read_build_id(const char *filename, void *bf, size_t size); -int sysfs__read_build_id(const char *filename, void *bf, size_t size); +int filename__read_build_id(const char *filename, struct build_id *id); +int sysfs__read_build_id(const char *filename, struct build_id *bid); int modules__parse(const char *filename, void *arg, int (*process_module)(void *arg, const char *name, u64 start, u64 size)); @@ -175,6 +176,10 @@ int symbol__config_symfs(const struct option *opt __maybe_unused, struct symsrc; +#ifdef HAVE_LIBBFD_SUPPORT +int dso__load_bfd_symbols(struct dso *dso, const char *debugfile); +#endif + int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, struct symsrc *runtime_ss, int kmodule); int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss); diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 89b390623b63..8a23391558cf 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1961,7 +1961,7 @@ int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 len = pos->long_name_len + 1; len = PERF_ALIGN(len, NAME_ALIGN); - memcpy(&ev.build_id.build_id, pos->build_id, sizeof(pos->build_id)); + memcpy(&ev.build_id.build_id, pos->bid.data, sizeof(pos->bid.data)); ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID; ev.build_id.header.misc = misc; ev.build_id.pid = machine->pid; @@ -2006,14 +2006,6 @@ int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct p return 0; } -int __weak perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused, - struct perf_tool *tool __maybe_unused, - perf_event__handler_t process __maybe_unused, - struct machine *machine __maybe_unused) -{ - return 0; -} - extern const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE]; int perf_event__synthesize_features(struct perf_tool *tool, struct perf_session *session, diff --git a/tools/perf/util/topdown.c b/tools/perf/util/topdown.c new file mode 100644 index 000000000000..1081b20f9891 --- /dev/null +++ b/tools/perf/util/topdown.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "pmu.h" +#include "topdown.h" + +int topdown_filter_events(const char **attr, char **str, bool use_group) +{ + int off = 0; + int i; + int len = 0; + char *s; + + for (i = 0; attr[i]; i++) { + if (pmu_have_event("cpu", attr[i])) { + len += strlen(attr[i]) + 1; + attr[i - off] = attr[i]; + } else + off++; + } + attr[i - off] = NULL; + + *str = malloc(len + 1 + 2); + if (!*str) + return -1; + s = *str; + if (i - off == 0) { + *s = 0; + return 0; + } + if (use_group) + *s++ = '{'; + for (i = 0; attr[i]; i++) { + strcpy(s, attr[i]); + s += strlen(s); + *s++ = ','; + } + if (use_group) { + s[-1] = '}'; + *s = 0; + } else + s[-1] = 0; + return 0; +} + +__weak bool arch_topdown_check_group(bool *warn) +{ + *warn = false; + return false; +} + +__weak void arch_topdown_group_warn(void) +{ +} + +__weak bool arch_topdown_sample_read(struct evsel *leader __maybe_unused) +{ + return false; +} diff --git a/tools/perf/util/topdown.h b/tools/perf/util/topdown.h new file mode 100644 index 000000000000..2f0d0b887639 --- /dev/null +++ b/tools/perf/util/topdown.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef TOPDOWN_H +#define TOPDOWN_H 1 +#include "evsel.h" + +bool arch_topdown_check_group(bool *warn); +void arch_topdown_group_warn(void); +bool arch_topdown_sample_read(struct evsel *leader); + +int topdown_filter_events(const char **attr, char **str, bool use_group); + +#endif diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c index bfa782421cbd..62b4c75c966c 100644 --- a/tools/perf/util/tsc.c +++ b/tools/perf/util/tsc.c @@ -1,7 +1,16 @@ // SPDX-License-Identifier: GPL-2.0 +#include + #include +#include +#include #include +#include + +#include "event.h" +#include "synthetic-events.h" +#include "debug.h" #include "tsc.h" u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc) @@ -19,12 +28,84 @@ u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc) { u64 quot, rem; + if (tc->cap_user_time_short) + cyc = tc->time_cycles + + ((cyc - tc->time_cycles) & tc->time_mask); + quot = cyc >> tc->time_shift; rem = cyc & (((u64)1 << tc->time_shift) - 1); return tc->time_zero + quot * tc->time_mult + ((rem * tc->time_mult) >> tc->time_shift); } +int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, + struct perf_tsc_conversion *tc) +{ + u32 seq; + int i = 0; + + while (1) { + seq = pc->lock; + rmb(); + tc->time_mult = pc->time_mult; + tc->time_shift = pc->time_shift; + tc->time_zero = pc->time_zero; + tc->time_cycles = pc->time_cycles; + tc->time_mask = pc->time_mask; + tc->cap_user_time_zero = pc->cap_user_time_zero; + tc->cap_user_time_short = pc->cap_user_time_short; + rmb(); + if (pc->lock == seq && !(seq & 1)) + break; + if (++i > 10000) { + pr_debug("failed to get perf_event_mmap_page lock\n"); + return -EINVAL; + } + } + + if (!tc->cap_user_time_zero) + return -EOPNOTSUPP; + + return 0; +} + +int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc, + struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) +{ + union perf_event event = { + .time_conv = { + .header = { + .type = PERF_RECORD_TIME_CONV, + .size = sizeof(struct perf_record_time_conv), + }, + }, + }; + struct perf_tsc_conversion tc; + int err; + + if (!pc) + return 0; + err = perf_read_tsc_conversion(pc, &tc); + if (err == -EOPNOTSUPP) + return 0; + if (err) + return err; + + pr_debug2("Synthesizing TSC conversion information\n"); + + event.time_conv.time_mult = tc.time_mult; + event.time_conv.time_shift = tc.time_shift; + event.time_conv.time_zero = tc.time_zero; + event.time_conv.time_cycles = tc.time_cycles; + event.time_conv.time_mask = tc.time_mask; + event.time_conv.cap_user_time_zero = tc.cap_user_time_zero; + event.time_conv.cap_user_time_short = tc.cap_user_time_short; + + return process(tool, &event, NULL, machine); +} + u64 __weak rdtsc(void) { return 0; diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h index 3c5a632ee57c..72a15419f3b3 100644 --- a/tools/perf/util/tsc.h +++ b/tools/perf/util/tsc.h @@ -8,6 +8,11 @@ struct perf_tsc_conversion { u16 time_shift; u32 time_mult; u64 time_zero; + u64 time_cycles; + u64 time_mask; + + bool cap_user_time_zero; + bool cap_user_time_short; }; struct perf_event_mmap_page; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index f486fdd3a538..ad737052e597 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -62,4 +62,10 @@ char *perf_exe(char *buf, int len); #endif #endif +extern bool test_attr__enabled; +void test_attr__ready(void); +void test_attr__init(void); +struct perf_event_attr; +void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu, + int fd, int group_fd, unsigned long flags); #endif /* GIT_COMPAT_UTIL_H */ diff --git a/tools/testing/scatterlist/Makefile b/tools/testing/scatterlist/Makefile index cbb003d9305e..c65233876622 100644 --- a/tools/testing/scatterlist/Makefile +++ b/tools/testing/scatterlist/Makefile @@ -14,7 +14,7 @@ targets: include $(TARGETS) main: $(OFILES) clean: - $(RM) $(TARGETS) $(OFILES) scatterlist.c linux/scatterlist.h linux/highmem.h linux/kmemleak.h asm/io.h + $(RM) $(TARGETS) $(OFILES) scatterlist.c linux/scatterlist.h linux/highmem.h linux/kmemleak.h linux/slab.h asm/io.h @rmdir asm scatterlist.c: ../../../lib/scatterlist.c @@ -28,4 +28,5 @@ include: ../../../include/linux/scatterlist.h @touch asm/io.h @touch linux/highmem.h @touch linux/kmemleak.h + @touch linux/slab.h @cp $< linux/scatterlist.h diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h index 6f9ac14aa800..6ae907f375d2 100644 --- a/tools/testing/scatterlist/linux/mm.h +++ b/tools/testing/scatterlist/linux/mm.h @@ -114,6 +114,12 @@ static inline void *kmalloc(unsigned int size, unsigned int flags) return malloc(size); } +static inline void * +kmalloc_array(unsigned int n, unsigned int size, unsigned int flags) +{ + return malloc(n * size); +} + #define kfree(x) free(x) #define kmemleak_alloc(a, b, c, d) @@ -122,4 +128,33 @@ static inline void *kmalloc(unsigned int size, unsigned int flags) #define PageSlab(p) (0) #define flush_kernel_dcache_page(p) +#define MAX_ERRNO 4095 + +#define IS_ERR_VALUE(x) unlikely((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO) + +static inline void * __must_check ERR_PTR(long error) +{ + return (void *) error; +} + +static inline long __must_check PTR_ERR(__force const void *ptr) +{ + return (long) ptr; +} + +static inline bool __must_check IS_ERR(__force const void *ptr) +{ + return IS_ERR_VALUE((unsigned long)ptr); +} + +static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr) +{ + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + else + return 0; +} + +#define IS_ENABLED(x) (0) + #endif diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c index 0a1464181226..b2c7e9f7b8d3 100644 --- a/tools/testing/scatterlist/main.c +++ b/tools/testing/scatterlist/main.c @@ -5,6 +5,15 @@ #define MAX_PAGES (64) +struct test { + int alloc_ret; + unsigned num_pages; + unsigned *pfn; + unsigned size; + unsigned int max_seg; + unsigned int expected_segments; +}; + static void set_pages(struct page **pages, const unsigned *array, unsigned num) { unsigned int i; @@ -17,17 +26,32 @@ static void set_pages(struct page **pages, const unsigned *array, unsigned num) #define pfn(...) (unsigned []){ __VA_ARGS__ } +static void fail(struct test *test, struct sg_table *st, const char *cond) +{ + unsigned int i; + + fprintf(stderr, "Failed on '%s'!\n\n", cond); + + printf("size = %u, max segment = %u, expected nents = %u\nst->nents = %u, st->orig_nents= %u\n", + test->size, test->max_seg, test->expected_segments, st->nents, + st->orig_nents); + + printf("%u input PFNs:", test->num_pages); + for (i = 0; i < test->num_pages; i++) + printf(" %x", test->pfn[i]); + printf("\n"); + + exit(1); +} + +#define VALIDATE(cond, st, test) \ + if (!(cond)) \ + fail((test), (st), #cond); + int main(void) { const unsigned int sgmax = SCATTERLIST_MAX_SEGMENT; - struct test { - int alloc_ret; - unsigned num_pages; - unsigned *pfn; - unsigned size; - unsigned int max_seg; - unsigned int expected_segments; - } *test, tests[] = { + struct test *test, tests[] = { { -EINVAL, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 }, { -EINVAL, 1, pfn(0), PAGE_SIZE, 0, 1 }, { -EINVAL, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 }, @@ -55,20 +79,19 @@ int main(void) for (i = 0, test = tests; test->expected_segments; test++, i++) { struct page *pages[MAX_PAGES]; struct sg_table st; - int ret; + struct scatterlist *sg; set_pages(pages, test->pfn, test->num_pages); - ret = __sg_alloc_table_from_pages(&st, pages, test->num_pages, - 0, test->size, test->max_seg, - GFP_KERNEL); - assert(ret == test->alloc_ret); + sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0, + test->size, test->max_seg, NULL, 0, GFP_KERNEL); + assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret); if (test->alloc_ret) continue; - assert(st.nents == test->expected_segments); - assert(st.orig_nents == test->expected_segments); + VALIDATE(st.nents == test->expected_segments, &st, test); + VALIDATE(st.orig_nents == test->expected_segments, &st, test); sg_free_table(&st); }