mirror of
https://github.com/hardkernel/linux.git
synced 2026-06-07 03:15:31 +09:00
Merge 5.7-rc1 into android-mainline
Linux 5.7-rc1 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com> Change-Id: I40037d3be5c3181d58f7aa1122d1fb06353d98b9
This commit is contained in:
@@ -65,6 +65,12 @@ max_pid_namespaces
|
||||
The maximum number of pid namespaces that any user in the current
|
||||
user namespace may create.
|
||||
|
||||
max_time_namespaces
|
||||
===================
|
||||
|
||||
The maximum number of time namespaces that any user in the current
|
||||
user namespace may create.
|
||||
|
||||
max_user_namespaces
|
||||
===================
|
||||
|
||||
|
||||
5556
MAINTAINERS
5556
MAINTAINERS
File diff suppressed because it is too large
Load Diff
4
Makefile
4
Makefile
@@ -1,8 +1,8 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
VERSION = 5
|
||||
PATCHLEVEL = 6
|
||||
PATCHLEVEL = 7
|
||||
SUBLEVEL = 0
|
||||
EXTRAVERSION =
|
||||
EXTRAVERSION = -rc1
|
||||
NAME = Kleptomaniac Octopus
|
||||
|
||||
# *DOCUMENTATION*
|
||||
|
||||
@@ -1476,6 +1476,12 @@ static const struct intel_uncore_init_fun tgl_l_uncore_init __initconst = {
|
||||
.mmio_init = tgl_l_uncore_mmio_init,
|
||||
};
|
||||
|
||||
static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
|
||||
.cpu_init = icx_uncore_cpu_init,
|
||||
.pci_init = icx_uncore_pci_init,
|
||||
.mmio_init = icx_uncore_mmio_init,
|
||||
};
|
||||
|
||||
static const struct intel_uncore_init_fun snr_uncore_init __initconst = {
|
||||
.cpu_init = snr_uncore_cpu_init,
|
||||
.pci_init = snr_uncore_pci_init,
|
||||
@@ -1511,6 +1517,8 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_uncore_init),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_NNPI, &icl_uncore_init),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_uncore_init),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &icx_uncore_init),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &icx_uncore_init),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, &tgl_l_uncore_init),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &tgl_uncore_init),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init),
|
||||
|
||||
@@ -550,6 +550,9 @@ void skx_uncore_cpu_init(void);
|
||||
int snr_uncore_pci_init(void);
|
||||
void snr_uncore_cpu_init(void);
|
||||
void snr_uncore_mmio_init(void);
|
||||
int icx_uncore_pci_init(void);
|
||||
void icx_uncore_cpu_init(void);
|
||||
void icx_uncore_mmio_init(void);
|
||||
|
||||
/* uncore_nhmex.c */
|
||||
void nhmex_uncore_cpu_init(void);
|
||||
|
||||
@@ -382,6 +382,42 @@
|
||||
#define SNR_IMC_MMIO_MEM0_OFFSET 0xd8
|
||||
#define SNR_IMC_MMIO_MEM0_MASK 0x7FF
|
||||
|
||||
/* ICX CHA */
|
||||
#define ICX_C34_MSR_PMON_CTR0 0xb68
|
||||
#define ICX_C34_MSR_PMON_CTL0 0xb61
|
||||
#define ICX_C34_MSR_PMON_BOX_CTL 0xb60
|
||||
#define ICX_C34_MSR_PMON_BOX_FILTER0 0xb65
|
||||
|
||||
/* ICX IIO */
|
||||
#define ICX_IIO_MSR_PMON_CTL0 0xa58
|
||||
#define ICX_IIO_MSR_PMON_CTR0 0xa51
|
||||
#define ICX_IIO_MSR_PMON_BOX_CTL 0xa50
|
||||
|
||||
/* ICX IRP */
|
||||
#define ICX_IRP0_MSR_PMON_CTL0 0xa4d
|
||||
#define ICX_IRP0_MSR_PMON_CTR0 0xa4b
|
||||
#define ICX_IRP0_MSR_PMON_BOX_CTL 0xa4a
|
||||
|
||||
/* ICX M2PCIE */
|
||||
#define ICX_M2PCIE_MSR_PMON_CTL0 0xa46
|
||||
#define ICX_M2PCIE_MSR_PMON_CTR0 0xa41
|
||||
#define ICX_M2PCIE_MSR_PMON_BOX_CTL 0xa40
|
||||
|
||||
/* ICX UPI */
|
||||
#define ICX_UPI_PCI_PMON_CTL0 0x350
|
||||
#define ICX_UPI_PCI_PMON_CTR0 0x320
|
||||
#define ICX_UPI_PCI_PMON_BOX_CTL 0x318
|
||||
#define ICX_UPI_CTL_UMASK_EXT 0xffffff
|
||||
|
||||
/* ICX M3UPI */
|
||||
#define ICX_M3UPI_PCI_PMON_CTL0 0xd8
|
||||
#define ICX_M3UPI_PCI_PMON_CTR0 0xa8
|
||||
#define ICX_M3UPI_PCI_PMON_BOX_CTL 0xa0
|
||||
|
||||
/* ICX IMC */
|
||||
#define ICX_NUMBER_IMC_CHN 2
|
||||
#define ICX_IMC_MEM_STRIDE 0x4
|
||||
|
||||
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
|
||||
DEFINE_UNCORE_FORMAT_ATTR(event2, event, "config:0-6");
|
||||
DEFINE_UNCORE_FORMAT_ATTR(event_ext, event, "config:0-7,21");
|
||||
@@ -390,6 +426,7 @@ DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
|
||||
DEFINE_UNCORE_FORMAT_ATTR(umask_ext, umask, "config:8-15,32-43,45-55");
|
||||
DEFINE_UNCORE_FORMAT_ATTR(umask_ext2, umask, "config:8-15,32-57");
|
||||
DEFINE_UNCORE_FORMAT_ATTR(umask_ext3, umask, "config:8-15,32-39");
|
||||
DEFINE_UNCORE_FORMAT_ATTR(umask_ext4, umask, "config:8-15,32-55");
|
||||
DEFINE_UNCORE_FORMAT_ATTR(qor, qor, "config:16");
|
||||
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
|
||||
DEFINE_UNCORE_FORMAT_ATTR(tid_en, tid_en, "config:19");
|
||||
@@ -4551,3 +4588,477 @@ void snr_uncore_mmio_init(void)
|
||||
}
|
||||
|
||||
/* end of SNR uncore support */
|
||||
|
||||
/* ICX uncore support */
|
||||
|
||||
static unsigned icx_cha_msr_offsets[] = {
|
||||
0x2a0, 0x2ae, 0x2bc, 0x2ca, 0x2d8, 0x2e6, 0x2f4, 0x302, 0x310,
|
||||
0x31e, 0x32c, 0x33a, 0x348, 0x356, 0x364, 0x372, 0x380, 0x38e,
|
||||
0x3aa, 0x3b8, 0x3c6, 0x3d4, 0x3e2, 0x3f0, 0x3fe, 0x40c, 0x41a,
|
||||
0x428, 0x436, 0x444, 0x452, 0x460, 0x46e, 0x47c, 0x0, 0xe,
|
||||
0x1c, 0x2a, 0x38, 0x46,
|
||||
};
|
||||
|
||||
static int icx_cha_hw_config(struct intel_uncore_box *box, struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event_extra *reg1 = &event->hw.extra_reg;
|
||||
bool tie_en = !!(event->hw.config & SNBEP_CBO_PMON_CTL_TID_EN);
|
||||
|
||||
if (tie_en) {
|
||||
reg1->reg = ICX_C34_MSR_PMON_BOX_FILTER0 +
|
||||
icx_cha_msr_offsets[box->pmu->pmu_idx];
|
||||
reg1->config = event->attr.config1 & SKX_CHA_MSR_PMON_BOX_FILTER_TID;
|
||||
reg1->idx = 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct intel_uncore_ops icx_uncore_chabox_ops = {
|
||||
.init_box = ivbep_uncore_msr_init_box,
|
||||
.disable_box = snbep_uncore_msr_disable_box,
|
||||
.enable_box = snbep_uncore_msr_enable_box,
|
||||
.disable_event = snbep_uncore_msr_disable_event,
|
||||
.enable_event = snr_cha_enable_event,
|
||||
.read_counter = uncore_msr_read_counter,
|
||||
.hw_config = icx_cha_hw_config,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type icx_uncore_chabox = {
|
||||
.name = "cha",
|
||||
.num_counters = 4,
|
||||
.perf_ctr_bits = 48,
|
||||
.event_ctl = ICX_C34_MSR_PMON_CTL0,
|
||||
.perf_ctr = ICX_C34_MSR_PMON_CTR0,
|
||||
.box_ctl = ICX_C34_MSR_PMON_BOX_CTL,
|
||||
.msr_offsets = icx_cha_msr_offsets,
|
||||
.event_mask = HSWEP_S_MSR_PMON_RAW_EVENT_MASK,
|
||||
.event_mask_ext = SNR_CHA_RAW_EVENT_MASK_EXT,
|
||||
.constraints = skx_uncore_chabox_constraints,
|
||||
.ops = &icx_uncore_chabox_ops,
|
||||
.format_group = &snr_uncore_chabox_format_group,
|
||||
};
|
||||
|
||||
static unsigned icx_msr_offsets[] = {
|
||||
0x0, 0x20, 0x40, 0x90, 0xb0, 0xd0,
|
||||
};
|
||||
|
||||
static struct event_constraint icx_uncore_iio_constraints[] = {
|
||||
UNCORE_EVENT_CONSTRAINT(0x02, 0x3),
|
||||
UNCORE_EVENT_CONSTRAINT(0x03, 0x3),
|
||||
UNCORE_EVENT_CONSTRAINT(0x83, 0x3),
|
||||
UNCORE_EVENT_CONSTRAINT(0xc0, 0xc),
|
||||
UNCORE_EVENT_CONSTRAINT(0xc5, 0xc),
|
||||
EVENT_CONSTRAINT_END
|
||||
};
|
||||
|
||||
static struct intel_uncore_type icx_uncore_iio = {
|
||||
.name = "iio",
|
||||
.num_counters = 4,
|
||||
.num_boxes = 6,
|
||||
.perf_ctr_bits = 48,
|
||||
.event_ctl = ICX_IIO_MSR_PMON_CTL0,
|
||||
.perf_ctr = ICX_IIO_MSR_PMON_CTR0,
|
||||
.event_mask = SNBEP_PMON_RAW_EVENT_MASK,
|
||||
.event_mask_ext = SNR_IIO_PMON_RAW_EVENT_MASK_EXT,
|
||||
.box_ctl = ICX_IIO_MSR_PMON_BOX_CTL,
|
||||
.msr_offsets = icx_msr_offsets,
|
||||
.constraints = icx_uncore_iio_constraints,
|
||||
.ops = &skx_uncore_iio_ops,
|
||||
.format_group = &snr_uncore_iio_format_group,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type icx_uncore_irp = {
|
||||
.name = "irp",
|
||||
.num_counters = 2,
|
||||
.num_boxes = 6,
|
||||
.perf_ctr_bits = 48,
|
||||
.event_ctl = ICX_IRP0_MSR_PMON_CTL0,
|
||||
.perf_ctr = ICX_IRP0_MSR_PMON_CTR0,
|
||||
.event_mask = SNBEP_PMON_RAW_EVENT_MASK,
|
||||
.box_ctl = ICX_IRP0_MSR_PMON_BOX_CTL,
|
||||
.msr_offsets = icx_msr_offsets,
|
||||
.ops = &ivbep_uncore_msr_ops,
|
||||
.format_group = &ivbep_uncore_format_group,
|
||||
};
|
||||
|
||||
static struct event_constraint icx_uncore_m2pcie_constraints[] = {
|
||||
UNCORE_EVENT_CONSTRAINT(0x14, 0x3),
|
||||
UNCORE_EVENT_CONSTRAINT(0x23, 0x3),
|
||||
UNCORE_EVENT_CONSTRAINT(0x2d, 0x3),
|
||||
EVENT_CONSTRAINT_END
|
||||
};
|
||||
|
||||
static struct intel_uncore_type icx_uncore_m2pcie = {
|
||||
.name = "m2pcie",
|
||||
.num_counters = 4,
|
||||
.num_boxes = 6,
|
||||
.perf_ctr_bits = 48,
|
||||
.event_ctl = ICX_M2PCIE_MSR_PMON_CTL0,
|
||||
.perf_ctr = ICX_M2PCIE_MSR_PMON_CTR0,
|
||||
.box_ctl = ICX_M2PCIE_MSR_PMON_BOX_CTL,
|
||||
.msr_offsets = icx_msr_offsets,
|
||||
.constraints = icx_uncore_m2pcie_constraints,
|
||||
.event_mask = SNBEP_PMON_RAW_EVENT_MASK,
|
||||
.ops = &ivbep_uncore_msr_ops,
|
||||
.format_group = &ivbep_uncore_format_group,
|
||||
};
|
||||
|
||||
enum perf_uncore_icx_iio_freerunning_type_id {
|
||||
ICX_IIO_MSR_IOCLK,
|
||||
ICX_IIO_MSR_BW_IN,
|
||||
|
||||
ICX_IIO_FREERUNNING_TYPE_MAX,
|
||||
};
|
||||
|
||||
static unsigned icx_iio_clk_freerunning_box_offsets[] = {
|
||||
0x0, 0x20, 0x40, 0x90, 0xb0, 0xd0,
|
||||
};
|
||||
|
||||
static unsigned icx_iio_bw_freerunning_box_offsets[] = {
|
||||
0x0, 0x10, 0x20, 0x90, 0xa0, 0xb0,
|
||||
};
|
||||
|
||||
static struct freerunning_counters icx_iio_freerunning[] = {
|
||||
[ICX_IIO_MSR_IOCLK] = { 0xa55, 0x1, 0x20, 1, 48, icx_iio_clk_freerunning_box_offsets },
|
||||
[ICX_IIO_MSR_BW_IN] = { 0xaa0, 0x1, 0x10, 8, 48, icx_iio_bw_freerunning_box_offsets },
|
||||
};
|
||||
|
||||
static struct uncore_event_desc icx_uncore_iio_freerunning_events[] = {
|
||||
/* Free-Running IIO CLOCKS Counter */
|
||||
INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"),
|
||||
/* Free-Running IIO BANDWIDTH IN Counters */
|
||||
INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"),
|
||||
INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"),
|
||||
INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"),
|
||||
INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"),
|
||||
INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"),
|
||||
INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"),
|
||||
INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"),
|
||||
INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"),
|
||||
INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"),
|
||||
INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"),
|
||||
INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"),
|
||||
INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"),
|
||||
INTEL_UNCORE_EVENT_DESC(bw_in_port4, "event=0xff,umask=0x24"),
|
||||
INTEL_UNCORE_EVENT_DESC(bw_in_port4.scale, "3.814697266e-6"),
|
||||
INTEL_UNCORE_EVENT_DESC(bw_in_port4.unit, "MiB"),
|
||||
INTEL_UNCORE_EVENT_DESC(bw_in_port5, "event=0xff,umask=0x25"),
|
||||
INTEL_UNCORE_EVENT_DESC(bw_in_port5.scale, "3.814697266e-6"),
|
||||
INTEL_UNCORE_EVENT_DESC(bw_in_port5.unit, "MiB"),
|
||||
INTEL_UNCORE_EVENT_DESC(bw_in_port6, "event=0xff,umask=0x26"),
|
||||
INTEL_UNCORE_EVENT_DESC(bw_in_port6.scale, "3.814697266e-6"),
|
||||
INTEL_UNCORE_EVENT_DESC(bw_in_port6.unit, "MiB"),
|
||||
INTEL_UNCORE_EVENT_DESC(bw_in_port7, "event=0xff,umask=0x27"),
|
||||
INTEL_UNCORE_EVENT_DESC(bw_in_port7.scale, "3.814697266e-6"),
|
||||
INTEL_UNCORE_EVENT_DESC(bw_in_port7.unit, "MiB"),
|
||||
{ /* end: all zeroes */ },
|
||||
};
|
||||
|
||||
static struct intel_uncore_type icx_uncore_iio_free_running = {
|
||||
.name = "iio_free_running",
|
||||
.num_counters = 9,
|
||||
.num_boxes = 6,
|
||||
.num_freerunning_types = ICX_IIO_FREERUNNING_TYPE_MAX,
|
||||
.freerunning = icx_iio_freerunning,
|
||||
.ops = &skx_uncore_iio_freerunning_ops,
|
||||
.event_descs = icx_uncore_iio_freerunning_events,
|
||||
.format_group = &skx_uncore_iio_freerunning_format_group,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type *icx_msr_uncores[] = {
|
||||
&skx_uncore_ubox,
|
||||
&icx_uncore_chabox,
|
||||
&icx_uncore_iio,
|
||||
&icx_uncore_irp,
|
||||
&icx_uncore_m2pcie,
|
||||
&skx_uncore_pcu,
|
||||
&icx_uncore_iio_free_running,
|
||||
NULL,
|
||||
};
|
||||
|
||||
/*
|
||||
 * To determine the number of CHAs, read the CAPID6 (low) and CAPID7 (high)
 * registers, which are located at Device 30, Function 3.
|
||||
*/
|
||||
#define ICX_CAPID6 0x9c
|
||||
#define ICX_CAPID7 0xa0
|
||||
|
||||
static u64 icx_count_chabox(void)
|
||||
{
|
||||
struct pci_dev *dev = NULL;
|
||||
u64 caps = 0;
|
||||
|
||||
dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x345b, dev);
|
||||
if (!dev)
|
||||
goto out;
|
||||
|
||||
pci_read_config_dword(dev, ICX_CAPID6, (u32 *)&caps);
|
||||
pci_read_config_dword(dev, ICX_CAPID7, (u32 *)&caps + 1);
|
||||
out:
|
||||
pci_dev_put(dev);
|
||||
return hweight64(caps);
|
||||
}
|
||||
|
||||
void icx_uncore_cpu_init(void)
|
||||
{
|
||||
u64 num_boxes = icx_count_chabox();
|
||||
|
||||
if (WARN_ON(num_boxes > ARRAY_SIZE(icx_cha_msr_offsets)))
|
||||
return;
|
||||
icx_uncore_chabox.num_boxes = num_boxes;
|
||||
uncore_msr_uncores = icx_msr_uncores;
|
||||
}
|
||||
|
||||
static struct intel_uncore_type icx_uncore_m2m = {
|
||||
.name = "m2m",
|
||||
.num_counters = 4,
|
||||
.num_boxes = 4,
|
||||
.perf_ctr_bits = 48,
|
||||
.perf_ctr = SNR_M2M_PCI_PMON_CTR0,
|
||||
.event_ctl = SNR_M2M_PCI_PMON_CTL0,
|
||||
.event_mask = SNBEP_PMON_RAW_EVENT_MASK,
|
||||
.box_ctl = SNR_M2M_PCI_PMON_BOX_CTL,
|
||||
.ops = &snr_m2m_uncore_pci_ops,
|
||||
.format_group = &skx_uncore_format_group,
|
||||
};
|
||||
|
||||
static struct attribute *icx_upi_uncore_formats_attr[] = {
|
||||
&format_attr_event.attr,
|
||||
&format_attr_umask_ext4.attr,
|
||||
&format_attr_edge.attr,
|
||||
&format_attr_inv.attr,
|
||||
&format_attr_thresh8.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static const struct attribute_group icx_upi_uncore_format_group = {
|
||||
.name = "format",
|
||||
.attrs = icx_upi_uncore_formats_attr,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type icx_uncore_upi = {
|
||||
.name = "upi",
|
||||
.num_counters = 4,
|
||||
.num_boxes = 3,
|
||||
.perf_ctr_bits = 48,
|
||||
.perf_ctr = ICX_UPI_PCI_PMON_CTR0,
|
||||
.event_ctl = ICX_UPI_PCI_PMON_CTL0,
|
||||
.event_mask = SNBEP_PMON_RAW_EVENT_MASK,
|
||||
.event_mask_ext = ICX_UPI_CTL_UMASK_EXT,
|
||||
.box_ctl = ICX_UPI_PCI_PMON_BOX_CTL,
|
||||
.ops = &skx_upi_uncore_pci_ops,
|
||||
.format_group = &icx_upi_uncore_format_group,
|
||||
};
|
||||
|
||||
static struct event_constraint icx_uncore_m3upi_constraints[] = {
|
||||
UNCORE_EVENT_CONSTRAINT(0x1c, 0x1),
|
||||
UNCORE_EVENT_CONSTRAINT(0x1d, 0x1),
|
||||
UNCORE_EVENT_CONSTRAINT(0x1e, 0x1),
|
||||
UNCORE_EVENT_CONSTRAINT(0x1f, 0x1),
|
||||
UNCORE_EVENT_CONSTRAINT(0x40, 0x7),
|
||||
UNCORE_EVENT_CONSTRAINT(0x4e, 0x7),
|
||||
UNCORE_EVENT_CONSTRAINT(0x4f, 0x7),
|
||||
UNCORE_EVENT_CONSTRAINT(0x50, 0x7),
|
||||
EVENT_CONSTRAINT_END
|
||||
};
|
||||
|
||||
static struct intel_uncore_type icx_uncore_m3upi = {
|
||||
.name = "m3upi",
|
||||
.num_counters = 4,
|
||||
.num_boxes = 3,
|
||||
.perf_ctr_bits = 48,
|
||||
.perf_ctr = ICX_M3UPI_PCI_PMON_CTR0,
|
||||
.event_ctl = ICX_M3UPI_PCI_PMON_CTL0,
|
||||
.event_mask = SNBEP_PMON_RAW_EVENT_MASK,
|
||||
.box_ctl = ICX_M3UPI_PCI_PMON_BOX_CTL,
|
||||
.constraints = icx_uncore_m3upi_constraints,
|
||||
.ops = &ivbep_uncore_pci_ops,
|
||||
.format_group = &skx_uncore_format_group,
|
||||
};
|
||||
|
||||
enum {
|
||||
ICX_PCI_UNCORE_M2M,
|
||||
ICX_PCI_UNCORE_UPI,
|
||||
ICX_PCI_UNCORE_M3UPI,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type *icx_pci_uncores[] = {
|
||||
[ICX_PCI_UNCORE_M2M] = &icx_uncore_m2m,
|
||||
[ICX_PCI_UNCORE_UPI] = &icx_uncore_upi,
|
||||
[ICX_PCI_UNCORE_M3UPI] = &icx_uncore_m3upi,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static const struct pci_device_id icx_uncore_pci_ids[] = {
|
||||
{ /* M2M 0 */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a),
|
||||
.driver_data = UNCORE_PCI_DEV_FULL_DATA(12, 0, ICX_PCI_UNCORE_M2M, 0),
|
||||
},
|
||||
{ /* M2M 1 */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a),
|
||||
.driver_data = UNCORE_PCI_DEV_FULL_DATA(13, 0, ICX_PCI_UNCORE_M2M, 1),
|
||||
},
|
||||
{ /* M2M 2 */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a),
|
||||
.driver_data = UNCORE_PCI_DEV_FULL_DATA(14, 0, ICX_PCI_UNCORE_M2M, 2),
|
||||
},
|
||||
{ /* M2M 3 */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x344a),
|
||||
.driver_data = UNCORE_PCI_DEV_FULL_DATA(15, 0, ICX_PCI_UNCORE_M2M, 3),
|
||||
},
|
||||
{ /* UPI Link 0 */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3441),
|
||||
.driver_data = UNCORE_PCI_DEV_FULL_DATA(2, 1, ICX_PCI_UNCORE_UPI, 0),
|
||||
},
|
||||
{ /* UPI Link 1 */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3441),
|
||||
.driver_data = UNCORE_PCI_DEV_FULL_DATA(3, 1, ICX_PCI_UNCORE_UPI, 1),
|
||||
},
|
||||
{ /* UPI Link 2 */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3441),
|
||||
.driver_data = UNCORE_PCI_DEV_FULL_DATA(4, 1, ICX_PCI_UNCORE_UPI, 2),
|
||||
},
|
||||
{ /* M3UPI Link 0 */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3446),
|
||||
.driver_data = UNCORE_PCI_DEV_FULL_DATA(5, 1, ICX_PCI_UNCORE_M3UPI, 0),
|
||||
},
|
||||
{ /* M3UPI Link 1 */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3446),
|
||||
.driver_data = UNCORE_PCI_DEV_FULL_DATA(6, 1, ICX_PCI_UNCORE_M3UPI, 1),
|
||||
},
|
||||
{ /* M3UPI Link 2 */
|
||||
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3446),
|
||||
.driver_data = UNCORE_PCI_DEV_FULL_DATA(7, 1, ICX_PCI_UNCORE_M3UPI, 2),
|
||||
},
|
||||
{ /* end: all zeroes */ }
|
||||
};
|
||||
|
||||
static struct pci_driver icx_uncore_pci_driver = {
|
||||
.name = "icx_uncore",
|
||||
.id_table = icx_uncore_pci_ids,
|
||||
};
|
||||
|
||||
int icx_uncore_pci_init(void)
|
||||
{
|
||||
/* ICX UBOX DID */
|
||||
int ret = snbep_pci2phy_map_init(0x3450, SKX_CPUNODEID,
|
||||
SKX_GIDNIDMAP, true);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
uncore_pci_uncores = icx_pci_uncores;
|
||||
uncore_pci_driver = &icx_uncore_pci_driver;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void icx_uncore_imc_init_box(struct intel_uncore_box *box)
|
||||
{
|
||||
unsigned int box_ctl = box->pmu->type->box_ctl +
|
||||
box->pmu->type->mmio_offset * (box->pmu->pmu_idx % ICX_NUMBER_IMC_CHN);
|
||||
int mem_offset = (box->pmu->pmu_idx / ICX_NUMBER_IMC_CHN) * ICX_IMC_MEM_STRIDE +
|
||||
SNR_IMC_MMIO_MEM0_OFFSET;
|
||||
|
||||
__snr_uncore_mmio_init_box(box, box_ctl, mem_offset);
|
||||
}
|
||||
|
||||
static struct intel_uncore_ops icx_uncore_mmio_ops = {
|
||||
.init_box = icx_uncore_imc_init_box,
|
||||
.exit_box = uncore_mmio_exit_box,
|
||||
.disable_box = snr_uncore_mmio_disable_box,
|
||||
.enable_box = snr_uncore_mmio_enable_box,
|
||||
.disable_event = snr_uncore_mmio_disable_event,
|
||||
.enable_event = snr_uncore_mmio_enable_event,
|
||||
.read_counter = uncore_mmio_read_counter,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type icx_uncore_imc = {
|
||||
.name = "imc",
|
||||
.num_counters = 4,
|
||||
.num_boxes = 8,
|
||||
.perf_ctr_bits = 48,
|
||||
.fixed_ctr_bits = 48,
|
||||
.fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR,
|
||||
.fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL,
|
||||
.event_descs = hswep_uncore_imc_events,
|
||||
.perf_ctr = SNR_IMC_MMIO_PMON_CTR0,
|
||||
.event_ctl = SNR_IMC_MMIO_PMON_CTL0,
|
||||
.event_mask = SNBEP_PMON_RAW_EVENT_MASK,
|
||||
.box_ctl = SNR_IMC_MMIO_PMON_BOX_CTL,
|
||||
.mmio_offset = SNR_IMC_MMIO_OFFSET,
|
||||
.ops = &icx_uncore_mmio_ops,
|
||||
.format_group = &skx_uncore_format_group,
|
||||
};
|
||||
|
||||
enum perf_uncore_icx_imc_freerunning_type_id {
|
||||
ICX_IMC_DCLK,
|
||||
ICX_IMC_DDR,
|
||||
ICX_IMC_DDRT,
|
||||
|
||||
ICX_IMC_FREERUNNING_TYPE_MAX,
|
||||
};
|
||||
|
||||
static struct freerunning_counters icx_imc_freerunning[] = {
|
||||
[ICX_IMC_DCLK] = { 0x22b0, 0x0, 0, 1, 48 },
|
||||
[ICX_IMC_DDR] = { 0x2290, 0x8, 0, 2, 48 },
|
||||
[ICX_IMC_DDRT] = { 0x22a0, 0x8, 0, 2, 48 },
|
||||
};
|
||||
|
||||
static struct uncore_event_desc icx_uncore_imc_freerunning_events[] = {
|
||||
INTEL_UNCORE_EVENT_DESC(dclk, "event=0xff,umask=0x10"),
|
||||
|
||||
INTEL_UNCORE_EVENT_DESC(read, "event=0xff,umask=0x20"),
|
||||
INTEL_UNCORE_EVENT_DESC(read.scale, "3.814697266e-6"),
|
||||
INTEL_UNCORE_EVENT_DESC(read.unit, "MiB"),
|
||||
INTEL_UNCORE_EVENT_DESC(write, "event=0xff,umask=0x21"),
|
||||
INTEL_UNCORE_EVENT_DESC(write.scale, "3.814697266e-6"),
|
||||
INTEL_UNCORE_EVENT_DESC(write.unit, "MiB"),
|
||||
|
||||
INTEL_UNCORE_EVENT_DESC(ddrt_read, "event=0xff,umask=0x30"),
|
||||
INTEL_UNCORE_EVENT_DESC(ddrt_read.scale, "3.814697266e-6"),
|
||||
INTEL_UNCORE_EVENT_DESC(ddrt_read.unit, "MiB"),
|
||||
INTEL_UNCORE_EVENT_DESC(ddrt_write, "event=0xff,umask=0x31"),
|
||||
INTEL_UNCORE_EVENT_DESC(ddrt_write.scale, "3.814697266e-6"),
|
||||
INTEL_UNCORE_EVENT_DESC(ddrt_write.unit, "MiB"),
|
||||
{ /* end: all zeroes */ },
|
||||
};
|
||||
|
||||
static void icx_uncore_imc_freerunning_init_box(struct intel_uncore_box *box)
|
||||
{
|
||||
int mem_offset = box->pmu->pmu_idx * ICX_IMC_MEM_STRIDE +
|
||||
SNR_IMC_MMIO_MEM0_OFFSET;
|
||||
|
||||
__snr_uncore_mmio_init_box(box, uncore_mmio_box_ctl(box), mem_offset);
|
||||
}
|
||||
|
||||
static struct intel_uncore_ops icx_uncore_imc_freerunning_ops = {
|
||||
.init_box = icx_uncore_imc_freerunning_init_box,
|
||||
.exit_box = uncore_mmio_exit_box,
|
||||
.read_counter = uncore_mmio_read_counter,
|
||||
.hw_config = uncore_freerunning_hw_config,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type icx_uncore_imc_free_running = {
|
||||
.name = "imc_free_running",
|
||||
.num_counters = 5,
|
||||
.num_boxes = 4,
|
||||
.num_freerunning_types = ICX_IMC_FREERUNNING_TYPE_MAX,
|
||||
.freerunning = icx_imc_freerunning,
|
||||
.ops = &icx_uncore_imc_freerunning_ops,
|
||||
.event_descs = icx_uncore_imc_freerunning_events,
|
||||
.format_group = &skx_uncore_iio_freerunning_format_group,
|
||||
};
|
||||
|
||||
static struct intel_uncore_type *icx_mmio_uncores[] = {
|
||||
&icx_uncore_imc,
|
||||
&icx_uncore_imc_free_running,
|
||||
NULL,
|
||||
};
|
||||
|
||||
void icx_uncore_mmio_init(void)
|
||||
{
|
||||
uncore_mmio_uncores = icx_mmio_uncores;
|
||||
}
|
||||
|
||||
/* end of ICX uncore support */
|
||||
|
||||
@@ -44,6 +44,7 @@ unsigned int x86_stepping(unsigned int sig);
|
||||
extern void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c);
|
||||
extern void switch_to_sld(unsigned long tifn);
|
||||
extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
|
||||
extern bool handle_guest_split_lock(unsigned long ip);
|
||||
#else
|
||||
static inline void __init cpu_set_core_cap_bits(struct cpuinfo_x86 *c) {}
|
||||
static inline void switch_to_sld(unsigned long tifn) {}
|
||||
@@ -51,5 +52,10 @@ static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Stub for the #else configuration: never handles the trap, returns false. */
static inline bool handle_guest_split_lock(unsigned long ip)
{
	return false;
}
|
||||
#endif
|
||||
#endif /* _ASM_X86_CPU_H */
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
#include <asm/elf.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
#include <asm/cmdline.h>
|
||||
#include <asm/traps.h>
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#include <linux/topology.h>
|
||||
@@ -1066,13 +1067,10 @@ static void split_lock_init(void)
|
||||
split_lock_verify_msr(sld_state != sld_off);
|
||||
}
|
||||
|
||||
bool handle_user_split_lock(struct pt_regs *regs, long error_code)
|
||||
static void split_lock_warn(unsigned long ip)
|
||||
{
|
||||
if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
|
||||
return false;
|
||||
|
||||
pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n",
|
||||
current->comm, current->pid, regs->ip);
|
||||
current->comm, current->pid, ip);
|
||||
|
||||
/*
|
||||
* Disable the split lock detection for this task so it can make
|
||||
@@ -1081,6 +1079,31 @@ bool handle_user_split_lock(struct pt_regs *regs, long error_code)
|
||||
*/
|
||||
sld_update_msr(false);
|
||||
set_tsk_thread_flag(current, TIF_SLD);
|
||||
}
|
||||
|
||||
bool handle_guest_split_lock(unsigned long ip)
|
||||
{
|
||||
if (sld_state == sld_warn) {
|
||||
split_lock_warn(ip);
|
||||
return true;
|
||||
}
|
||||
|
||||
pr_warn_once("#AC: %s/%d %s split_lock trap at address: 0x%lx\n",
|
||||
current->comm, current->pid,
|
||||
sld_state == sld_fatal ? "fatal" : "bogus", ip);
|
||||
|
||||
current->thread.error_code = 0;
|
||||
current->thread.trap_nr = X86_TRAP_AC;
|
||||
force_sig_fault(SIGBUS, BUS_ADRALN, NULL);
|
||||
return false;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(handle_guest_split_lock);
|
||||
|
||||
bool handle_user_split_lock(struct pt_regs *regs, long error_code)
|
||||
{
|
||||
if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal)
|
||||
return false;
|
||||
split_lock_warn(regs->ip);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -4588,6 +4588,26 @@ static int handle_machine_check(struct kvm_vcpu *vcpu)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the host has split lock detection disabled, then #AC is
|
||||
* unconditionally injected into the guest, which is the pre split lock
|
||||
* detection behaviour.
|
||||
*
|
||||
* If the host has split lock detection enabled then #AC is
|
||||
* only injected into the guest when:
|
||||
* - Guest CPL == 3 (user mode)
|
||||
* - Guest has #AC detection enabled in CR0
|
||||
* - Guest EFLAGS has AC bit set
|
||||
*/
|
||||
static inline bool guest_inject_ac(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
|
||||
return true;
|
||||
|
||||
return vmx_get_cpl(vcpu) == 3 && kvm_read_cr0_bits(vcpu, X86_CR0_AM) &&
|
||||
(kvm_get_rflags(vcpu) & X86_EFLAGS_AC);
|
||||
}
|
||||
|
||||
static int handle_exception_nmi(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
@@ -4653,9 +4673,6 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
|
||||
return handle_rmode_exception(vcpu, ex_no, error_code);
|
||||
|
||||
switch (ex_no) {
|
||||
case AC_VECTOR:
|
||||
kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
|
||||
return 1;
|
||||
case DB_VECTOR:
|
||||
dr6 = vmcs_readl(EXIT_QUALIFICATION);
|
||||
if (!(vcpu->guest_debug &
|
||||
@@ -4684,6 +4701,20 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
|
||||
kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
|
||||
kvm_run->debug.arch.exception = ex_no;
|
||||
break;
|
||||
case AC_VECTOR:
|
||||
if (guest_inject_ac(vcpu)) {
|
||||
kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle split lock. Depending on detection mode this will
|
||||
* either warn and disable split lock detection for this
|
||||
* task or force SIGBUS on it.
|
||||
*/
|
||||
if (handle_guest_split_lock(kvm_rip_read(vcpu)))
|
||||
return 1;
|
||||
fallthrough;
|
||||
default:
|
||||
kvm_run->exit_reason = KVM_EXIT_EXCEPTION;
|
||||
kvm_run->ex.exception = ex_no;
|
||||
|
||||
@@ -5839,6 +5839,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
|
||||
{
|
||||
struct kvm_host_map map;
|
||||
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
|
||||
u64 page_line_mask;
|
||||
gpa_t gpa;
|
||||
char *kaddr;
|
||||
bool exchanged;
|
||||
@@ -5853,7 +5854,16 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
|
||||
(gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
|
||||
goto emul_write;
|
||||
|
||||
if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK))
|
||||
/*
|
||||
* Emulate the atomic as a straight write to avoid #AC if SLD is
|
||||
* enabled in the host and the access splits a cache line.
|
||||
*/
|
||||
if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
|
||||
page_line_mask = ~(cache_line_size() - 1);
|
||||
else
|
||||
page_line_mask = PAGE_MASK;
|
||||
|
||||
if (((gpa + bytes - 1) & page_line_mask) != (gpa & page_line_mask))
|
||||
goto emul_write;
|
||||
|
||||
if (kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map))
|
||||
|
||||
@@ -202,7 +202,7 @@ config CIFS_SMB_DIRECT
|
||||
help
|
||||
Enables SMB Direct support for SMB 3.0, 3.02 and 3.1.1.
|
||||
SMB Direct allows transferring SMB packets over RDMA. If unsure,
|
||||
say N.
|
||||
say Y.
|
||||
|
||||
config CIFS_FSCACHE
|
||||
bool "Provide CIFS client caching support"
|
||||
|
||||
@@ -323,10 +323,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
|
||||
atomic_read(&server->smbd_conn->send_credits),
|
||||
atomic_read(&server->smbd_conn->receive_credits),
|
||||
server->smbd_conn->receive_credit_target);
|
||||
seq_printf(m, "\nPending send_pending: %x "
|
||||
"send_payload_pending: %x",
|
||||
atomic_read(&server->smbd_conn->send_pending),
|
||||
atomic_read(&server->smbd_conn->send_payload_pending));
|
||||
seq_printf(m, "\nPending send_pending: %x ",
|
||||
atomic_read(&server->smbd_conn->send_pending));
|
||||
seq_printf(m, "\nReceive buffers count_receive_queue: %x "
|
||||
"count_empty_packet_queue: %x",
|
||||
server->smbd_conn->count_receive_queue,
|
||||
|
||||
@@ -1208,6 +1208,10 @@ static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off,
|
||||
{
|
||||
unsigned int xid = get_xid();
|
||||
ssize_t rc;
|
||||
struct cifsFileInfo *cfile = dst_file->private_data;
|
||||
|
||||
if (cfile->swapfile)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
rc = cifs_file_copychunk_range(xid, src_file, off, dst_file, destoff,
|
||||
len, flags);
|
||||
|
||||
@@ -426,7 +426,8 @@ struct smb_version_operations {
|
||||
/* generate new lease key */
|
||||
void (*new_lease_key)(struct cifs_fid *);
|
||||
int (*generate_signingkey)(struct cifs_ses *);
|
||||
int (*calc_signature)(struct smb_rqst *, struct TCP_Server_Info *);
|
||||
int (*calc_signature)(struct smb_rqst *, struct TCP_Server_Info *,
|
||||
bool allocate_crypto);
|
||||
int (*set_integrity)(const unsigned int, struct cifs_tcon *tcon,
|
||||
struct cifsFileInfo *src_file);
|
||||
int (*enum_snapshots)(const unsigned int xid, struct cifs_tcon *tcon,
|
||||
@@ -1312,6 +1313,7 @@ struct cifsFileInfo {
|
||||
struct tcon_link *tlink;
|
||||
unsigned int f_flags;
|
||||
bool invalidHandle:1; /* file closed via session abend */
|
||||
bool swapfile:1;
|
||||
bool oplock_break_cancelled:1;
|
||||
unsigned int oplock_epoch; /* epoch from the lease break */
|
||||
__u32 oplock_level; /* oplock/lease level from the lease break */
|
||||
|
||||
@@ -4808,6 +4808,60 @@ cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
 * swap_activate address-space operation for CIFS.
 *
 * Rejects the file with -EINVAL when its allocated size (i_blocks counted
 * in 512-byte units) is smaller than i_size, i.e. when it has holes.
 * Otherwise reports sis->pages through @span, marks the open file (when
 * there is one) as a swapfile and returns 0.
 *
 * The allocated size is computed in 64 bits so that the multiplication
 * cannot wrap where unsigned long is 32 bits wide.
 */
static int cifs_swap_activate(struct swap_info_struct *sis,
			      struct file *swap_file, sector_t *span)
{
	struct cifsFileInfo *cfile = swap_file->private_data;
	struct inode *inode = swap_file->f_mapping->host;
	u64 allocated_bytes;
	long long isize;

	cifs_dbg(FYI, "swap activate\n");

	/* Sample both values under i_lock so they are mutually consistent. */
	spin_lock(&inode->i_lock);
	allocated_bytes = (u64)inode->i_blocks * 512;
	isize = inode->i_size;
	spin_unlock(&inode->i_lock);
	if (allocated_bytes < (u64)isize) {
		pr_warn("swap activate: swapfile has holes\n");
		return -EINVAL;
	}
	*span = sis->pages;

	printk_once(KERN_WARNING "Swap support over SMB3 is experimental\n");

	/*
	 * TODO: consider adding ACL (or documenting how) to prevent other
	 * users (on this or other systems) from reading it
	 */


	/* TODO: add sk_set_memalloc(inet) or similar */

	if (cfile)
		cfile->swapfile = true;
	/*
	 * TODO: Since file already open, we can't open with DENY_ALL here
	 * but we could add call to grab a byte range lock to prevent others
	 * from reading or writing the file
	 */

	return 0;
}
|
||||
|
||||
static void cifs_swap_deactivate(struct file *file)
|
||||
{
|
||||
struct cifsFileInfo *cfile = file->private_data;
|
||||
|
||||
cifs_dbg(FYI, "swap deactivate\n");
|
||||
|
||||
/* TODO: undo sk_set_memalloc(inet) will eventually be needed */
|
||||
|
||||
if (cfile)
|
||||
cfile->swapfile = false;
|
||||
|
||||
/* do we need to unpin (or unlock) the file */
|
||||
}
|
||||
|
||||
const struct address_space_operations cifs_addr_ops = {
|
||||
.readpage = cifs_readpage,
|
||||
@@ -4821,6 +4875,13 @@ const struct address_space_operations cifs_addr_ops = {
|
||||
.direct_IO = cifs_direct_io,
|
||||
.invalidatepage = cifs_invalidate_page,
|
||||
.launder_page = cifs_launder_page,
|
||||
/*
|
||||
* TODO: investigate and if useful we could add an cifs_migratePage
|
||||
* helper (under an CONFIG_MIGRATION) in the future, and also
|
||||
* investigate and add an is_dirty_writeback helper if needed
|
||||
*/
|
||||
.swap_activate = cifs_swap_activate,
|
||||
.swap_deactivate = cifs_swap_deactivate,
|
||||
};
|
||||
|
||||
/*
|
||||
|
||||
@@ -2026,6 +2026,10 @@ cifs_revalidate_mapping(struct inode *inode)
|
||||
int rc;
|
||||
unsigned long *flags = &CIFS_I(inode)->flags;
|
||||
|
||||
/* swapfiles are not supposed to be shared */
|
||||
if (IS_SWAPFILE(inode))
|
||||
return 0;
|
||||
|
||||
rc = wait_on_bit_lock_action(flags, CIFS_INO_LOCK, cifs_wait_bit_killable,
|
||||
TASK_KILLABLE);
|
||||
if (rc)
|
||||
|
||||
@@ -246,7 +246,7 @@ cifs_posix_to_fattr(struct cifs_fattr *fattr, struct smb2_posix_info *info,
|
||||
*/
|
||||
fattr->cf_mode = le32_to_cpu(info->Mode) & ~S_IFMT;
|
||||
|
||||
cifs_dbg(VFS, "XXX dev %d, reparse %d, mode %o",
|
||||
cifs_dbg(FYI, "posix fattr: dev %d, reparse %d, mode %o",
|
||||
le32_to_cpu(info->DeviceId),
|
||||
le32_to_cpu(info->ReparseTag),
|
||||
le32_to_cpu(info->Mode));
|
||||
|
||||
@@ -766,6 +766,20 @@ smb2_handle_cancelled_close(struct cifs_tcon *tcon, __u64 persistent_fid,
|
||||
|
||||
cifs_dbg(FYI, "%s: tc_count=%d\n", __func__, tcon->tc_count);
|
||||
spin_lock(&cifs_tcp_ses_lock);
|
||||
if (tcon->tc_count <= 0) {
|
||||
struct TCP_Server_Info *server = NULL;
|
||||
|
||||
WARN_ONCE(tcon->tc_count < 0, "tcon refcount is negative");
|
||||
spin_unlock(&cifs_tcp_ses_lock);
|
||||
|
||||
if (tcon->ses)
|
||||
server = tcon->ses->server;
|
||||
|
||||
cifs_server_dbg(FYI, "tid=%u: tcon is closing, skipping async close retry of fid %llu %llu\n",
|
||||
tcon->tid, persistent_fid, volatile_fid);
|
||||
|
||||
return 0;
|
||||
}
|
||||
tcon->tc_count++;
|
||||
spin_unlock(&cifs_tcp_ses_lock);
|
||||
|
||||
|
||||
@@ -55,9 +55,11 @@ extern struct cifs_ses *smb2_find_smb_ses(struct TCP_Server_Info *server,
|
||||
extern struct cifs_tcon *smb2_find_smb_tcon(struct TCP_Server_Info *server,
|
||||
__u64 ses_id, __u32 tid);
|
||||
extern int smb2_calc_signature(struct smb_rqst *rqst,
|
||||
struct TCP_Server_Info *server);
|
||||
struct TCP_Server_Info *server,
|
||||
bool allocate_crypto);
|
||||
extern int smb3_calc_signature(struct smb_rqst *rqst,
|
||||
struct TCP_Server_Info *server);
|
||||
struct TCP_Server_Info *server,
|
||||
bool allocate_crypto);
|
||||
extern void smb2_echo_request(struct work_struct *work);
|
||||
extern __le32 smb2_get_lease_state(struct cifsInodeInfo *cinode);
|
||||
extern bool smb2_is_valid_oplock_break(char *buffer,
|
||||
|
||||
@@ -40,14 +40,6 @@
|
||||
#include "smb2status.h"
|
||||
#include "smb2glob.h"
|
||||
|
||||
static int
|
||||
smb2_crypto_shash_allocate(struct TCP_Server_Info *server)
|
||||
{
|
||||
return cifs_alloc_hash("hmac(sha256)",
|
||||
&server->secmech.hmacsha256,
|
||||
&server->secmech.sdeschmacsha256);
|
||||
}
|
||||
|
||||
static int
|
||||
smb3_crypto_shash_allocate(struct TCP_Server_Info *server)
|
||||
{
|
||||
@@ -219,7 +211,8 @@ smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32 tid)
|
||||
}
|
||||
|
||||
int
|
||||
smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
|
||||
smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server,
|
||||
bool allocate_crypto)
|
||||
{
|
||||
int rc;
|
||||
unsigned char smb2_signature[SMB2_HMACSHA256_SIZE];
|
||||
@@ -228,6 +221,8 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
|
||||
struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)iov[0].iov_base;
|
||||
struct cifs_ses *ses;
|
||||
struct shash_desc *shash;
|
||||
struct crypto_shash *hash;
|
||||
struct sdesc *sdesc = NULL;
|
||||
struct smb_rqst drqst;
|
||||
|
||||
ses = smb2_find_smb_ses(server, shdr->SessionId);
|
||||
@@ -239,24 +234,32 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
|
||||
memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE);
|
||||
memset(shdr->Signature, 0x0, SMB2_SIGNATURE_SIZE);
|
||||
|
||||
rc = smb2_crypto_shash_allocate(server);
|
||||
if (rc) {
|
||||
cifs_server_dbg(VFS, "%s: sha256 alloc failed\n", __func__);
|
||||
return rc;
|
||||
if (allocate_crypto) {
|
||||
rc = cifs_alloc_hash("hmac(sha256)", &hash, &sdesc);
|
||||
if (rc) {
|
||||
cifs_server_dbg(VFS,
|
||||
"%s: sha256 alloc failed\n", __func__);
|
||||
return rc;
|
||||
}
|
||||
shash = &sdesc->shash;
|
||||
} else {
|
||||
hash = server->secmech.hmacsha256;
|
||||
shash = &server->secmech.sdeschmacsha256->shash;
|
||||
}
|
||||
|
||||
rc = crypto_shash_setkey(server->secmech.hmacsha256,
|
||||
ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE);
|
||||
rc = crypto_shash_setkey(hash, ses->auth_key.response,
|
||||
SMB2_NTLMV2_SESSKEY_SIZE);
|
||||
if (rc) {
|
||||
cifs_server_dbg(VFS, "%s: Could not update with response\n", __func__);
|
||||
return rc;
|
||||
cifs_server_dbg(VFS,
|
||||
"%s: Could not update with response\n",
|
||||
__func__);
|
||||
goto out;
|
||||
}
|
||||
|
||||
shash = &server->secmech.sdeschmacsha256->shash;
|
||||
rc = crypto_shash_init(shash);
|
||||
if (rc) {
|
||||
cifs_server_dbg(VFS, "%s: Could not init sha256", __func__);
|
||||
return rc;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -271,9 +274,10 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
|
||||
rc = crypto_shash_update(shash, iov[0].iov_base,
|
||||
iov[0].iov_len);
|
||||
if (rc) {
|
||||
cifs_server_dbg(VFS, "%s: Could not update with payload\n",
|
||||
__func__);
|
||||
return rc;
|
||||
cifs_server_dbg(VFS,
|
||||
"%s: Could not update with payload\n",
|
||||
__func__);
|
||||
goto out;
|
||||
}
|
||||
drqst.rq_iov++;
|
||||
drqst.rq_nvec--;
|
||||
@@ -283,6 +287,9 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
|
||||
if (!rc)
|
||||
memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE);
|
||||
|
||||
out:
|
||||
if (allocate_crypto)
|
||||
cifs_free_hash(&hash, &sdesc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -504,14 +511,17 @@ generate_smb311signingkey(struct cifs_ses *ses)
|
||||
}
|
||||
|
||||
int
|
||||
smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
|
||||
smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server,
|
||||
bool allocate_crypto)
|
||||
{
|
||||
int rc;
|
||||
unsigned char smb3_signature[SMB2_CMACAES_SIZE];
|
||||
unsigned char *sigptr = smb3_signature;
|
||||
struct kvec *iov = rqst->rq_iov;
|
||||
struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)iov[0].iov_base;
|
||||
struct shash_desc *shash = &server->secmech.sdesccmacaes->shash;
|
||||
struct shash_desc *shash;
|
||||
struct crypto_shash *hash;
|
||||
struct sdesc *sdesc = NULL;
|
||||
struct smb_rqst drqst;
|
||||
u8 key[SMB3_SIGN_KEY_SIZE];
|
||||
|
||||
@@ -519,14 +529,24 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
|
||||
if (rc)
|
||||
return 0;
|
||||
|
||||
if (allocate_crypto) {
|
||||
rc = cifs_alloc_hash("cmac(aes)", &hash, &sdesc);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
shash = &sdesc->shash;
|
||||
} else {
|
||||
hash = server->secmech.cmacaes;
|
||||
shash = &server->secmech.sdesccmacaes->shash;
|
||||
}
|
||||
|
||||
memset(smb3_signature, 0x0, SMB2_CMACAES_SIZE);
|
||||
memset(shdr->Signature, 0x0, SMB2_SIGNATURE_SIZE);
|
||||
|
||||
rc = crypto_shash_setkey(server->secmech.cmacaes,
|
||||
key, SMB2_CMACAES_SIZE);
|
||||
rc = crypto_shash_setkey(hash, key, SMB2_CMACAES_SIZE);
|
||||
if (rc) {
|
||||
cifs_server_dbg(VFS, "%s: Could not set key for cmac aes\n", __func__);
|
||||
return rc;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -537,7 +557,7 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
|
||||
rc = crypto_shash_init(shash);
|
||||
if (rc) {
|
||||
cifs_server_dbg(VFS, "%s: Could not init cmac aes\n", __func__);
|
||||
return rc;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -554,7 +574,7 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
|
||||
if (rc) {
|
||||
cifs_server_dbg(VFS, "%s: Could not update with payload\n",
|
||||
__func__);
|
||||
return rc;
|
||||
goto out;
|
||||
}
|
||||
drqst.rq_iov++;
|
||||
drqst.rq_nvec--;
|
||||
@@ -564,6 +584,9 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
|
||||
if (!rc)
|
||||
memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE);
|
||||
|
||||
out:
|
||||
if (allocate_crypto)
|
||||
cifs_free_hash(&hash, &sdesc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -593,7 +616,7 @@ smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server)
|
||||
return 0;
|
||||
}
|
||||
|
||||
rc = server->ops->calc_signature(rqst, server);
|
||||
rc = server->ops->calc_signature(rqst, server, false);
|
||||
|
||||
return rc;
|
||||
}
|
||||
@@ -631,9 +654,7 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
|
||||
|
||||
memset(shdr->Signature, 0, SMB2_SIGNATURE_SIZE);
|
||||
|
||||
mutex_lock(&server->srv_mutex);
|
||||
rc = server->ops->calc_signature(rqst, server);
|
||||
mutex_unlock(&server->srv_mutex);
|
||||
rc = server->ops->calc_signature(rqst, server, true);
|
||||
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
@@ -284,13 +284,10 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc)
|
||||
request->sge[i].length,
|
||||
DMA_TO_DEVICE);
|
||||
|
||||
if (request->has_payload) {
|
||||
if (atomic_dec_and_test(&request->info->send_payload_pending))
|
||||
wake_up(&request->info->wait_send_payload_pending);
|
||||
} else {
|
||||
if (atomic_dec_and_test(&request->info->send_pending))
|
||||
wake_up(&request->info->wait_send_pending);
|
||||
}
|
||||
if (atomic_dec_and_test(&request->info->send_pending))
|
||||
wake_up(&request->info->wait_send_pending);
|
||||
|
||||
wake_up(&request->info->wait_post_send);
|
||||
|
||||
mempool_free(request, request->info->request_mempool);
|
||||
}
|
||||
@@ -383,27 +380,6 @@ static bool process_negotiation_response(
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check and schedule to send an immediate packet
|
||||
* This is used to extend credtis to remote peer to keep the transport busy
|
||||
*/
|
||||
static void check_and_send_immediate(struct smbd_connection *info)
|
||||
{
|
||||
if (info->transport_status != SMBD_CONNECTED)
|
||||
return;
|
||||
|
||||
info->send_immediate = true;
|
||||
|
||||
/*
|
||||
* Promptly send a packet if our peer is running low on receive
|
||||
* credits
|
||||
*/
|
||||
if (atomic_read(&info->receive_credits) <
|
||||
info->receive_credit_target - 1)
|
||||
queue_delayed_work(
|
||||
info->workqueue, &info->send_immediate_work, 0);
|
||||
}
|
||||
|
||||
static void smbd_post_send_credits(struct work_struct *work)
|
||||
{
|
||||
int ret = 0;
|
||||
@@ -453,10 +429,16 @@ static void smbd_post_send_credits(struct work_struct *work)
|
||||
info->new_credits_offered += ret;
|
||||
spin_unlock(&info->lock_new_credits_offered);
|
||||
|
||||
atomic_add(ret, &info->receive_credits);
|
||||
|
||||
/* Check if we can post new receive and grant credits to peer */
|
||||
check_and_send_immediate(info);
|
||||
/* Promptly send an immediate packet as defined in [MS-SMBD] 3.1.1.1 */
|
||||
info->send_immediate = true;
|
||||
if (atomic_read(&info->receive_credits) <
|
||||
info->receive_credit_target - 1) {
|
||||
if (info->keep_alive_requested == KEEP_ALIVE_PENDING ||
|
||||
info->send_immediate) {
|
||||
log_keep_alive(INFO, "send an empty message\n");
|
||||
smbd_post_send_empty(info);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Called from softirq, when recv is done */
|
||||
@@ -551,12 +533,6 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
|
||||
info->keep_alive_requested = KEEP_ALIVE_PENDING;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if we need to send something to remote peer to
|
||||
* grant more credits or respond to KEEP_ALIVE packet
|
||||
*/
|
||||
check_and_send_immediate(info);
|
||||
|
||||
return;
|
||||
|
||||
default:
|
||||
@@ -749,7 +725,6 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info)
|
||||
request->sge[0].addr,
|
||||
request->sge[0].length, request->sge[0].lkey);
|
||||
|
||||
request->has_payload = false;
|
||||
atomic_inc(&info->send_pending);
|
||||
rc = ib_post_send(info->id->qp, &send_wr, NULL);
|
||||
if (!rc)
|
||||
@@ -806,120 +781,9 @@ static int manage_keep_alive_before_sending(struct smbd_connection *info)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Build and prepare the SMBD packet header
|
||||
* This function waits for avaialbe send credits and build a SMBD packet
|
||||
* header. The caller then optional append payload to the packet after
|
||||
* the header
|
||||
* intput values
|
||||
* size: the size of the payload
|
||||
* remaining_data_length: remaining data to send if this is part of a
|
||||
* fragmented packet
|
||||
* output values
|
||||
* request_out: the request allocated from this function
|
||||
* return values: 0 on success, otherwise actual error code returned
|
||||
*/
|
||||
static int smbd_create_header(struct smbd_connection *info,
|
||||
int size, int remaining_data_length,
|
||||
struct smbd_request **request_out)
|
||||
{
|
||||
struct smbd_request *request;
|
||||
struct smbd_data_transfer *packet;
|
||||
int header_length;
|
||||
int rc;
|
||||
|
||||
/* Wait for send credits. A SMBD packet needs one credit */
|
||||
rc = wait_event_interruptible(info->wait_send_queue,
|
||||
atomic_read(&info->send_credits) > 0 ||
|
||||
info->transport_status != SMBD_CONNECTED);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
if (info->transport_status != SMBD_CONNECTED) {
|
||||
log_outgoing(ERR, "disconnected not sending\n");
|
||||
return -EAGAIN;
|
||||
}
|
||||
atomic_dec(&info->send_credits);
|
||||
|
||||
request = mempool_alloc(info->request_mempool, GFP_KERNEL);
|
||||
if (!request) {
|
||||
rc = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
|
||||
request->info = info;
|
||||
|
||||
/* Fill in the packet header */
|
||||
packet = smbd_request_payload(request);
|
||||
packet->credits_requested = cpu_to_le16(info->send_credit_target);
|
||||
packet->credits_granted =
|
||||
cpu_to_le16(manage_credits_prior_sending(info));
|
||||
info->send_immediate = false;
|
||||
|
||||
packet->flags = 0;
|
||||
if (manage_keep_alive_before_sending(info))
|
||||
packet->flags |= cpu_to_le16(SMB_DIRECT_RESPONSE_REQUESTED);
|
||||
|
||||
packet->reserved = 0;
|
||||
if (!size)
|
||||
packet->data_offset = 0;
|
||||
else
|
||||
packet->data_offset = cpu_to_le32(24);
|
||||
packet->data_length = cpu_to_le32(size);
|
||||
packet->remaining_data_length = cpu_to_le32(remaining_data_length);
|
||||
packet->padding = 0;
|
||||
|
||||
log_outgoing(INFO, "credits_requested=%d credits_granted=%d "
|
||||
"data_offset=%d data_length=%d remaining_data_length=%d\n",
|
||||
le16_to_cpu(packet->credits_requested),
|
||||
le16_to_cpu(packet->credits_granted),
|
||||
le32_to_cpu(packet->data_offset),
|
||||
le32_to_cpu(packet->data_length),
|
||||
le32_to_cpu(packet->remaining_data_length));
|
||||
|
||||
/* Map the packet to DMA */
|
||||
header_length = sizeof(struct smbd_data_transfer);
|
||||
/* If this is a packet without payload, don't send padding */
|
||||
if (!size)
|
||||
header_length = offsetof(struct smbd_data_transfer, padding);
|
||||
|
||||
request->num_sge = 1;
|
||||
request->sge[0].addr = ib_dma_map_single(info->id->device,
|
||||
(void *)packet,
|
||||
header_length,
|
||||
DMA_TO_DEVICE);
|
||||
if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) {
|
||||
mempool_free(request, info->request_mempool);
|
||||
rc = -EIO;
|
||||
goto err;
|
||||
}
|
||||
|
||||
request->sge[0].length = header_length;
|
||||
request->sge[0].lkey = info->pd->local_dma_lkey;
|
||||
|
||||
*request_out = request;
|
||||
return 0;
|
||||
|
||||
err:
|
||||
atomic_inc(&info->send_credits);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void smbd_destroy_header(struct smbd_connection *info,
|
||||
struct smbd_request *request)
|
||||
{
|
||||
|
||||
ib_dma_unmap_single(info->id->device,
|
||||
request->sge[0].addr,
|
||||
request->sge[0].length,
|
||||
DMA_TO_DEVICE);
|
||||
mempool_free(request, info->request_mempool);
|
||||
atomic_inc(&info->send_credits);
|
||||
}
|
||||
|
||||
/* Post the send request */
|
||||
static int smbd_post_send(struct smbd_connection *info,
|
||||
struct smbd_request *request, bool has_payload)
|
||||
struct smbd_request *request)
|
||||
{
|
||||
struct ib_send_wr send_wr;
|
||||
int rc, i;
|
||||
@@ -944,24 +808,9 @@ static int smbd_post_send(struct smbd_connection *info,
|
||||
send_wr.opcode = IB_WR_SEND;
|
||||
send_wr.send_flags = IB_SEND_SIGNALED;
|
||||
|
||||
if (has_payload) {
|
||||
request->has_payload = true;
|
||||
atomic_inc(&info->send_payload_pending);
|
||||
} else {
|
||||
request->has_payload = false;
|
||||
atomic_inc(&info->send_pending);
|
||||
}
|
||||
|
||||
rc = ib_post_send(info->id->qp, &send_wr, NULL);
|
||||
if (rc) {
|
||||
log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc);
|
||||
if (has_payload) {
|
||||
if (atomic_dec_and_test(&info->send_payload_pending))
|
||||
wake_up(&info->wait_send_payload_pending);
|
||||
} else {
|
||||
if (atomic_dec_and_test(&info->send_pending))
|
||||
wake_up(&info->wait_send_pending);
|
||||
}
|
||||
smbd_disconnect_rdma_connection(info);
|
||||
rc = -EAGAIN;
|
||||
} else
|
||||
@@ -977,14 +826,107 @@ static int smbd_post_send_sgl(struct smbd_connection *info,
|
||||
{
|
||||
int num_sgs;
|
||||
int i, rc;
|
||||
int header_length;
|
||||
struct smbd_request *request;
|
||||
struct smbd_data_transfer *packet;
|
||||
int new_credits;
|
||||
struct scatterlist *sg;
|
||||
|
||||
rc = smbd_create_header(
|
||||
info, data_length, remaining_data_length, &request);
|
||||
wait_credit:
|
||||
/* Wait for send credits. A SMBD packet needs one credit */
|
||||
rc = wait_event_interruptible(info->wait_send_queue,
|
||||
atomic_read(&info->send_credits) > 0 ||
|
||||
info->transport_status != SMBD_CONNECTED);
|
||||
if (rc)
|
||||
return rc;
|
||||
goto err_wait_credit;
|
||||
|
||||
if (info->transport_status != SMBD_CONNECTED) {
|
||||
log_outgoing(ERR, "disconnected not sending on wait_credit\n");
|
||||
rc = -EAGAIN;
|
||||
goto err_wait_credit;
|
||||
}
|
||||
if (unlikely(atomic_dec_return(&info->send_credits) < 0)) {
|
||||
atomic_inc(&info->send_credits);
|
||||
goto wait_credit;
|
||||
}
|
||||
|
||||
wait_send_queue:
|
||||
wait_event(info->wait_post_send,
|
||||
atomic_read(&info->send_pending) < info->send_credit_target ||
|
||||
info->transport_status != SMBD_CONNECTED);
|
||||
|
||||
if (info->transport_status != SMBD_CONNECTED) {
|
||||
log_outgoing(ERR, "disconnected not sending on wait_send_queue\n");
|
||||
rc = -EAGAIN;
|
||||
goto err_wait_send_queue;
|
||||
}
|
||||
|
||||
if (unlikely(atomic_inc_return(&info->send_pending) >
|
||||
info->send_credit_target)) {
|
||||
atomic_dec(&info->send_pending);
|
||||
goto wait_send_queue;
|
||||
}
|
||||
|
||||
request = mempool_alloc(info->request_mempool, GFP_KERNEL);
|
||||
if (!request) {
|
||||
rc = -ENOMEM;
|
||||
goto err_alloc;
|
||||
}
|
||||
|
||||
request->info = info;
|
||||
|
||||
/* Fill in the packet header */
|
||||
packet = smbd_request_payload(request);
|
||||
packet->credits_requested = cpu_to_le16(info->send_credit_target);
|
||||
|
||||
new_credits = manage_credits_prior_sending(info);
|
||||
atomic_add(new_credits, &info->receive_credits);
|
||||
packet->credits_granted = cpu_to_le16(new_credits);
|
||||
|
||||
info->send_immediate = false;
|
||||
|
||||
packet->flags = 0;
|
||||
if (manage_keep_alive_before_sending(info))
|
||||
packet->flags |= cpu_to_le16(SMB_DIRECT_RESPONSE_REQUESTED);
|
||||
|
||||
packet->reserved = 0;
|
||||
if (!data_length)
|
||||
packet->data_offset = 0;
|
||||
else
|
||||
packet->data_offset = cpu_to_le32(24);
|
||||
packet->data_length = cpu_to_le32(data_length);
|
||||
packet->remaining_data_length = cpu_to_le32(remaining_data_length);
|
||||
packet->padding = 0;
|
||||
|
||||
log_outgoing(INFO, "credits_requested=%d credits_granted=%d "
|
||||
"data_offset=%d data_length=%d remaining_data_length=%d\n",
|
||||
le16_to_cpu(packet->credits_requested),
|
||||
le16_to_cpu(packet->credits_granted),
|
||||
le32_to_cpu(packet->data_offset),
|
||||
le32_to_cpu(packet->data_length),
|
||||
le32_to_cpu(packet->remaining_data_length));
|
||||
|
||||
/* Map the packet to DMA */
|
||||
header_length = sizeof(struct smbd_data_transfer);
|
||||
/* If this is a packet without payload, don't send padding */
|
||||
if (!data_length)
|
||||
header_length = offsetof(struct smbd_data_transfer, padding);
|
||||
|
||||
request->num_sge = 1;
|
||||
request->sge[0].addr = ib_dma_map_single(info->id->device,
|
||||
(void *)packet,
|
||||
header_length,
|
||||
DMA_TO_DEVICE);
|
||||
if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) {
|
||||
rc = -EIO;
|
||||
request->sge[0].addr = 0;
|
||||
goto err_dma;
|
||||
}
|
||||
|
||||
request->sge[0].length = header_length;
|
||||
request->sge[0].lkey = info->pd->local_dma_lkey;
|
||||
|
||||
/* Fill in the packet data payload */
|
||||
num_sgs = sgl ? sg_nents(sgl) : 0;
|
||||
for_each_sg(sgl, sg, num_sgs, i) {
|
||||
request->sge[i+1].addr =
|
||||
@@ -994,25 +936,41 @@ static int smbd_post_send_sgl(struct smbd_connection *info,
|
||||
info->id->device, request->sge[i+1].addr)) {
|
||||
rc = -EIO;
|
||||
request->sge[i+1].addr = 0;
|
||||
goto dma_mapping_failure;
|
||||
goto err_dma;
|
||||
}
|
||||
request->sge[i+1].length = sg->length;
|
||||
request->sge[i+1].lkey = info->pd->local_dma_lkey;
|
||||
request->num_sge++;
|
||||
}
|
||||
|
||||
rc = smbd_post_send(info, request, data_length);
|
||||
rc = smbd_post_send(info, request);
|
||||
if (!rc)
|
||||
return 0;
|
||||
|
||||
dma_mapping_failure:
|
||||
for (i = 1; i < request->num_sge; i++)
|
||||
err_dma:
|
||||
for (i = 0; i < request->num_sge; i++)
|
||||
if (request->sge[i].addr)
|
||||
ib_dma_unmap_single(info->id->device,
|
||||
request->sge[i].addr,
|
||||
request->sge[i].length,
|
||||
DMA_TO_DEVICE);
|
||||
smbd_destroy_header(info, request);
|
||||
mempool_free(request, info->request_mempool);
|
||||
|
||||
/* roll back receive credits and credits to be offered */
|
||||
spin_lock(&info->lock_new_credits_offered);
|
||||
info->new_credits_offered += new_credits;
|
||||
spin_unlock(&info->lock_new_credits_offered);
|
||||
atomic_sub(new_credits, &info->receive_credits);
|
||||
|
||||
err_alloc:
|
||||
if (atomic_dec_and_test(&info->send_pending))
|
||||
wake_up(&info->wait_send_pending);
|
||||
|
||||
err_wait_send_queue:
|
||||
/* roll back send credits and pending */
|
||||
atomic_inc(&info->send_credits);
|
||||
|
||||
err_wait_credit:
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -1334,25 +1292,6 @@ static void destroy_receive_buffers(struct smbd_connection *info)
|
||||
mempool_free(response, info->response_mempool);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check and send an immediate or keep alive packet
|
||||
* The condition to send those packets are defined in [MS-SMBD] 3.1.1.1
|
||||
* Connection.KeepaliveRequested and Connection.SendImmediate
|
||||
* The idea is to extend credits to server as soon as it becomes available
|
||||
*/
|
||||
static void send_immediate_work(struct work_struct *work)
|
||||
{
|
||||
struct smbd_connection *info = container_of(
|
||||
work, struct smbd_connection,
|
||||
send_immediate_work.work);
|
||||
|
||||
if (info->keep_alive_requested == KEEP_ALIVE_PENDING ||
|
||||
info->send_immediate) {
|
||||
log_keep_alive(INFO, "send an empty message\n");
|
||||
smbd_post_send_empty(info);
|
||||
}
|
||||
}
|
||||
|
||||
/* Implement idle connection timer [MS-SMBD] 3.1.6.2 */
|
||||
static void idle_connection_timer(struct work_struct *work)
|
||||
{
|
||||
@@ -1407,14 +1346,10 @@ void smbd_destroy(struct TCP_Server_Info *server)
|
||||
|
||||
log_rdma_event(INFO, "cancelling idle timer\n");
|
||||
cancel_delayed_work_sync(&info->idle_timer_work);
|
||||
log_rdma_event(INFO, "cancelling send immediate work\n");
|
||||
cancel_delayed_work_sync(&info->send_immediate_work);
|
||||
|
||||
log_rdma_event(INFO, "wait for all send posted to IB to finish\n");
|
||||
wait_event(info->wait_send_pending,
|
||||
atomic_read(&info->send_pending) == 0);
|
||||
wait_event(info->wait_send_payload_pending,
|
||||
atomic_read(&info->send_payload_pending) == 0);
|
||||
|
||||
/* It's not posssible for upper layer to get to reassembly */
|
||||
log_rdma_event(INFO, "drain the reassembly queue\n");
|
||||
@@ -1744,15 +1679,13 @@ static struct smbd_connection *_smbd_get_connection(
|
||||
|
||||
init_waitqueue_head(&info->wait_send_queue);
|
||||
INIT_DELAYED_WORK(&info->idle_timer_work, idle_connection_timer);
|
||||
INIT_DELAYED_WORK(&info->send_immediate_work, send_immediate_work);
|
||||
queue_delayed_work(info->workqueue, &info->idle_timer_work,
|
||||
info->keep_alive_interval*HZ);
|
||||
|
||||
init_waitqueue_head(&info->wait_send_pending);
|
||||
atomic_set(&info->send_pending, 0);
|
||||
|
||||
init_waitqueue_head(&info->wait_send_payload_pending);
|
||||
atomic_set(&info->send_payload_pending, 0);
|
||||
init_waitqueue_head(&info->wait_post_send);
|
||||
|
||||
INIT_WORK(&info->disconnect_work, smbd_disconnect_rdma_work);
|
||||
INIT_WORK(&info->post_send_credits_work, smbd_post_send_credits);
|
||||
@@ -2226,8 +2159,8 @@ done:
|
||||
* that means all the I/Os have been out and we are good to return
|
||||
*/
|
||||
|
||||
wait_event(info->wait_send_payload_pending,
|
||||
atomic_read(&info->send_payload_pending) == 0);
|
||||
wait_event(info->wait_send_pending,
|
||||
atomic_read(&info->send_pending) == 0);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
@@ -114,8 +114,7 @@ struct smbd_connection {
|
||||
/* Activity accoutning */
|
||||
atomic_t send_pending;
|
||||
wait_queue_head_t wait_send_pending;
|
||||
atomic_t send_payload_pending;
|
||||
wait_queue_head_t wait_send_payload_pending;
|
||||
wait_queue_head_t wait_post_send;
|
||||
|
||||
/* Receive queue */
|
||||
struct list_head receive_queue;
|
||||
@@ -154,7 +153,6 @@ struct smbd_connection {
|
||||
|
||||
struct workqueue_struct *workqueue;
|
||||
struct delayed_work idle_timer_work;
|
||||
struct delayed_work send_immediate_work;
|
||||
|
||||
/* Memory pool for preallocating buffers */
|
||||
/* request pool for RDMA send */
|
||||
@@ -234,9 +232,6 @@ struct smbd_request {
|
||||
struct smbd_connection *info;
|
||||
struct ib_cqe cqe;
|
||||
|
||||
/* true if this request carries upper layer payload */
|
||||
bool has_payload;
|
||||
|
||||
/* the SGE entries for this packet */
|
||||
struct ib_sge sge[SMBDIRECT_MAX_SGE];
|
||||
int num_sge;
|
||||
|
||||
@@ -501,6 +501,7 @@ pnfs_alloc_ds_commits_list(struct list_head *list,
|
||||
rcu_read_lock();
|
||||
pnfs_put_commit_array(array, cinfo->inode);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@@ -38,11 +38,24 @@
|
||||
* atomic operations, then the count will continue to edge closer to 0. If it
|
||||
* reaches a value of 1 before /any/ of the threads reset it to the saturated
|
||||
* value, then a concurrent refcount_dec_and_test() may erroneously free the
|
||||
* underlying object. Given the precise timing details involved with the
|
||||
* round-robin scheduling of each thread manipulating the refcount and the need
|
||||
* to hit the race multiple times in succession, there doesn't appear to be a
|
||||
* practical avenue of attack even if using refcount_add() operations with
|
||||
* larger increments.
|
||||
* underlying object.
|
||||
* Linux limits the maximum number of tasks to PID_MAX_LIMIT, which is currently
|
||||
* 0x400000 (and can't easily be raised in the future beyond FUTEX_TID_MASK).
|
||||
* With the current PID limit, if no batched refcounting operations are used and
|
||||
* the attacker can't repeatedly trigger kernel oopses in the middle of refcount
|
||||
* operations, this makes it impossible for a saturated refcount to leave the
|
||||
* saturation range, even if it is possible for multiple uses of the same
|
||||
* refcount to nest in the context of a single task:
|
||||
*
|
||||
* (UINT_MAX+1-REFCOUNT_SATURATED) / PID_MAX_LIMIT =
|
||||
* 0x40000000 / 0x400000 = 0x100 = 256
|
||||
*
|
||||
* If hundreds of references are added/removed with a single refcounting
|
||||
* operation, it may potentially be possible to leave the saturation range; but
|
||||
* given the precise timing details involved with the round-robin scheduling of
|
||||
* each thread manipulating the refcount and the need to hit the race multiple
|
||||
* times in succession, there doesn't appear to be a practical avenue of attack
|
||||
* even if using refcount_add() operations with larger increments.
|
||||
*
|
||||
* Memory ordering
|
||||
* ===============
|
||||
|
||||
@@ -983,16 +983,10 @@ perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
|
||||
event->shadow_ctx_time = now - t->timestamp;
|
||||
}
|
||||
|
||||
/*
|
||||
* Update cpuctx->cgrp so that it is set when first cgroup event is added and
|
||||
* cleared when last cgroup event is removed.
|
||||
*/
|
||||
static inline void
|
||||
list_update_cgroup_event(struct perf_event *event,
|
||||
struct perf_event_context *ctx, bool add)
|
||||
perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx)
|
||||
{
|
||||
struct perf_cpu_context *cpuctx;
|
||||
struct list_head *cpuctx_entry;
|
||||
|
||||
if (!is_cgroup_event(event))
|
||||
return;
|
||||
@@ -1009,28 +1003,41 @@ list_update_cgroup_event(struct perf_event *event,
|
||||
* because if the first would mismatch, the second would not try again
|
||||
* and we would leave cpuctx->cgrp unset.
|
||||
*/
|
||||
if (add && !cpuctx->cgrp) {
|
||||
if (ctx->is_active && !cpuctx->cgrp) {
|
||||
struct perf_cgroup *cgrp = perf_cgroup_from_task(current, ctx);
|
||||
|
||||
if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
|
||||
cpuctx->cgrp = cgrp;
|
||||
}
|
||||
|
||||
if (add && ctx->nr_cgroups++)
|
||||
return;
|
||||
else if (!add && --ctx->nr_cgroups)
|
||||
if (ctx->nr_cgroups++)
|
||||
return;
|
||||
|
||||
/* no cgroup running */
|
||||
if (!add)
|
||||
list_add(&cpuctx->cgrp_cpuctx_entry,
|
||||
per_cpu_ptr(&cgrp_cpuctx_list, event->cpu));
|
||||
}
|
||||
|
||||
static inline void
|
||||
perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *ctx)
|
||||
{
|
||||
struct perf_cpu_context *cpuctx;
|
||||
|
||||
if (!is_cgroup_event(event))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Because cgroup events are always per-cpu events,
|
||||
* @ctx == &cpuctx->ctx.
|
||||
*/
|
||||
cpuctx = container_of(ctx, struct perf_cpu_context, ctx);
|
||||
|
||||
if (--ctx->nr_cgroups)
|
||||
return;
|
||||
|
||||
if (ctx->is_active && cpuctx->cgrp)
|
||||
cpuctx->cgrp = NULL;
|
||||
|
||||
cpuctx_entry = &cpuctx->cgrp_cpuctx_entry;
|
||||
if (add)
|
||||
list_add(cpuctx_entry,
|
||||
per_cpu_ptr(&cgrp_cpuctx_list, event->cpu));
|
||||
else
|
||||
list_del(cpuctx_entry);
|
||||
list_del(&cpuctx->cgrp_cpuctx_entry);
|
||||
}
|
||||
|
||||
#else /* !CONFIG_CGROUP_PERF */
|
||||
@@ -1096,11 +1103,14 @@ static inline u64 perf_cgroup_event_time(struct perf_event *event)
|
||||
}
|
||||
|
||||
static inline void
|
||||
list_update_cgroup_event(struct perf_event *event,
|
||||
struct perf_event_context *ctx, bool add)
|
||||
perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void
|
||||
perf_cgroup_event_disable(struct perf_event *event, struct perf_event_context *ctx)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
@@ -1791,13 +1801,14 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
|
||||
add_event_to_groups(event, ctx);
|
||||
}
|
||||
|
||||
list_update_cgroup_event(event, ctx, true);
|
||||
|
||||
list_add_rcu(&event->event_entry, &ctx->event_list);
|
||||
ctx->nr_events++;
|
||||
if (event->attr.inherit_stat)
|
||||
ctx->nr_stat++;
|
||||
|
||||
if (event->state > PERF_EVENT_STATE_OFF)
|
||||
perf_cgroup_event_enable(event, ctx);
|
||||
|
||||
ctx->generation++;
|
||||
}
|
||||
|
||||
@@ -1976,8 +1987,6 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
|
||||
|
||||
event->attach_state &= ~PERF_ATTACH_CONTEXT;
|
||||
|
||||
list_update_cgroup_event(event, ctx, false);
|
||||
|
||||
ctx->nr_events--;
|
||||
if (event->attr.inherit_stat)
|
||||
ctx->nr_stat--;
|
||||
@@ -1994,8 +2003,10 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
|
||||
* of error state is by explicit re-enabling
|
||||
* of the event
|
||||
*/
|
||||
if (event->state > PERF_EVENT_STATE_OFF)
|
||||
if (event->state > PERF_EVENT_STATE_OFF) {
|
||||
perf_cgroup_event_disable(event, ctx);
|
||||
perf_event_set_state(event, PERF_EVENT_STATE_OFF);
|
||||
}
|
||||
|
||||
ctx->generation++;
|
||||
}
|
||||
@@ -2226,6 +2237,7 @@ event_sched_out(struct perf_event *event,
|
||||
|
||||
if (READ_ONCE(event->pending_disable) >= 0) {
|
||||
WRITE_ONCE(event->pending_disable, -1);
|
||||
perf_cgroup_event_disable(event, ctx);
|
||||
state = PERF_EVENT_STATE_OFF;
|
||||
}
|
||||
perf_event_set_state(event, state);
|
||||
@@ -2363,6 +2375,7 @@ static void __perf_event_disable(struct perf_event *event,
|
||||
event_sched_out(event, cpuctx, ctx);
|
||||
|
||||
perf_event_set_state(event, PERF_EVENT_STATE_OFF);
|
||||
perf_cgroup_event_disable(event, ctx);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2746,7 +2759,7 @@ static int __perf_install_in_context(void *info)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CGROUP_PERF
|
||||
if (is_cgroup_event(event)) {
|
||||
if (event->state > PERF_EVENT_STATE_OFF && is_cgroup_event(event)) {
|
||||
/*
|
||||
* If the current cgroup doesn't match the event's
|
||||
* cgroup, we should not try to schedule it.
|
||||
@@ -2906,6 +2919,7 @@ static void __perf_event_enable(struct perf_event *event,
|
||||
ctx_sched_out(ctx, cpuctx, EVENT_TIME);
|
||||
|
||||
perf_event_set_state(event, PERF_EVENT_STATE_INACTIVE);
|
||||
perf_cgroup_event_enable(event, ctx);
|
||||
|
||||
if (!ctx->is_active)
|
||||
return;
|
||||
@@ -3508,7 +3522,8 @@ static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx,
|
||||
|
||||
static bool perf_less_group_idx(const void *l, const void *r)
|
||||
{
|
||||
const struct perf_event *le = l, *re = r;
|
||||
const struct perf_event *le = *(const struct perf_event **)l;
|
||||
const struct perf_event *re = *(const struct perf_event **)r;
|
||||
|
||||
return le->group_index < re->group_index;
|
||||
}
|
||||
@@ -3616,8 +3631,10 @@ static int merge_sched_in(struct perf_event *event, void *data)
|
||||
}
|
||||
|
||||
if (event->state == PERF_EVENT_STATE_INACTIVE) {
|
||||
if (event->attr.pinned)
|
||||
if (event->attr.pinned) {
|
||||
perf_cgroup_event_disable(event, ctx);
|
||||
perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
|
||||
}
|
||||
|
||||
*can_add_hw = 0;
|
||||
ctx->rotate_necessary = 1;
|
||||
@@ -6917,9 +6934,12 @@ static u64 perf_virt_to_phys(u64 virt)
|
||||
* Try IRQ-safe __get_user_pages_fast first.
|
||||
* If failed, leave phys_addr as 0.
|
||||
*/
|
||||
if ((current->mm != NULL) &&
|
||||
(__get_user_pages_fast(virt, 1, 0, &p) == 1))
|
||||
phys_addr = page_to_phys(p) + virt % PAGE_SIZE;
|
||||
if (current->mm != NULL) {
|
||||
pagefault_disable();
|
||||
if (__get_user_pages_fast(virt, 1, 0, &p) == 1)
|
||||
phys_addr = page_to_phys(p) + virt % PAGE_SIZE;
|
||||
pagefault_enable();
|
||||
}
|
||||
|
||||
if (p)
|
||||
put_page(p);
|
||||
|
||||
@@ -3952,10 +3952,36 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline short task_wait_context(struct task_struct *curr)
|
||||
{
|
||||
/*
|
||||
* Set appropriate wait type for the context; for IRQs we have to take
|
||||
* into account force_irqthread as that is implied by PREEMPT_RT.
|
||||
*/
|
||||
if (curr->hardirq_context) {
|
||||
/*
|
||||
* Check if force_irqthreads will run us threaded.
|
||||
*/
|
||||
if (curr->hardirq_threaded || curr->irq_config)
|
||||
return LD_WAIT_CONFIG;
|
||||
|
||||
return LD_WAIT_SPIN;
|
||||
} else if (curr->softirq_context) {
|
||||
/*
|
||||
* Softirqs are always threaded.
|
||||
*/
|
||||
return LD_WAIT_CONFIG;
|
||||
}
|
||||
|
||||
return LD_WAIT_MAX;
|
||||
}
|
||||
|
||||
static int
|
||||
print_lock_invalid_wait_context(struct task_struct *curr,
|
||||
struct held_lock *hlock)
|
||||
{
|
||||
short curr_inner;
|
||||
|
||||
if (!debug_locks_off())
|
||||
return 0;
|
||||
if (debug_locks_silent)
|
||||
@@ -3971,6 +3997,10 @@ print_lock_invalid_wait_context(struct task_struct *curr,
|
||||
print_lock(hlock);
|
||||
|
||||
pr_warn("other info that might help us debug this:\n");
|
||||
|
||||
curr_inner = task_wait_context(curr);
|
||||
pr_warn("context-{%d:%d}\n", curr_inner, curr_inner);
|
||||
|
||||
lockdep_print_held_locks(curr);
|
||||
|
||||
pr_warn("stack backtrace:\n");
|
||||
@@ -4017,26 +4047,7 @@ static int check_wait_context(struct task_struct *curr, struct held_lock *next)
|
||||
}
|
||||
depth++;
|
||||
|
||||
/*
|
||||
* Set appropriate wait type for the context; for IRQs we have to take
|
||||
* into account force_irqthread as that is implied by PREEMPT_RT.
|
||||
*/
|
||||
if (curr->hardirq_context) {
|
||||
/*
|
||||
* Check if force_irqthreads will run us threaded.
|
||||
*/
|
||||
if (curr->hardirq_threaded || curr->irq_config)
|
||||
curr_inner = LD_WAIT_CONFIG;
|
||||
else
|
||||
curr_inner = LD_WAIT_SPIN;
|
||||
} else if (curr->softirq_context) {
|
||||
/*
|
||||
* Softirqs are always threaded.
|
||||
*/
|
||||
curr_inner = LD_WAIT_CONFIG;
|
||||
} else {
|
||||
curr_inner = LD_WAIT_MAX;
|
||||
}
|
||||
curr_inner = task_wait_context(curr);
|
||||
|
||||
for (; depth < curr->lockdep_depth; depth++) {
|
||||
struct held_lock *prev = curr->held_locks + depth;
|
||||
|
||||
@@ -118,14 +118,15 @@ static int percpu_rwsem_wake_function(struct wait_queue_entry *wq_entry,
|
||||
unsigned int mode, int wake_flags,
|
||||
void *key)
|
||||
{
|
||||
struct task_struct *p = get_task_struct(wq_entry->private);
|
||||
bool reader = wq_entry->flags & WQ_FLAG_CUSTOM;
|
||||
struct percpu_rw_semaphore *sem = key;
|
||||
struct task_struct *p;
|
||||
|
||||
/* concurrent against percpu_down_write(), can get stolen */
|
||||
if (!__percpu_rwsem_trylock(sem, reader))
|
||||
return 1;
|
||||
|
||||
p = get_task_struct(wq_entry->private);
|
||||
list_del_init(&wq_entry->entry);
|
||||
smp_store_release(&wq_entry->private, NULL);
|
||||
|
||||
|
||||
@@ -2120,12 +2120,6 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
|
||||
return cpu;
|
||||
}
|
||||
|
||||
static void update_avg(u64 *avg, u64 sample)
|
||||
{
|
||||
s64 diff = sample - *avg;
|
||||
*avg += diff >> 3;
|
||||
}
|
||||
|
||||
void sched_set_stop_task(int cpu, struct task_struct *stop)
|
||||
{
|
||||
struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
|
||||
@@ -4127,7 +4121,8 @@ static inline void sched_submit_work(struct task_struct *tsk)
|
||||
* it wants to wake up a task to maintain concurrency.
|
||||
* As this function is called inside the schedule() context,
|
||||
* we disable preemption to avoid it calling schedule() again
|
||||
* in the possible wakeup of a kworker.
|
||||
* in the possible wakeup of a kworker and because wq_worker_sleeping()
|
||||
* requires it.
|
||||
*/
|
||||
if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
|
||||
preempt_disable();
|
||||
@@ -6687,7 +6682,6 @@ void __init sched_init(void)
|
||||
|
||||
rq_attach_root(rq, &def_root_domain);
|
||||
#ifdef CONFIG_NO_HZ_COMMON
|
||||
rq->last_load_update_tick = jiffies;
|
||||
rq->last_blocked_load_update_tick = jiffies;
|
||||
atomic_set(&rq->nohz_flags, 0);
|
||||
#endif
|
||||
|
||||
@@ -816,10 +816,12 @@ static int __init init_sched_debug_procfs(void)
|
||||
|
||||
__initcall(init_sched_debug_procfs);
|
||||
|
||||
#define __P(F) SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
|
||||
#define P(F) SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
|
||||
#define __PN(F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
|
||||
#define PN(F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
|
||||
#define __PS(S, F) SEQ_printf(m, "%-45s:%21Ld\n", S, (long long)(F))
|
||||
#define __P(F) __PS(#F, F)
|
||||
#define P(F) __PS(#F, p->F)
|
||||
#define __PSN(S, F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", S, SPLIT_NS((long long)(F)))
|
||||
#define __PN(F) __PSN(#F, F)
|
||||
#define PN(F) __PSN(#F, p->F)
|
||||
|
||||
|
||||
#ifdef CONFIG_NUMA_BALANCING
|
||||
@@ -868,18 +870,9 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
|
||||
SEQ_printf(m,
|
||||
"---------------------------------------------------------"
|
||||
"----------\n");
|
||||
#define __P(F) \
|
||||
SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
|
||||
#define P(F) \
|
||||
SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
|
||||
#define P_SCHEDSTAT(F) \
|
||||
SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)schedstat_val(p->F))
|
||||
#define __PN(F) \
|
||||
SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
|
||||
#define PN(F) \
|
||||
SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
|
||||
#define PN_SCHEDSTAT(F) \
|
||||
SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(p->F)))
|
||||
|
||||
#define P_SCHEDSTAT(F) __PS(#F, schedstat_val(p->F))
|
||||
#define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->F))
|
||||
|
||||
PN(se.exec_start);
|
||||
PN(se.vruntime);
|
||||
@@ -939,10 +932,8 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
|
||||
}
|
||||
|
||||
__P(nr_switches);
|
||||
SEQ_printf(m, "%-45s:%21Ld\n",
|
||||
"nr_voluntary_switches", (long long)p->nvcsw);
|
||||
SEQ_printf(m, "%-45s:%21Ld\n",
|
||||
"nr_involuntary_switches", (long long)p->nivcsw);
|
||||
__PS("nr_voluntary_switches", p->nvcsw);
|
||||
__PS("nr_involuntary_switches", p->nivcsw);
|
||||
|
||||
P(se.load.weight);
|
||||
#ifdef CONFIG_SMP
|
||||
@@ -955,6 +946,12 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
|
||||
P(se.avg.last_update_time);
|
||||
P(se.avg.util_est.ewma);
|
||||
P(se.avg.util_est.enqueued);
|
||||
#endif
|
||||
#ifdef CONFIG_UCLAMP_TASK
|
||||
__PS("uclamp.min", p->uclamp[UCLAMP_MIN].value);
|
||||
__PS("uclamp.max", p->uclamp[UCLAMP_MAX].value);
|
||||
__PS("effective uclamp.min", uclamp_eff_value(p, UCLAMP_MIN));
|
||||
__PS("effective uclamp.max", uclamp_eff_value(p, UCLAMP_MAX));
|
||||
#endif
|
||||
P(policy);
|
||||
P(prio);
|
||||
@@ -963,11 +960,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
|
||||
P(dl.deadline);
|
||||
}
|
||||
#undef PN_SCHEDSTAT
|
||||
#undef PN
|
||||
#undef __PN
|
||||
#undef P_SCHEDSTAT
|
||||
#undef P
|
||||
#undef __P
|
||||
|
||||
{
|
||||
unsigned int this_cpu = raw_smp_processor_id();
|
||||
@@ -975,8 +968,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
|
||||
|
||||
t0 = cpu_clock(this_cpu);
|
||||
t1 = cpu_clock(this_cpu);
|
||||
SEQ_printf(m, "%-45s:%21Ld\n",
|
||||
"clock-delta", (long long)(t1-t0));
|
||||
__PS("clock-delta", t1-t0);
|
||||
}
|
||||
|
||||
sched_show_numa(p, m);
|
||||
|
||||
@@ -4836,11 +4836,10 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
|
||||
resched_curr(rq);
|
||||
}
|
||||
|
||||
static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining)
|
||||
static void distribute_cfs_runtime(struct cfs_bandwidth *cfs_b)
|
||||
{
|
||||
struct cfs_rq *cfs_rq;
|
||||
u64 runtime;
|
||||
u64 starting_runtime = remaining;
|
||||
u64 runtime, remaining = 1;
|
||||
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq,
|
||||
@@ -4855,10 +4854,13 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining)
|
||||
/* By the above check, this should never be true */
|
||||
SCHED_WARN_ON(cfs_rq->runtime_remaining > 0);
|
||||
|
||||
raw_spin_lock(&cfs_b->lock);
|
||||
runtime = -cfs_rq->runtime_remaining + 1;
|
||||
if (runtime > remaining)
|
||||
runtime = remaining;
|
||||
remaining -= runtime;
|
||||
if (runtime > cfs_b->runtime)
|
||||
runtime = cfs_b->runtime;
|
||||
cfs_b->runtime -= runtime;
|
||||
remaining = cfs_b->runtime;
|
||||
raw_spin_unlock(&cfs_b->lock);
|
||||
|
||||
cfs_rq->runtime_remaining += runtime;
|
||||
|
||||
@@ -4873,8 +4875,6 @@ next:
|
||||
break;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return starting_runtime - remaining;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -4885,7 +4885,6 @@ next:
|
||||
*/
|
||||
static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, unsigned long flags)
|
||||
{
|
||||
u64 runtime;
|
||||
int throttled;
|
||||
|
||||
/* no need to continue the timer with no bandwidth constraint */
|
||||
@@ -4914,24 +4913,17 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, u
|
||||
cfs_b->nr_throttled += overrun;
|
||||
|
||||
/*
|
||||
* This check is repeated as we are holding onto the new bandwidth while
|
||||
* we unthrottle. This can potentially race with an unthrottled group
|
||||
* trying to acquire new bandwidth from the global pool. This can result
|
||||
* in us over-using our runtime if it is all used during this loop, but
|
||||
* only by limited amounts in that extreme case.
|
||||
* This check is repeated as we release cfs_b->lock while we unthrottle.
|
||||
*/
|
||||
while (throttled && cfs_b->runtime > 0 && !cfs_b->distribute_running) {
|
||||
runtime = cfs_b->runtime;
|
||||
cfs_b->distribute_running = 1;
|
||||
raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
|
||||
/* we can't nest cfs_b->lock while distributing bandwidth */
|
||||
runtime = distribute_cfs_runtime(cfs_b, runtime);
|
||||
distribute_cfs_runtime(cfs_b);
|
||||
raw_spin_lock_irqsave(&cfs_b->lock, flags);
|
||||
|
||||
cfs_b->distribute_running = 0;
|
||||
throttled = !list_empty(&cfs_b->throttled_cfs_rq);
|
||||
|
||||
lsub_positive(&cfs_b->runtime, runtime);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -5065,10 +5057,9 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
|
||||
if (!runtime)
|
||||
return;
|
||||
|
||||
runtime = distribute_cfs_runtime(cfs_b, runtime);
|
||||
distribute_cfs_runtime(cfs_b);
|
||||
|
||||
raw_spin_lock_irqsave(&cfs_b->lock, flags);
|
||||
lsub_positive(&cfs_b->runtime, runtime);
|
||||
cfs_b->distribute_running = 0;
|
||||
raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
|
||||
}
|
||||
@@ -6080,8 +6071,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
|
||||
struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
|
||||
struct sched_domain *this_sd;
|
||||
u64 avg_cost, avg_idle;
|
||||
u64 time, cost;
|
||||
s64 delta;
|
||||
u64 time;
|
||||
int this = smp_processor_id();
|
||||
int cpu, nr = INT_MAX;
|
||||
|
||||
@@ -6119,9 +6109,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
|
||||
}
|
||||
|
||||
time = cpu_clock(this) - time;
|
||||
cost = this_sd->avg_scan_cost;
|
||||
delta = (s64)(time - cost) / 8;
|
||||
this_sd->avg_scan_cost += delta;
|
||||
update_avg(&this_sd->avg_scan_cost, time);
|
||||
|
||||
return cpu;
|
||||
}
|
||||
@@ -9091,6 +9079,14 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
|
||||
|
||||
sds->avg_load = (sds->total_load * SCHED_CAPACITY_SCALE) /
|
||||
sds->total_capacity;
|
||||
/*
|
||||
* If the local group is more loaded than the selected
|
||||
* busiest group don't try to pull any tasks.
|
||||
*/
|
||||
if (local->avg_load >= busiest->avg_load) {
|
||||
env->imbalance = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -195,6 +195,12 @@ static inline int task_has_dl_policy(struct task_struct *p)
|
||||
|
||||
#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)
|
||||
|
||||
static inline void update_avg(u64 *avg, u64 sample)
|
||||
{
|
||||
s64 diff = sample - *avg;
|
||||
*avg += diff / 8;
|
||||
}
|
||||
|
||||
/*
|
||||
* !! For sched_setattr_nocheck() (kernel) only !!
|
||||
*
|
||||
@@ -884,7 +890,6 @@ struct rq {
|
||||
#endif
|
||||
#ifdef CONFIG_NO_HZ_COMMON
|
||||
#ifdef CONFIG_SMP
|
||||
unsigned long last_load_update_tick;
|
||||
unsigned long last_blocked_load_update_tick;
|
||||
unsigned int has_blocked_load;
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
@@ -447,6 +447,7 @@ const struct proc_ns_operations timens_operations = {
|
||||
|
||||
const struct proc_ns_operations timens_for_children_operations = {
|
||||
.name = "time_for_children",
|
||||
.real_ns_name = "time",
|
||||
.type = CLONE_NEWTIME,
|
||||
.get = timens_for_children_get,
|
||||
.put = timens_put,
|
||||
|
||||
@@ -69,6 +69,7 @@ static struct ctl_table user_table[] = {
|
||||
UCOUNT_ENTRY("max_net_namespaces"),
|
||||
UCOUNT_ENTRY("max_mnt_namespaces"),
|
||||
UCOUNT_ENTRY("max_cgroup_namespaces"),
|
||||
UCOUNT_ENTRY("max_time_namespaces"),
|
||||
#ifdef CONFIG_INOTIFY_USER
|
||||
UCOUNT_ENTRY("max_inotify_instances"),
|
||||
UCOUNT_ENTRY("max_inotify_watches"),
|
||||
@@ -81,6 +82,8 @@ bool setup_userns_sysctls(struct user_namespace *ns)
|
||||
{
|
||||
#ifdef CONFIG_SYSCTL
|
||||
struct ctl_table *tbl;
|
||||
|
||||
BUILD_BUG_ON(ARRAY_SIZE(user_table) != UCOUNT_COUNTS + 1);
|
||||
setup_sysctl_set(&ns->set, &set_root, set_is_seen);
|
||||
tbl = kmemdup(user_table, sizeof(user_table), GFP_KERNEL);
|
||||
if (tbl) {
|
||||
|
||||
@@ -858,7 +858,8 @@ void wq_worker_running(struct task_struct *task)
|
||||
* @task: task going to sleep
|
||||
*
|
||||
* This function is called from schedule() when a busy worker is
|
||||
* going to sleep.
|
||||
* going to sleep. Preemption needs to be disabled to protect ->sleeping
|
||||
* assignment.
|
||||
*/
|
||||
void wq_worker_sleeping(struct task_struct *task)
|
||||
{
|
||||
@@ -875,7 +876,8 @@ void wq_worker_sleeping(struct task_struct *task)
|
||||
|
||||
pool = worker->pool;
|
||||
|
||||
if (WARN_ON_ONCE(worker->sleeping))
|
||||
/* Return if preempted before wq_worker_running() was reached */
|
||||
if (worker->sleeping)
|
||||
return;
|
||||
|
||||
worker->sleeping = 1;
|
||||
|
||||
Reference in New Issue
Block a user