Merge 7e284070ab ("Merge tag 'for-5.19/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm") into android-mainline

Steps on the way to 5.19-rc1

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: I9981265de6b27657eaaf51bd1fa19ce0ce9f2cc6
This commit is contained in:
Greg Kroah-Hartman
2022-06-22 09:27:37 +02:00
159 changed files with 3890 additions and 3045 deletions

View File

@@ -434,6 +434,8 @@ prototypes::
void (*lm_break)(struct file_lock *); /* break_lease callback */
int (*lm_change)(struct file_lock **, int);
bool (*lm_breaker_owns_lease)(struct file_lock *);
bool (*lm_lock_expirable)(struct file_lock *);
void (*lm_expire_lock)(void);
locking rules:
@@ -445,6 +447,8 @@ lm_grant: no no no
lm_break: yes no no
lm_change yes no no
lm_breaker_owns_lease: yes no no
lm_lock_expirable yes no no
lm_expire_lock no no yes
====================== ============= ================= =========
buffer_head

View File

@@ -36,8 +36,10 @@ config ARC
select HAVE_KERNEL_LZMA
select HAVE_KPROBES
select HAVE_KRETPROBES
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_PERF_EVENTS
select HAVE_SYSCALL_TRACEPOINTS
select IRQ_DOMAIN
select MODULES_USE_ELF_RELA
select OF

View File

@@ -63,4 +63,8 @@ struct arc_reg_cc_build {
#define PERF_COUNT_ARC_HW_MAX (PERF_COUNT_HW_MAX + 8)
#ifdef CONFIG_PERF_EVENTS
#define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs
#endif
#endif /* __ASM_PERF_EVENT_H */

View File

@@ -8,6 +8,7 @@
#define __ASM_ARC_PTRACE_H
#include <uapi/asm/ptrace.h>
#include <linux/compiler.h>
#ifndef __ASSEMBLY__
@@ -54,6 +55,9 @@ struct pt_regs {
unsigned long user_r25;
};
#define MAX_REG_OFFSET offsetof(struct pt_regs, user_r25)
#else
struct pt_regs {
@@ -102,6 +106,8 @@ struct pt_regs {
unsigned long status32;
};
#define MAX_REG_OFFSET offsetof(struct pt_regs, status32)
#endif
/* Callee saved registers - need to be saved only when you are scheduled out */
@@ -154,6 +160,27 @@ static inline void instruction_pointer_set(struct pt_regs *regs,
{
instruction_pointer(regs) = val;
}
static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
{
return regs->sp;
}
extern int regs_query_register_offset(const char *name);
extern const char *regs_query_register_name(unsigned int offset);
extern bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr);
extern unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
unsigned int n);
static inline unsigned long regs_get_register(struct pt_regs *regs,
unsigned int offset)
{
if (unlikely(offset > MAX_REG_OFFSET))
return 0;
return *(unsigned long *)((unsigned long)regs + offset);
}
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_PTRACE_H */

View File

@@ -12,6 +12,8 @@
#include <asm/unistd.h>
#include <asm/ptrace.h> /* in_syscall() */
extern void *sys_call_table[];
static inline long
syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
{

View File

@@ -78,9 +78,9 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void)
#define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */
#define TIF_NOTIFY_SIGNAL 5 /* signal notifications exist */
#define TIF_SYSCALL_TRACE 15 /* syscall trace active */
/* true if poll_idle() is polling TIF_NEED_RESCHED */
#define TIF_MEMDIE 16
#define TIF_SYSCALL_TRACEPOINT 17 /* syscall tracepoint instrumentation */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
@@ -89,11 +89,14 @@ static inline __attribute_const__ struct thread_info *current_thread_info(void)
#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
#define _TIF_NOTIFY_SIGNAL (1<<TIF_NOTIFY_SIGNAL)
#define _TIF_MEMDIE (1<<TIF_MEMDIE)
#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
/* work to do on interrupt/exception return */
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
_TIF_NOTIFY_RESUME | _TIF_NOTIFY_SIGNAL)
#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT)
/*
* _TIF_ALLWORK_MASK includes SYSCALL_TRACE, but we don't need it.
* SYSCALL_TRACE is anyway separately/unconditionally tested right after a

View File

@@ -0,0 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
#define _UAPI__ASM_BPF_PERF_EVENT_H__
#include <asm/ptrace.h>
typedef struct user_regs_struct bpf_user_pt_regs_t;
#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */

View File

@@ -434,14 +434,31 @@ long __kprobes get_reg(int reg, struct pt_regs *regs,
{
long *p;
#if defined(CONFIG_ISA_ARCOMPACT)
if (reg <= 12) {
p = &regs->r0;
return p[-reg];
}
#else /* CONFIG_ISA_ARCV2 */
if (reg <= 11) {
p = &regs->r0;
return p[reg];
}
if (reg == 12)
return regs->r12;
if (reg == 30)
return regs->r30;
#ifdef CONFIG_ARC_HAS_ACCL_REGS
if (reg == 58)
return regs->r58;
if (reg == 59)
return regs->r59;
#endif
#endif
if (cregs && (reg <= 25)) {
p = &cregs->r13;
return p[13-reg];
return p[13 - reg];
}
if (reg == 26)
@@ -461,6 +478,7 @@ void __kprobes set_reg(int reg, long val, struct pt_regs *regs,
{
long *p;
#if defined(CONFIG_ISA_ARCOMPACT)
switch (reg) {
case 0 ... 12:
p = &regs->r0;
@@ -469,7 +487,7 @@ void __kprobes set_reg(int reg, long val, struct pt_regs *regs,
case 13 ... 25:
if (cregs) {
p = &cregs->r13;
p[13-reg] = val;
p[13 - reg] = val;
}
break;
case 26:
@@ -487,6 +505,48 @@ void __kprobes set_reg(int reg, long val, struct pt_regs *regs,
default:
break;
}
#else /* CONFIG_ISA_ARCV2 */
switch (reg) {
case 0 ... 11:
p = &regs->r0;
p[reg] = val;
break;
case 12:
regs->r12 = val;
break;
case 13 ... 25:
if (cregs) {
p = &cregs->r13;
p[13 - reg] = val;
}
break;
case 26:
regs->r26 = val;
break;
case 27:
regs->fp = val;
break;
case 28:
regs->sp = val;
break;
case 30:
regs->r30 = val;
break;
case 31:
regs->blink = val;
break;
#ifdef CONFIG_ARC_HAS_ACCL_REGS
case 58:
regs->r58 = val;
break;
case 59:
regs->r59 = val;
break;
#endif
default:
break;
}
#endif
}
/*

View File

@@ -29,8 +29,8 @@ ENTRY(sys_clone_wrapper)
DISCARD_CALLEE_SAVED_USER
GET_CURR_THR_INFO_FLAGS r10
btst r10, TIF_SYSCALL_TRACE
bnz tracesys_exit
and.f 0, r10, _TIF_SYSCALL_WORK
bnz tracesys_exit
b .Lret_from_system_call
END(sys_clone_wrapper)
@@ -41,8 +41,8 @@ ENTRY(sys_clone3_wrapper)
DISCARD_CALLEE_SAVED_USER
GET_CURR_THR_INFO_FLAGS r10
btst r10, TIF_SYSCALL_TRACE
bnz tracesys_exit
and.f 0, r10, _TIF_SYSCALL_WORK
bnz tracesys_exit
b .Lret_from_system_call
END(sys_clone3_wrapper)
@@ -247,8 +247,8 @@ ENTRY(EV_Trap)
; If syscall tracing ongoing, invoke pre-post-hooks
GET_CURR_THR_INFO_FLAGS r10
btst r10, TIF_SYSCALL_TRACE
bnz tracesys ; this never comes back
and.f 0, r10, _TIF_SYSCALL_WORK
bnz tracesys ; this never comes back
;============ Normal syscall case

View File

@@ -9,6 +9,92 @@
#include <linux/unistd.h>
#include <linux/elf.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
struct pt_regs_offset {
const char *name;
int offset;
};
#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
#define REG_OFFSET_END {.name = NULL, .offset = 0}
#ifdef CONFIG_ISA_ARCOMPACT
static const struct pt_regs_offset regoffset_table[] = {
REG_OFFSET_NAME(bta),
REG_OFFSET_NAME(lp_start),
REG_OFFSET_NAME(lp_end),
REG_OFFSET_NAME(lp_count),
REG_OFFSET_NAME(status32),
REG_OFFSET_NAME(ret),
REG_OFFSET_NAME(blink),
REG_OFFSET_NAME(fp),
REG_OFFSET_NAME(r26),
REG_OFFSET_NAME(r12),
REG_OFFSET_NAME(r11),
REG_OFFSET_NAME(r10),
REG_OFFSET_NAME(r9),
REG_OFFSET_NAME(r8),
REG_OFFSET_NAME(r7),
REG_OFFSET_NAME(r6),
REG_OFFSET_NAME(r5),
REG_OFFSET_NAME(r4),
REG_OFFSET_NAME(r3),
REG_OFFSET_NAME(r2),
REG_OFFSET_NAME(r1),
REG_OFFSET_NAME(r0),
REG_OFFSET_NAME(sp),
REG_OFFSET_NAME(orig_r0),
REG_OFFSET_NAME(event),
REG_OFFSET_NAME(user_r25),
REG_OFFSET_END,
};
#else
static const struct pt_regs_offset regoffset_table[] = {
REG_OFFSET_NAME(orig_r0),
REG_OFFSET_NAME(event),
REG_OFFSET_NAME(bta),
REG_OFFSET_NAME(user_r25),
REG_OFFSET_NAME(r26),
REG_OFFSET_NAME(fp),
REG_OFFSET_NAME(sp),
REG_OFFSET_NAME(r12),
REG_OFFSET_NAME(r30),
#ifdef CONFIG_ARC_HAS_ACCL_REGS
REG_OFFSET_NAME(r58),
REG_OFFSET_NAME(r59),
#endif
#ifdef CONFIG_ARC_DSP_SAVE_RESTORE_REGS
REG_OFFSET_NAME(DSP_CTRL),
#endif
REG_OFFSET_NAME(r0),
REG_OFFSET_NAME(r1),
REG_OFFSET_NAME(r2),
REG_OFFSET_NAME(r3),
REG_OFFSET_NAME(r4),
REG_OFFSET_NAME(r5),
REG_OFFSET_NAME(r6),
REG_OFFSET_NAME(r7),
REG_OFFSET_NAME(r8),
REG_OFFSET_NAME(r9),
REG_OFFSET_NAME(r10),
REG_OFFSET_NAME(r11),
REG_OFFSET_NAME(blink),
REG_OFFSET_NAME(lp_end),
REG_OFFSET_NAME(lp_start),
REG_OFFSET_NAME(lp_count),
REG_OFFSET_NAME(ei),
REG_OFFSET_NAME(ldi),
REG_OFFSET_NAME(jli),
REG_OFFSET_NAME(ret),
REG_OFFSET_NAME(status32),
REG_OFFSET_END,
};
#endif
static struct callee_regs *task_callee_regs(struct task_struct *tsk)
{
struct callee_regs *tmp = (struct callee_regs *)tsk->thread.callee_reg;
@@ -257,13 +343,61 @@ long arch_ptrace(struct task_struct *child, long request,
asmlinkage int syscall_trace_entry(struct pt_regs *regs)
{
if (ptrace_report_syscall_entry(regs))
return ULONG_MAX;
if (test_thread_flag(TIF_SYSCALL_TRACE))
if (ptrace_report_syscall_entry(regs))
return ULONG_MAX;
#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
trace_sys_enter(regs, syscall_get_nr(current, regs));
#endif
return regs->r8;
}
asmlinkage void syscall_trace_exit(struct pt_regs *regs)
{
ptrace_report_syscall_exit(regs, 0);
if (test_thread_flag(TIF_SYSCALL_TRACE))
ptrace_report_syscall_exit(regs, 0);
#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
trace_sys_exit(regs, regs_return_value(regs));
#endif
}
int regs_query_register_offset(const char *name)
{
const struct pt_regs_offset *roff;
for (roff = regoffset_table; roff->name != NULL; roff++)
if (!strcmp(roff->name, name))
return roff->offset;
return -EINVAL;
}
const char *regs_query_register_name(unsigned int offset)
{
const struct pt_regs_offset *roff;
for (roff = regoffset_table; roff->name != NULL; roff++)
if (roff->offset == offset)
return roff->name;
return NULL;
}
bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
{
return (addr & ~(THREAD_SIZE - 1)) ==
(kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1));
}
unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
{
unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
addr += n;
if (regs_within_kernel_stack(regs, (unsigned long)addr))
return *addr;
else
return 0;
}

View File

@@ -113,6 +113,59 @@ config OPENRISC_HAVE_INST_DIV
default y
help
Select this if your implementation has a hardware divide instruction
config OPENRISC_HAVE_INST_CMOV
bool "Have instruction l.cmov for conditional move"
default n
help
This config enables gcc to generate l.cmov instructions when compiling
the kernel which in general will improve performance and reduce the
binary size.
Select this if your implementation has support for the Class II
l.cmov conistional move instruction.
Say N if you are unsure.
config OPENRISC_HAVE_INST_ROR
bool "Have instruction l.ror for rotate right"
default n
help
This config enables gcc to generate l.ror instructions when compiling
the kernel which in general will improve performance and reduce the
binary size.
Select this if your implementation has support for the Class II
l.ror rotate right instruction.
Say N if you are unsure.
config OPENRISC_HAVE_INST_RORI
bool "Have instruction l.rori for rotate right with immediate"
default n
help
This config enables gcc to generate l.rori instructions when compiling
the kernel which in general will improve performance and reduce the
binary size.
Select this if your implementation has support for the Class II
l.rori rotate right with immediate instruction.
Say N if you are unsure.
config OPENRISC_HAVE_INST_SEXT
bool "Have instructions l.ext* for sign extension"
default n
help
This config enables gcc to generate l.ext* instructions when compiling
the kernel which in general will improve performance and reduce the
binary size.
Select this if your implementation has support for the Class II
l.exths, l.extbs, l.exthz and l.extbz size extend instructions.
Say N if you are unsure.
endmenu
config NR_CPUS

View File

@@ -21,6 +21,7 @@ OBJCOPYFLAGS := -O binary -R .note -R .comment -S
LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
KBUILD_CFLAGS += -pipe -ffixed-r10 -D__linux__
KBUILD_CFLAGS += -msfimm -mshftimm
all: vmlinux.bin
@@ -38,6 +39,22 @@ else
KBUILD_CFLAGS += $(call cc-option,-msoft-div)
endif
ifeq ($(CONFIG_OPENRISC_HAVE_INST_CMOV),y)
KBUILD_CFLAGS += $(call cc-option,-mcmov)
endif
ifeq ($(CONFIG_OPENRISC_HAVE_INST_ROR),y)
KBUILD_CFLAGS += $(call cc-option,-mror)
endif
ifeq ($(CONFIG_OPENRISC_HAVE_INST_RORI),y)
KBUILD_CFLAGS += $(call cc-option,-mrori)
endif
ifeq ($(CONFIG_OPENRISC_HAVE_INST_SEXT),y)
KBUILD_CFLAGS += $(call cc-option,-msext)
endif
head-y := arch/openrisc/kernel/head.o
libs-y += $(LIBGCC)

View File

@@ -1,22 +1,54 @@
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_CGROUPS=y
CONFIG_NAMESPACES=y
CONFIG_USER_NS=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_SGETMASK_SYSCALL=y
CONFIG_EMBEDDED=y
CONFIG_OPENRISC_BUILTIN_DTB="or1klitex"
CONFIG_HZ_100=y
CONFIG_OPENRISC_HAVE_SHADOW_GPRS=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=y
CONFIG_UNIX=y
CONFIG_UNIX_DIAG=y
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_INET_UDP_DIAG=y
CONFIG_INET_RAW_DIAG=y
# CONFIG_WIRELESS is not set
# CONFIG_ETHTOOL_NETLINK is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_OF_OVERLAY=y
CONFIG_NETDEVICES=y
CONFIG_LITEX_LITEETH=y
# CONFIG_WLAN is not set
CONFIG_SERIAL_LITEUART=y
CONFIG_SERIAL_LITEUART_CONSOLE=y
CONFIG_TTY_PRINTK=y
# CONFIG_GPIO_CDEV is not set
CONFIG_MMC=y
CONFIG_MMC_LITEX=y
# CONFIG_VHOST_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_LITEX_SOC_CONTROLLER=y
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_EXFAT_FS=y
CONFIG_TMPFS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
CONFIG_LSM="lockdown,yama,loadpin,safesetid,integrity,bpf"
CONFIG_PRINTK_TIME=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_SOFTLOCKUP_DETECTOR=y

View File

@@ -601,7 +601,7 @@ UNHANDLED_EXCEPTION(_vector_0xb00,0xb00)
*/
_string_syscall_return:
.string "syscall return %ld \n\r\0"
.string "syscall r9:0x%08x -> syscall(%ld) return %ld\0"
.align 4
ENTRY(_sys_call_handler)
@@ -679,15 +679,25 @@ _syscall_return:
_syscall_debug:
l.movhi r3,hi(_string_syscall_return)
l.ori r3,r3,lo(_string_syscall_return)
l.ori r27,r0,1
l.ori r27,r0,2
l.sw -4(r1),r27
l.sw -8(r1),r11
l.addi r1,r1,-8
l.lwz r29,PT_ORIG_GPR11(r1)
l.sw -12(r1),r29
l.lwz r29,PT_GPR9(r1)
l.sw -16(r1),r29
l.movhi r27,hi(_printk)
l.ori r27,r27,lo(_printk)
l.jalr r27
l.nop
l.addi r1,r1,8
l.addi r1,r1,-16
l.addi r1,r1,16
#endif
#if 0
_syscall_show_regs:
l.movhi r27,hi(show_registers)
l.ori r27,r27,lo(show_registers)
l.jalr r27
l.or r3,r1,r1
#endif
_syscall_check_trace_leave:

View File

@@ -297,19 +297,23 @@
/* temporary store r3, r9 into r1, r10 */ ;\
l.addi r1,r3,0x0 ;\
l.addi r10,r9,0x0 ;\
/* the string referenced by r3 must be low enough */ ;\
LOAD_SYMBOL_2_GPR(r9,_string_unhandled_exception) ;\
tophys (r3,r9) ;\
l.jal _emergency_print ;\
l.ori r3,r0,lo(_string_unhandled_exception) ;\
l.nop ;\
l.mfspr r3,r0,SPR_NPC ;\
l.jal _emergency_print_nr ;\
l.andi r3,r3,0x1f00 ;\
/* the string referenced by r3 must be low enough */ ;\
l.andi r3,r3,0x1f00 ;\
LOAD_SYMBOL_2_GPR(r9,_string_epc_prefix) ;\
tophys (r3,r9) ;\
l.jal _emergency_print ;\
l.ori r3,r0,lo(_string_epc_prefix) ;\
l.nop ;\
l.jal _emergency_print_nr ;\
l.mfspr r3,r0,SPR_EPCR_BASE ;\
l.mfspr r3,r0,SPR_EPCR_BASE ;\
LOAD_SYMBOL_2_GPR(r9,_string_nl) ;\
tophys (r3,r9) ;\
l.jal _emergency_print ;\
l.ori r3,r0,lo(_string_nl) ;\
l.nop ;\
/* end of printing */ ;\
l.addi r3,r1,0x0 ;\
l.addi r9,r10,0x0 ;\
@@ -1330,215 +1334,63 @@ i_pte_not_present:
/* =================================================[ debugging aids ]=== */
.align 64
_immu_trampoline:
.space 64
_immu_trampoline_top:
/*
* DESC: Prints ASCII character stored in r7
*
* PRMS: r7 - a 32-bit value with an ASCII character in the first byte
* position.
*
* PREQ: The UART at UART_BASE_ADD has to be initialized
*
* POST: internally used but restores:
* r4 - to store UART_BASE_ADD
* r5 - for loading OFF_TXFULL / THRE,TEMT
* r6 - for storing bitmask (SERIAL_8250)
*/
ENTRY(_emergency_putc)
EMERGENCY_PRINT_STORE_GPR4
EMERGENCY_PRINT_STORE_GPR5
EMERGENCY_PRINT_STORE_GPR6
#define TRAMP_SLOT_0 (0x0)
#define TRAMP_SLOT_1 (0x4)
#define TRAMP_SLOT_2 (0x8)
#define TRAMP_SLOT_3 (0xc)
#define TRAMP_SLOT_4 (0x10)
#define TRAMP_SLOT_5 (0x14)
#define TRAMP_FRAME_SIZE (0x18)
l.movhi r4,hi(UART_BASE_ADD)
l.ori r4,r4,lo(UART_BASE_ADD)
ENTRY(_immu_trampoline_workaround)
// r2 EEA
// r6 is physical EEA
tophys(r6,r2)
#if defined(CONFIG_SERIAL_LITEUART)
/* Check OFF_TXFULL status */
1: l.lwz r5,4(r4)
l.andi r5,r5,0xff
l.sfnei r5,0
l.bf 1b
l.nop
LOAD_SYMBOL_2_GPR(r5,_immu_trampoline)
tophys (r3,r5) // r3 is trampoline (physical)
/* Write character */
l.andi r7,r7,0xff
l.sw 0(r4),r7
#elif defined(CONFIG_SERIAL_8250)
/* Check UART LSR THRE (hold) bit */
l.addi r6,r0,0x20
1: l.lbz r5,5(r4)
l.andi r5,r5,0x20
l.sfeq r5,r6
l.bnf 1b
l.nop
LOAD_SYMBOL_2_GPR(r4,0x15000000)
l.sw TRAMP_SLOT_0(r3),r4
l.sw TRAMP_SLOT_1(r3),r4
l.sw TRAMP_SLOT_4(r3),r4
l.sw TRAMP_SLOT_5(r3),r4
// EPC = EEA - 0x4
l.lwz r4,0x0(r6) // load op @ EEA + 0x0 (fc address)
l.sw TRAMP_SLOT_3(r3),r4 // store it to _immu_trampoline_data
l.lwz r4,-0x4(r6) // load op @ EEA - 0x4 (f8 address)
l.sw TRAMP_SLOT_2(r3),r4 // store it to _immu_trampoline_data
l.srli r5,r4,26 // check opcode for write access
l.sfeqi r5,0 // l.j
l.bf 0f
l.sfeqi r5,0x11 // l.jr
l.bf 1f
l.sfeqi r5,1 // l.jal
l.bf 2f
l.sfeqi r5,0x12 // l.jalr
l.bf 3f
l.sfeqi r5,3 // l.bnf
l.bf 4f
l.sfeqi r5,4 // l.bf
l.bf 5f
99:
l.nop
l.j 99b // should never happen
l.nop 1
// r2 is EEA
// r3 is trampoline address (physical)
// r4 is instruction
// r6 is physical(EEA)
//
// r5
2: // l.jal
/* 19 20 aa aa l.movhi r9,0xaaaa
* a9 29 bb bb l.ori r9,0xbbbb
*
* where 0xaaaabbbb is EEA + 0x4 shifted right 2
*/
l.addi r6,r2,0x4 // this is 0xaaaabbbb
// l.movhi r9,0xaaaa
l.ori r5,r0,0x1920 // 0x1920 == l.movhi r9
l.sh (TRAMP_SLOT_0+0x0)(r3),r5
l.srli r5,r6,16
l.sh (TRAMP_SLOT_0+0x2)(r3),r5
// l.ori r9,0xbbbb
l.ori r5,r0,0xa929 // 0xa929 == l.ori r9
l.sh (TRAMP_SLOT_1+0x0)(r3),r5
l.andi r5,r6,0xffff
l.sh (TRAMP_SLOT_1+0x2)(r3),r5
/* falthrough, need to set up new jump offset */
0: // l.j
l.slli r6,r4,6 // original offset shifted left 6 - 2
// l.srli r6,r6,6 // original offset shifted right 2
l.slli r4,r2,4 // old jump position: EEA shifted left 4
// l.srli r4,r4,6 // old jump position: shifted right 2
l.addi r5,r3,0xc // new jump position (physical)
l.slli r5,r5,4 // new jump position: shifted left 4
// calculate new jump offset
// new_off = old_off + (old_jump - new_jump)
l.sub r5,r4,r5 // old_jump - new_jump
l.add r5,r6,r5 // orig_off + (old_jump - new_jump)
l.srli r5,r5,6 // new offset shifted right 2
// r5 is new jump offset
// l.j has opcode 0x0...
l.sw TRAMP_SLOT_2(r3),r5 // write it back
l.j trampoline_out
l.nop
/* ----------------------------- */
3: // l.jalr
/* 19 20 aa aa l.movhi r9,0xaaaa
* a9 29 bb bb l.ori r9,0xbbbb
*
* where 0xaaaabbbb is EEA + 0x4 shifted right 2
*/
l.addi r6,r2,0x4 // this is 0xaaaabbbb
// l.movhi r9,0xaaaa
l.ori r5,r0,0x1920 // 0x1920 == l.movhi r9
l.sh (TRAMP_SLOT_0+0x0)(r3),r5
l.srli r5,r6,16
l.sh (TRAMP_SLOT_0+0x2)(r3),r5
// l.ori r9,0xbbbb
l.ori r5,r0,0xa929 // 0xa929 == l.ori r9
l.sh (TRAMP_SLOT_1+0x0)(r3),r5
l.andi r5,r6,0xffff
l.sh (TRAMP_SLOT_1+0x2)(r3),r5
l.lhz r5,(TRAMP_SLOT_2+0x0)(r3) // load hi part of jump instruction
l.andi r5,r5,0x3ff // clear out opcode part
l.ori r5,r5,0x4400 // opcode changed from l.jalr -> l.jr
l.sh (TRAMP_SLOT_2+0x0)(r3),r5 // write it back
/* falthrough */
1: // l.jr
l.j trampoline_out
l.nop
/* ----------------------------- */
4: // l.bnf
5: // l.bf
l.slli r6,r4,6 // original offset shifted left 6 - 2
// l.srli r6,r6,6 // original offset shifted right 2
l.slli r4,r2,4 // old jump position: EEA shifted left 4
// l.srli r4,r4,6 // old jump position: shifted right 2
l.addi r5,r3,0xc // new jump position (physical)
l.slli r5,r5,4 // new jump position: shifted left 4
// calculate new jump offset
// new_off = old_off + (old_jump - new_jump)
l.add r6,r6,r4 // (orig_off + old_jump)
l.sub r6,r6,r5 // (orig_off + old_jump) - new_jump
l.srli r6,r6,6 // new offset shifted right 2
// r6 is new jump offset
l.lwz r4,(TRAMP_SLOT_2+0x0)(r3) // load jump instruction
l.srli r4,r4,16
l.andi r4,r4,0xfc00 // get opcode part
l.slli r4,r4,16
l.or r6,r4,r6 // l.b(n)f new offset
l.sw TRAMP_SLOT_2(r3),r6 // write it back
/* we need to add l.j to EEA + 0x8 */
tophys (r4,r2) // may not be needed (due to shifts down_
l.addi r4,r4,(0x8 - 0x8) // jump target = r2 + 0x8 (compensate for 0x8)
// jump position = r5 + 0x8 (0x8 compensated)
l.sub r4,r4,r5 // jump offset = target - new_position + 0x8
l.slli r4,r4,4 // the amount of info in imediate of jump
l.srli r4,r4,6 // jump instruction with offset
l.sw TRAMP_SLOT_4(r3),r4 // write it to 4th slot
/* fallthrough */
trampoline_out:
// set up new EPC to point to our trampoline code
LOAD_SYMBOL_2_GPR(r5,_immu_trampoline)
l.mtspr r0,r5,SPR_EPCR_BASE
// immu_trampoline is (4x) CACHE_LINE aligned
// and only 6 instructions long,
// so we need to invalidate only 2 lines
/* Establish cache block size
If BS=0, 16;
If BS=1, 32;
r14 contain block size
*/
l.mfspr r21,r0,SPR_ICCFGR
l.andi r21,r21,SPR_ICCFGR_CBS
l.srli r21,r21,7
l.ori r23,r0,16
l.sll r14,r23,r21
l.mtspr r0,r5,SPR_ICBIR
l.add r5,r5,r14
l.mtspr r0,r5,SPR_ICBIR
/* Write character */
l.sb 0(r4),r7
/* Check UART LSR THRE|TEMT (hold, empty) bits */
l.addi r6,r0,0x60
1: l.lbz r5,5(r4)
l.andi r5,r5,0x60
l.sfeq r5,r6
l.bnf 1b
l.nop
#endif
EMERGENCY_PRINT_LOAD_GPR6
EMERGENCY_PRINT_LOAD_GPR5
EMERGENCY_PRINT_LOAD_GPR4
l.jr r9
l.nop
l.nop
/*
* DSCR: prints a string referenced by r3.
@@ -1551,53 +1403,41 @@ trampoline_out:
* POST: caller should be aware that r3, r9 are changed
*/
ENTRY(_emergency_print)
EMERGENCY_PRINT_STORE_GPR4
EMERGENCY_PRINT_STORE_GPR5
EMERGENCY_PRINT_STORE_GPR6
EMERGENCY_PRINT_STORE_GPR7
2:
l.lbz r7,0(r3)
l.sfeq r7,r0
EMERGENCY_PRINT_STORE_GPR9
/* Load character to r7, check for null terminator */
2: l.lbz r7,0(r3)
l.sfeqi r7,0x0
l.bf 9f
l.nop
l.nop
// putc:
l.movhi r4,hi(UART_BASE_ADD)
l.addi r6,r0,0x20
1: l.lbz r5,5(r4)
l.andi r5,r5,0x20
l.sfeq r5,r6
l.bnf 1b
l.nop
l.sb 0(r4),r7
l.addi r6,r0,0x60
1: l.lbz r5,5(r4)
l.andi r5,r5,0x60
l.sfeq r5,r6
l.bnf 1b
l.nop
l.jal _emergency_putc
l.nop
/* next character */
l.j 2b
l.addi r3,r3,0x1
l.addi r3,r3,0x1
9:
EMERGENCY_PRINT_LOAD_GPR9
EMERGENCY_PRINT_LOAD_GPR7
EMERGENCY_PRINT_LOAD_GPR6
EMERGENCY_PRINT_LOAD_GPR5
EMERGENCY_PRINT_LOAD_GPR4
l.jr r9
l.nop
l.nop
/*
* DSCR: prints a number in r3 in hex.
*
* PRMS: r3 - a 32-bit unsigned integer
*
* PREQ: UART at UART_BASE_ADD has to be initialized
*
* POST: caller should be aware that r3, r9 are changed
*/
ENTRY(_emergency_print_nr)
EMERGENCY_PRINT_STORE_GPR4
EMERGENCY_PRINT_STORE_GPR5
EMERGENCY_PRINT_STORE_GPR6
EMERGENCY_PRINT_STORE_GPR7
EMERGENCY_PRINT_STORE_GPR8
EMERGENCY_PRINT_STORE_GPR9
l.addi r8,r0,32 // shift register
@@ -1609,58 +1449,39 @@ ENTRY(_emergency_print_nr)
/* don't skip the last zero if number == 0x0 */
l.sfeqi r8,0x4
l.bf 2f
l.nop
l.nop
l.sfeq r7,r0
l.bf 1b
l.nop
l.nop
2:
l.srl r7,r3,r8
l.andi r7,r7,0xf
l.sflts r8,r0
l.bf 9f
l.bf 9f
/* Numbers greater than 9 translate to a-f */
l.sfgtui r7,0x9
l.bnf 8f
l.nop
l.nop
l.addi r7,r7,0x27
8:
l.addi r7,r7,0x30
// putc:
l.movhi r4,hi(UART_BASE_ADD)
l.addi r6,r0,0x20
1: l.lbz r5,5(r4)
l.andi r5,r5,0x20
l.sfeq r5,r6
l.bnf 1b
l.nop
l.sb 0(r4),r7
l.addi r6,r0,0x60
1: l.lbz r5,5(r4)
l.andi r5,r5,0x60
l.sfeq r5,r6
l.bnf 1b
l.nop
/* Convert to ascii and output character */
8: l.jal _emergency_putc
l.addi r7,r7,0x30
/* next character */
l.j 2b
l.addi r8,r8,-0x4
9:
EMERGENCY_PRINT_LOAD_GPR9
EMERGENCY_PRINT_LOAD_GPR8
EMERGENCY_PRINT_LOAD_GPR7
EMERGENCY_PRINT_LOAD_GPR6
EMERGENCY_PRINT_LOAD_GPR5
EMERGENCY_PRINT_LOAD_GPR4
l.jr r9
l.nop
l.nop
/*
* This should be used for debugging only.
@@ -1685,7 +1506,9 @@ ENTRY(_emergency_print_nr)
ENTRY(_early_uart_init)
l.movhi r3,hi(UART_BASE_ADD)
l.ori r3,r3,lo(UART_BASE_ADD)
#if defined(CONFIG_SERIAL_8250)
l.addi r4,r0,0x7
l.sb 0x2(r3),r4
@@ -1703,9 +1526,10 @@ ENTRY(_early_uart_init)
l.addi r4,r0,((UART_DIVISOR) & 0x000000ff)
l.sb UART_DLL(r3),r4
l.sb 0x3(r3),r5
#endif
l.jr r9
l.nop
l.nop
.align 0x1000
.global _secondary_evbar
@@ -1720,13 +1544,13 @@ _secondary_evbar:
.section .rodata
_string_unhandled_exception:
.string "\n\rRunarunaround: Unhandled exception 0x\0"
.string "\r\nRunarunaround: Unhandled exception 0x\0"
_string_epc_prefix:
.string ": EPC=0x\0"
_string_nl:
.string "\n\r\0"
.string "\r\n\0"
/* ========================================[ page aligned structures ]=== */

View File

@@ -52,6 +52,8 @@ void machine_restart(char *cmd)
{
do_kernel_restart(cmd);
__asm__("l.nop 13");
/* Give a grace period for failure to restart of 1s */
mdelay(1000);
@@ -60,6 +62,16 @@ void machine_restart(char *cmd)
while (1);
}
/*
* This is used if pm_power_off has not been set by a power management
* driver, in this case we can assume we are on a simulator. On
* OpenRISC simulators l.nop 1 will trigger the simulator exit.
*/
static void default_power_off(void)
{
__asm__("l.nop 1");
}
/*
* Similar to machine_power_off, but don't shut off power. Add code
* here to freeze the system for e.g. post-mortem debug purpose when
@@ -75,7 +87,10 @@ void machine_halt(void)
void machine_power_off(void)
{
printk(KERN_INFO "*** MACHINE POWER OFF ***\n");
__asm__("l.nop 1");
if (pm_power_off != NULL)
pm_power_off();
else
default_power_off();
}
/*
@@ -89,7 +104,7 @@ void arch_cpu_idle(void)
mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME);
}
void (*pm_power_off) (void) = machine_power_off;
void (*pm_power_off)(void) = NULL;
EXPORT_SYMBOL(pm_power_off);
/*

View File

@@ -23,6 +23,7 @@
#include <linux/of_clk.h>
#include <asm/cpuinfo.h>
#include <asm/time.h>
/* Test the timer ticks to count, used in sync routine */
inline void openrisc_timer_set(unsigned long count)
@@ -61,7 +62,7 @@ static int openrisc_timer_set_next_event(unsigned long delta,
* timers) we cannot enable the PERIODIC feature. The tick timer can run using
* one-shot events, so no problem.
*/
DEFINE_PER_CPU(struct clock_event_device, clockevent_openrisc_timer);
static DEFINE_PER_CPU(struct clock_event_device, clockevent_openrisc_timer);
void openrisc_clockevent_init(void)
{

View File

@@ -34,11 +34,11 @@
#include <asm/unwinder.h>
#include <asm/sections.h>
int kstack_depth_to_print = 0x180;
static int kstack_depth_to_print = 0x180;
int lwa_flag;
unsigned long __user *lwa_addr;
static unsigned long __user *lwa_addr;
void print_trace(void *data, unsigned long addr, int reliable)
static void print_trace(void *data, unsigned long addr, int reliable)
{
const char *loglvl = data;
@@ -46,6 +46,14 @@ void print_trace(void *data, unsigned long addr, int reliable)
(void *) addr);
}
static void print_data(unsigned long base_addr, unsigned long word, int i)
{
if (i == 0)
printk("(%08lx:)\t%08lx", base_addr + (i * 4), word);
else
printk(" %08lx:\t%08lx", base_addr + (i * 4), word);
}
/* displays a short stack trace */
void show_stack(struct task_struct *task, unsigned long *esp, const char *loglvl)
{
@@ -99,22 +107,36 @@ void show_registers(struct pt_regs *regs)
printk("\nStack: ");
show_stack(NULL, (unsigned long *)esp, KERN_EMERG);
if (esp < PAGE_OFFSET)
goto bad_stack;
printk("\n");
for (i = -8; i < 24; i += 1) {
unsigned long word;
if (__get_user(word, &((unsigned long *)esp)[i])) {
bad_stack:
printk(" Bad Stack value.");
break;
}
print_data(esp, word, i);
}
printk("\nCode: ");
if (regs->pc < PAGE_OFFSET)
goto bad;
for (i = -24; i < 24; i++) {
unsigned char c;
if (__get_user(c, &((unsigned char *)regs->pc)[i])) {
for (i = -6; i < 6; i += 1) {
unsigned long word;
if (__get_user(word, &((unsigned long *)regs->pc)[i])) {
bad:
printk(" Bad PC value.");
break;
}
if (i == 0)
printk("(%02x) ", c);
else
printk("%02x ", c);
print_data(regs->pc, word, i);
}
}
printk("\n");
@@ -185,13 +207,11 @@ void nommu_dump_state(struct pt_regs *regs,
printk("\nCode: ");
for (i = -24; i < 24; i++) {
unsigned char c;
c = ((unsigned char *)(__pa(regs->pc)))[i];
unsigned long word;
if (i == 0)
printk("(%02x) ", c);
else
printk("%02x ", c);
word = ((unsigned long *)(__pa(regs->pc)))[i];
print_data(regs->pc, word, i);
}
printk("\n");
}
@@ -215,16 +235,7 @@ void __noreturn die(const char *str, struct pt_regs *regs, long err)
make_task_dead(SIGSEGV);
}
/* This is normally the 'Oops' routine */
void die_if_kernel(const char *str, struct pt_regs *regs, long err)
{
if (user_mode(regs))
return;
die(str, regs, err);
}
void unhandled_exception(struct pt_regs *regs, int ea, int vector)
asmlinkage void unhandled_exception(struct pt_regs *regs, int ea, int vector)
{
printk("Unable to handle exception at EA =0x%x, vector 0x%x",
ea, vector);

View File

@@ -15,6 +15,7 @@
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/timex.h>
#include <asm/param.h>
#include <asm/delay.h>
#include <asm/timex.h>

View File

@@ -18,15 +18,13 @@
#include <linux/perf_event.h>
#include <linux/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/siginfo.h>
#include <asm/signal.h>
#define NUM_TLB_ENTRIES 64
#define TLB_OFFSET(add) (((add) >> PAGE_SHIFT) & (NUM_TLB_ENTRIES-1))
unsigned long pte_misses; /* updated by do_page_fault() */
unsigned long pte_errors; /* updated by do_page_fault() */
/* __PHX__ :: - check the vmalloc_fault in do_page_fault()
* - also look into include/asm/mmu_context.h
*/
@@ -223,8 +221,6 @@ no_context:
{
const struct exception_table_entry *entry;
__asm__ __volatile__("l.nop 42");
if ((entry = search_exception_tables(regs->pc)) != NULL) {
/* Adjust the instruction pointer in the stackframe */
regs->pc = entry->fixup;
@@ -252,9 +248,6 @@ no_context:
*/
out_of_memory:
__asm__ __volatile__("l.nop 42");
__asm__ __volatile__("l.nop 1");
mmap_read_unlock(mm);
if (!user_mode(regs))
goto no_context;

View File

@@ -128,7 +128,7 @@ void local_flush_tlb_mm(struct mm_struct *mm)
/* Was seeing bugs with the mm struct passed to us. Scrapped most of
this function. */
/* Several architctures do this */
/* Several architectures do this */
local_flush_tlb_all();
}

View File

@@ -58,6 +58,7 @@ struct workqueue_struct *ib_comp_wq;
struct workqueue_struct *ib_comp_unbound_wq;
struct workqueue_struct *ib_wq;
EXPORT_SYMBOL_GPL(ib_wq);
static struct workqueue_struct *ib_unreg_wq;
/*
* Each of the three rwsem locks (devices, clients, client_data) protects the
@@ -1602,7 +1603,7 @@ void ib_unregister_device_queued(struct ib_device *ib_dev)
WARN_ON(!refcount_read(&ib_dev->refcount));
WARN_ON(!ib_dev->ops.dealloc_driver);
get_device(&ib_dev->dev);
if (!queue_work(system_unbound_wq, &ib_dev->unregistration_work))
if (!queue_work(ib_unreg_wq, &ib_dev->unregistration_work))
put_device(&ib_dev->dev);
}
EXPORT_SYMBOL(ib_unregister_device_queued);
@@ -2751,27 +2752,28 @@ static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
static int __init ib_core_init(void)
{
int ret;
int ret = -ENOMEM;
ib_wq = alloc_workqueue("infiniband", 0, 0);
if (!ib_wq)
return -ENOMEM;
ib_unreg_wq = alloc_workqueue("ib-unreg-wq", WQ_UNBOUND,
WQ_UNBOUND_MAX_ACTIVE);
if (!ib_unreg_wq)
goto err;
ib_comp_wq = alloc_workqueue("ib-comp-wq",
WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
if (!ib_comp_wq) {
ret = -ENOMEM;
goto err;
}
if (!ib_comp_wq)
goto err_unbound;
ib_comp_unbound_wq =
alloc_workqueue("ib-comp-unb-wq",
WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM |
WQ_SYSFS, WQ_UNBOUND_MAX_ACTIVE);
if (!ib_comp_unbound_wq) {
ret = -ENOMEM;
if (!ib_comp_unbound_wq)
goto err_comp;
}
ret = class_register(&ib_class);
if (ret) {
@@ -2831,6 +2833,8 @@ err_comp_unbound:
destroy_workqueue(ib_comp_unbound_wq);
err_comp:
destroy_workqueue(ib_comp_wq);
err_unbound:
destroy_workqueue(ib_unreg_wq);
err:
destroy_workqueue(ib_wq);
return ret;
@@ -2852,7 +2856,7 @@ static void __exit ib_core_cleanup(void)
destroy_workqueue(ib_comp_wq);
/* Make sure that any pending umem accounting work is done. */
destroy_workqueue(ib_wq);
flush_workqueue(system_unbound_wq);
destroy_workqueue(ib_unreg_wq);
WARN_ON(!xa_empty(&clients));
WARN_ON(!xa_empty(&devices));
}

View File

@@ -1739,7 +1739,7 @@ static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!device)
return -EINVAL;
if (!(device->attrs.device_cap_flags & IB_DEVICE_ALLOW_USER_UNREG)) {
if (!(device->attrs.kernel_cap_flags & IBK_ALLOW_USER_UNREG)) {
ib_device_put(device);
return -EINVAL;
}

View File

@@ -1034,10 +1034,9 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb,
struct netlink_ext_ack *extack)
{
unsigned long flags;
struct ib_sa_query *query;
struct ib_sa_query *query = NULL, *iter;
struct ib_mad_send_buf *send_buf;
struct ib_mad_send_wc mad_send_wc;
int found = 0;
int ret;
if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
@@ -1045,20 +1044,21 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb,
return -EPERM;
spin_lock_irqsave(&ib_nl_request_lock, flags);
list_for_each_entry(query, &ib_nl_request_list, list) {
list_for_each_entry(iter, &ib_nl_request_list, list) {
/*
* If the query is cancelled, let the timeout routine
* take care of it.
*/
if (nlh->nlmsg_seq == query->seq) {
found = !ib_sa_query_cancelled(query);
if (found)
list_del(&query->list);
if (nlh->nlmsg_seq == iter->seq) {
if (!ib_sa_query_cancelled(iter)) {
list_del(&iter->list);
query = iter;
}
break;
}
}
if (!found) {
if (!query) {
spin_unlock_irqrestore(&ib_nl_request_lock, flags);
goto resp_out;
}

View File

@@ -455,7 +455,7 @@ retry:
break;
}
}
/* upon sucesss lock should stay on hold for the callee */
/* upon success lock should stay on hold for the callee */
if (!ret)
ret = dma_index - start_idx;
else

View File

@@ -337,7 +337,7 @@ static void copy_query_dev_fields(struct ib_ucontext *ucontext,
resp->hw_ver = attr->hw_ver;
resp->max_qp = attr->max_qp;
resp->max_qp_wr = attr->max_qp_wr;
resp->device_cap_flags = lower_32_bits(attr->device_cap_flags);
resp->device_cap_flags = lower_32_bits(attr->device_cap_flags);
resp->max_sge = min(attr->max_send_sge, attr->max_recv_sge);
resp->max_sge_rd = attr->max_sge_rd;
resp->max_cq = attr->max_cq;

View File

@@ -281,7 +281,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
}
rdma_restrack_add(&pd->res);
if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
if (device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY)
pd->local_dma_lkey = device->local_dma_lkey;
else
mr_access_flags |= IB_ACCESS_LOCAL_WRITE;
@@ -308,7 +308,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
pd->__internal_mr = mr;
if (!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY))
if (!(device->attrs.kernel_cap_flags & IBK_LOCAL_DMA_LKEY))
pd->local_dma_lkey = pd->__internal_mr->lkey;
if (flags & IB_PD_UNSAFE_GLOBAL_RKEY)
@@ -2131,8 +2131,8 @@ struct ib_mr *ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct ib_mr *mr;
if (access_flags & IB_ACCESS_ON_DEMAND) {
if (!(pd->device->attrs.device_cap_flags &
IB_DEVICE_ON_DEMAND_PAGING)) {
if (!(pd->device->attrs.kernel_cap_flags &
IBK_ON_DEMAND_PAGING)) {
pr_debug("ODP support not available\n");
return ERR_PTR(-EINVAL);
}

View File

@@ -146,13 +146,13 @@ int bnxt_re_query_device(struct ib_device *ibdev,
| IB_DEVICE_RC_RNR_NAK_GEN
| IB_DEVICE_SHUTDOWN_PORT
| IB_DEVICE_SYS_IMAGE_GUID
| IB_DEVICE_LOCAL_DMA_LKEY
| IB_DEVICE_RESIZE_MAX_WR
| IB_DEVICE_PORT_ACTIVE_EVENT
| IB_DEVICE_N_NOTIFY_CQ
| IB_DEVICE_MEM_WINDOW
| IB_DEVICE_MEM_WINDOW_TYPE_2B
| IB_DEVICE_MEM_MGT_EXTENSIONS;
ib_attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
ib_attr->max_send_sge = dev_attr->max_qp_sges;
ib_attr->max_recv_sge = dev_attr->max_qp_sges;
ib_attr->max_sge_rd = dev_attr->max_qp_sges;

View File

@@ -314,7 +314,6 @@ enum db_state {
struct c4iw_dev {
struct ib_device ibdev;
struct c4iw_rdev rdev;
u32 device_cap_flags;
struct xarray cqs;
struct xarray qps;
struct xarray mrs;

View File

@@ -269,7 +269,10 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
dev->rdev.lldi.ports[0]->dev_addr);
props->hw_ver = CHELSIO_CHIP_RELEASE(dev->rdev.lldi.adapter_type);
props->fw_ver = dev->rdev.lldi.fw_vers;
props->device_cap_flags = dev->device_cap_flags;
props->device_cap_flags = IB_DEVICE_MEM_WINDOW;
props->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
if (fastreg_support)
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
props->page_size_cap = T4_PAGESIZE_MASK;
props->vendor_id = (u32)dev->rdev.lldi.pdev->vendor;
props->vendor_part_id = (u32)dev->rdev.lldi.pdev->device;
@@ -529,9 +532,6 @@ void c4iw_register_device(struct work_struct *work)
pr_debug("c4iw_dev %p\n", dev);
addrconf_addr_eui48((u8 *)&dev->ibdev.node_guid,
dev->rdev.lldi.ports[0]->dev_addr);
dev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_WINDOW;
if (fastreg_support)
dev->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
dev->ibdev.local_dma_lkey = 0;
dev->ibdev.node_type = RDMA_NODE_RNIC;
BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX);

View File

@@ -137,61 +137,6 @@
#define HFI1_USER_SWVERSION ((HFI1_USER_SWMAJOR << HFI1_SWMAJOR_SHIFT) | \
HFI1_USER_SWMINOR)
#ifndef HFI1_KERN_TYPE
#define HFI1_KERN_TYPE 0
#endif
/*
* Similarly, this is the kernel version going back to the user. It's
* slightly different, in that we want to tell if the driver was built as
* part of a Intel release, or from the driver from openfabrics.org,
* kernel.org, or a standard distribution, for support reasons.
* The high bit is 0 for non-Intel and 1 for Intel-built/supplied.
*
* It's returned by the driver to the user code during initialization in the
* spi_sw_version field of hfi1_base_info, so the user code can in turn
* check for compatibility with the kernel.
*/
#define HFI1_KERN_SWVERSION ((HFI1_KERN_TYPE << 31) | HFI1_USER_SWVERSION)
/*
* Define the driver version number. This is something that refers only
* to the driver itself, not the software interfaces it supports.
*/
#ifndef HFI1_DRIVER_VERSION_BASE
#define HFI1_DRIVER_VERSION_BASE "0.9-294"
#endif
/* create the final driver version string */
#ifdef HFI1_IDSTR
#define HFI1_DRIVER_VERSION HFI1_DRIVER_VERSION_BASE " " HFI1_IDSTR
#else
#define HFI1_DRIVER_VERSION HFI1_DRIVER_VERSION_BASE
#endif
/*
* Diagnostics can send a packet by writing the following
* struct to the diag packet special file.
*
* This allows a custom PBC qword, so that special modes and deliberate
* changes to CRCs can be used.
*/
#define _DIAG_PKT_VERS 1
struct diag_pkt {
__u16 version; /* structure version */
__u16 unit; /* which device */
__u16 sw_index; /* send sw index to use */
__u16 len; /* data length, in bytes */
__u16 port; /* port number */
__u16 unused;
__u32 flags; /* call flags */
__u64 data; /* user data pointer */
__u64 pbc; /* PBC for the packet */
};
/* diag_pkt flags */
#define F_DIAGPKT_WAIT 0x1 /* wait until packet is sent */
/*
* The next set of defines are for packet headers, and chip register
* and memory bits that are visible to and/or used by user-mode software.

View File

@@ -29,12 +29,6 @@
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
/*
* The size has to be longer than this string, so we can append
* board/chip information to it in the initialization code.
*/
const char ib_hfi1_version[] = HFI1_DRIVER_VERSION "\n";
DEFINE_MUTEX(hfi1_mutex); /* general driver use */
unsigned int hfi1_max_mtu = HFI1_DEFAULT_MAX_MTU;

View File

@@ -72,7 +72,7 @@ static int read_efi_var(const char *name, unsigned long *size,
* is in the EFIVAR_FS code and may not be compiled in.
* However, even that is insufficient since it does not cover
* EFI_BUFFER_TOO_SMALL which could be an important return.
* For now, just split out succces or not found.
* For now, just split out success or not found.
*/
ret = status == EFI_SUCCESS ? 0 :
status == EFI_NOT_FOUND ? -ENOENT :

View File

@@ -265,6 +265,8 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from)
unsigned long dim = from->nr_segs;
int idx;
if (!HFI1_CAP_IS_KSET(SDMA))
return -EINVAL;
idx = srcu_read_lock(&fd->pq_srcu);
pq = srcu_dereference(fd->pq, &fd->pq_srcu);
if (!cq || !pq) {
@@ -1220,7 +1222,7 @@ static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len)
memset(&binfo, 0, sizeof(binfo));
binfo.hw_version = dd->revision;
binfo.sw_version = HFI1_KERN_SWVERSION;
binfo.sw_version = HFI1_USER_SWVERSION;
binfo.bthqp = RVT_KDETH_QP_PREFIX;
binfo.jkey = uctxt->jkey;
/*

View File

@@ -489,7 +489,7 @@ void set_link_ipg(struct hfi1_pportdata *ppd)
u16 shift, mult;
u64 src;
u32 current_egress_rate; /* Mbits /sec */
u32 max_pkt_time;
u64 max_pkt_time;
/*
* max_pkt_time is the maximum packet egress time in units
* of the fabric clock period 1/(805 MHz).

View File

@@ -1288,11 +1288,13 @@ void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
kvfree(sde->tx_ring);
sde->tx_ring = NULL;
}
spin_lock_irq(&dd->sde_map_lock);
sdma_map_free(rcu_access_pointer(dd->sdma_map));
RCU_INIT_POINTER(dd->sdma_map, NULL);
spin_unlock_irq(&dd->sde_map_lock);
synchronize_rcu();
if (rcu_access_pointer(dd->sdma_map)) {
spin_lock_irq(&dd->sde_map_lock);
sdma_map_free(rcu_access_pointer(dd->sdma_map));
RCU_INIT_POINTER(dd->sdma_map, NULL);
spin_unlock_irq(&dd->sde_map_lock);
synchronize_rcu();
}
kfree(dd->per_sdma);
dd->per_sdma = NULL;

View File

@@ -1300,8 +1300,8 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE |
IB_DEVICE_MEM_MGT_EXTENSIONS |
IB_DEVICE_RDMA_NETDEV_OPA;
IB_DEVICE_MEM_MGT_EXTENSIONS;
rdi->dparms.props.kernel_cap_flags = IBK_RDMA_NETDEV_OPA;
rdi->dparms.props.page_size_cap = PAGE_SIZE;
rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3;
rdi->dparms.props.vendor_part_id = dd->pcidev->device;

View File

@@ -106,16 +106,6 @@ enum {
SERV_TYPE_XRC = 5,
};
enum hns_roce_qp_state {
HNS_ROCE_QP_STATE_RST,
HNS_ROCE_QP_STATE_INIT,
HNS_ROCE_QP_STATE_RTR,
HNS_ROCE_QP_STATE_RTS,
HNS_ROCE_QP_STATE_SQD,
HNS_ROCE_QP_STATE_ERR,
HNS_ROCE_QP_NUM_STATE,
};
enum hns_roce_event {
HNS_ROCE_EVENT_TYPE_PATH_MIG = 0x01,
HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED = 0x02,
@@ -139,8 +129,6 @@ enum hns_roce_event {
HNS_ROCE_EVENT_TYPE_INVALID_XRCETH = 0x17,
};
#define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12
enum {
HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0),
HNS_ROCE_CAP_FLAG_ROCE_V1_V2 = BIT(1),
@@ -535,6 +523,11 @@ struct hns_roce_cmd_context {
u16 busy;
};
enum hns_roce_cmdq_state {
HNS_ROCE_CMDQ_STATE_NORMAL,
HNS_ROCE_CMDQ_STATE_FATAL_ERR,
};
struct hns_roce_cmdq {
struct dma_pool *pool;
struct semaphore poll_sem;
@@ -554,6 +547,7 @@ struct hns_roce_cmdq {
* close device, switch into poll mode(non event mode)
*/
u8 use_events;
enum hns_roce_cmdq_state state;
};
struct hns_roce_cmd_mailbox {
@@ -657,6 +651,11 @@ struct hns_roce_ceqe {
__le32 rsv[15];
};
#define CEQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_ceqe, h, l)
#define CEQE_CQN CEQE_FIELD_LOC(23, 0)
#define CEQE_OWNER CEQE_FIELD_LOC(31, 31)
struct hns_roce_aeqe {
__le32 asyn;
union {
@@ -676,6 +675,13 @@ struct hns_roce_aeqe {
__le32 rsv[12];
};
#define AEQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_aeqe, h, l)
#define AEQE_EVENT_TYPE AEQE_FIELD_LOC(7, 0)
#define AEQE_SUB_TYPE AEQE_FIELD_LOC(15, 8)
#define AEQE_OWNER AEQE_FIELD_LOC(31, 31)
#define AEQE_EVENT_QUEUE_NUM AEQE_FIELD_LOC(55, 32)
struct hns_roce_eq {
struct hns_roce_dev *hr_dev;
void __iomem *db_reg;
@@ -725,7 +731,6 @@ struct hns_roce_caps {
u32 num_pi_qps;
u32 reserved_qps;
int num_qpc_timer;
int num_cqc_timer;
u32 num_srqs;
u32 max_wqes;
u32 max_srq_wrs;
@@ -1191,7 +1196,6 @@ void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, unsigned int n);
void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, unsigned int n);
bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, u32 nreq,
struct ib_cq *ib_cq);
enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state);
void hns_roce_lock_cqs(struct hns_roce_cq *send_cq,
struct hns_roce_cq *recv_cq);
void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq,

View File

@@ -149,8 +149,7 @@ static void set_atomic_seg(const struct ib_send_wr *wr,
aseg->cmp_data = 0;
}
roce_set_field(rc_sq_wqe->byte_16, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge);
}
static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
@@ -271,8 +270,7 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
dseg += sizeof(struct hns_roce_v2_rc_send_wqe);
if (msg_len <= HNS_ROCE_V2_MAX_RC_INL_INN_SZ) {
roce_set_bit(rc_sq_wqe->byte_20,
V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 0);
hr_reg_clear(rc_sq_wqe, RC_SEND_WQE_INL_TYPE);
for (i = 0; i < wr->num_sge; i++) {
memcpy(dseg, ((void *)wr->sg_list[i].addr),
@@ -280,17 +278,13 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ib_send_wr *wr,
dseg += wr->sg_list[i].length;
}
} else {
roce_set_bit(rc_sq_wqe->byte_20,
V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S, 1);
hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_INL_TYPE);
ret = fill_ext_sge_inl_data(qp, wr, &curr_idx, msg_len);
if (ret)
return ret;
roce_set_field(rc_sq_wqe->byte_16,
V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S,
curr_idx - *sge_idx);
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, curr_idx - *sge_idx);
}
*sge_idx = curr_idx;
@@ -309,12 +303,10 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
int j = 0;
int i;
roce_set_field(rc_sq_wqe->byte_20,
V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
(*sge_ind) & (qp->sge.sge_cnt - 1));
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_MSG_START_SGE_IDX,
(*sge_ind) & (qp->sge.sge_cnt - 1));
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S,
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_INLINE,
!!(wr->send_flags & IB_SEND_INLINE));
if (wr->send_flags & IB_SEND_INLINE)
return set_rc_inl(qp, wr, rc_sq_wqe, sge_ind);
@@ -339,9 +331,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr,
valid_num_sge - HNS_ROCE_SGE_IN_WQE);
}
roce_set_field(rc_sq_wqe->byte_16,
V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M,
V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge);
return 0;
}
@@ -412,8 +402,7 @@ static int set_ud_opcode(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
ud_sq_wqe->immtdata = get_immtdata(wr);
roce_set_field(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OPCODE_M,
V2_UD_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op));
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_OPCODE, to_hr_opcode(ib_op));
return 0;
}
@@ -424,21 +413,15 @@ static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
struct ib_device *ib_dev = ah->ibah.device;
struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
roce_set_field(ud_sq_wqe->byte_24, V2_UD_SEND_WQE_BYTE_24_UDPSPN_M,
V2_UD_SEND_WQE_BYTE_24_UDPSPN_S, ah->av.udp_sport);
roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M,
V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S, ah->av.hop_limit);
roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M,
V2_UD_SEND_WQE_BYTE_36_TCLASS_S, ah->av.tclass);
roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M,
V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, ah->av.flowlabel);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_UDPSPN, ah->av.udp_sport);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_HOPLIMIT, ah->av.hop_limit);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_TCLASS, ah->av.tclass);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_FLOW_LABEL, ah->av.flowlabel);
if (WARN_ON(ah->av.sl > MAX_SERVICE_LEVEL))
return -EINVAL;
roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M,
V2_UD_SEND_WQE_BYTE_40_SL_S, ah->av.sl);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SL, ah->av.sl);
ud_sq_wqe->sgid_index = ah->av.gid_index;
@@ -448,10 +431,8 @@ static int fill_ud_av(struct hns_roce_v2_ud_send_wqe *ud_sq_wqe,
if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
return 0;
roce_set_bit(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S,
ah->av.vlan_en);
roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_VLAN_M,
V2_UD_SEND_WQE_BYTE_36_VLAN_S, ah->av.vlan_id);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_VLAN_EN, ah->av.vlan_en);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_VLAN, ah->av.vlan_id);
return 0;
}
@@ -476,27 +457,19 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
ud_sq_wqe->msg_len = cpu_to_le32(msg_len);
roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_CQE_S,
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_CQE,
!!(wr->send_flags & IB_SEND_SIGNALED));
roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_SE_S,
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SE,
!!(wr->send_flags & IB_SEND_SOLICITED));
roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_PD_M,
V2_UD_SEND_WQE_BYTE_16_PD_S, to_hr_pd(qp->ibqp.pd)->pdn);
roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M,
V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge);
roce_set_field(ud_sq_wqe->byte_20,
V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M,
V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
curr_idx & (qp->sge.sge_cnt - 1));
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_PD, to_hr_pd(qp->ibqp.pd)->pdn);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SGE_NUM, valid_num_sge);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_MSG_START_SGE_IDX,
curr_idx & (qp->sge.sge_cnt - 1));
ud_sq_wqe->qkey = cpu_to_le32(ud_wr(wr)->remote_qkey & 0x80000000 ?
qp->qkey : ud_wr(wr)->remote_qkey);
roce_set_field(ud_sq_wqe->byte_32, V2_UD_SEND_WQE_BYTE_32_DQPN_M,
V2_UD_SEND_WQE_BYTE_32_DQPN_S, ud_wr(wr)->remote_qpn);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_DQPN, ud_wr(wr)->remote_qpn);
ret = fill_ud_av(ud_sq_wqe, ah);
if (ret)
@@ -516,8 +489,7 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp,
dma_wmb();
*sge_idx = curr_idx;
roce_set_bit(ud_sq_wqe->byte_4, V2_UD_SEND_WQE_BYTE_4_OWNER_S,
owner_bit);
hr_reg_write(ud_sq_wqe, UD_SEND_WQE_OWNER, owner_bit);
return 0;
}
@@ -553,7 +525,7 @@ static int set_rc_opcode(struct hns_roce_dev *hr_dev,
ret = -EOPNOTSUPP;
break;
case IB_WR_LOCAL_INV:
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1);
hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_SO);
fallthrough;
case IB_WR_SEND_WITH_INV:
rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey);
@@ -565,11 +537,11 @@ static int set_rc_opcode(struct hns_roce_dev *hr_dev,
if (unlikely(ret))
return ret;
roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M,
V2_RC_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op));
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_OPCODE, to_hr_opcode(ib_op));
return ret;
}
static inline int set_rc_wqe(struct hns_roce_qp *qp,
const struct ib_send_wr *wr,
void *wqe, unsigned int *sge_idx,
@@ -590,13 +562,13 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
if (WARN_ON(ret))
return ret;
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FENCE_S,
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_FENCE,
(wr->send_flags & IB_SEND_FENCE) ? 1 : 0);
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SE_S,
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SE,
(wr->send_flags & IB_SEND_SOLICITED) ? 1 : 0);
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_CQE_S,
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_CQE,
(wr->send_flags & IB_SEND_SIGNALED) ? 1 : 0);
if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
@@ -616,8 +588,7 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp,
dma_wmb();
*sge_idx = curr_idx;
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OWNER_S,
owner_bit);
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_OWNER, owner_bit);
return ret;
}
@@ -682,14 +653,11 @@ static void write_dwqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe;
/* All kinds of DirectWQE have the same header field layout */
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FLAG_S, 1);
roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M,
V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S, qp->sl);
roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M,
V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S,
qp->sl >> HNS_ROCE_SL_SHIFT);
roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M,
V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head);
hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_FLAG);
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_L, qp->sl);
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_DB_SL_H,
qp->sl >> HNS_ROCE_SL_SHIFT);
hr_reg_write(rc_sq_wqe, RC_SEND_WQE_WQE_INDEX, qp->sq.head);
hns_roce_write512(hr_dev, wqe, qp->sq.db_reg);
}
@@ -1265,6 +1233,16 @@ static int hns_roce_cmq_csq_done(struct hns_roce_dev *hr_dev)
return tail == priv->cmq.csq.head;
}
static void update_cmdq_status(struct hns_roce_dev *hr_dev)
{
struct hns_roce_v2_priv *priv = hr_dev->priv;
struct hnae3_handle *handle = priv->handle;
if (handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT ||
handle->rinfo.instance_state == HNS_ROCE_STATE_INIT)
hr_dev->cmd.state = HNS_ROCE_CMDQ_STATE_FATAL_ERR;
}
static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
struct hns_roce_cmq_desc *desc, int num)
{
@@ -1296,7 +1274,8 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
} while (++timeout < priv->cmq.tx_timeout);
if (hns_roce_cmq_csq_done(hr_dev)) {
for (ret = 0, i = 0; i < num; i++) {
ret = 0;
for (i = 0; i < num; i++) {
/* check the result of hardware write back */
desc[i] = csq->desc[tail++];
if (tail == csq->desc_num)
@@ -1318,6 +1297,8 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
csq->head, tail);
csq->head = tail;
update_cmdq_status(hr_dev);
ret = -EAGAIN;
}
@@ -1332,6 +1313,9 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
bool busy;
int ret;
if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR)
return -EIO;
if (!v2_chk_mbox_is_avail(hr_dev, &busy))
return busy ? -EBUSY : 0;
@@ -1499,7 +1483,7 @@ static void __hns_roce_function_clear(struct hns_roce_dev *hr_dev, int vf_id)
if (ret)
continue;
if (roce_get_bit(resp->func_done, FUNC_CLEAR_RST_FUN_DONE_S)) {
if (hr_reg_read(resp, FUNC_CLEAR_RST_FUN_DONE)) {
if (vf_id == 0)
hr_dev->is_reset = true;
return;
@@ -1510,7 +1494,7 @@ out:
hns_roce_func_clr_rst_proc(hr_dev, ret, fclr_write_fail_flag);
}
static void hns_roce_free_vf_resource(struct hns_roce_dev *hr_dev, int vf_id)
static int hns_roce_free_vf_resource(struct hns_roce_dev *hr_dev, int vf_id)
{
enum hns_roce_opcode_type opcode = HNS_ROCE_OPC_ALLOC_VF_RES;
struct hns_roce_cmq_desc desc[2];
@@ -1521,17 +1505,29 @@ static void hns_roce_free_vf_resource(struct hns_roce_dev *hr_dev, int vf_id)
desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
hns_roce_cmq_setup_basic_desc(&desc[1], opcode, false);
hr_reg_write(req_a, FUNC_RES_A_VF_ID, vf_id);
hns_roce_cmq_send(hr_dev, desc, 2);
return hns_roce_cmq_send(hr_dev, desc, 2);
}
static void hns_roce_function_clear(struct hns_roce_dev *hr_dev)
{
int ret;
int i;
if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR)
return;
for (i = hr_dev->func_num - 1; i >= 0; i--) {
__hns_roce_function_clear(hr_dev, i);
if (i != 0)
hns_roce_free_vf_resource(hr_dev, i);
if (i == 0)
continue;
ret = hns_roce_free_vf_resource(hr_dev, i);
if (ret)
ibdev_err(&hr_dev->ib_dev,
"failed to free vf resource, vf_id = %d, ret = %d.\n",
i, ret);
}
}
@@ -1757,17 +1753,16 @@ static int __hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev,
swt = (struct hns_roce_vf_switch *)desc.data;
hns_roce_cmq_setup_basic_desc(&desc, HNS_SWITCH_PARAMETER_CFG, true);
swt->rocee_sel |= cpu_to_le32(HNS_ICL_SWITCH_CMD_ROCEE_SEL);
roce_set_field(swt->fun_id, VF_SWITCH_DATA_FUN_ID_VF_ID_M,
VF_SWITCH_DATA_FUN_ID_VF_ID_S, vf_id);
hr_reg_write(swt, VF_SWITCH_VF_ID, vf_id);
ret = hns_roce_cmq_send(hr_dev, &desc, 1);
if (ret)
return ret;
desc.flag = cpu_to_le16(HNS_ROCE_CMD_FLAG_IN);
desc.flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR);
roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LPBK_S, 1);
roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S, 0);
roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S, 1);
hr_reg_enable(swt, VF_SWITCH_ALW_LPBK);
hr_reg_clear(swt, VF_SWITCH_ALW_LCL_LPBK);
hr_reg_enable(swt, VF_SWITCH_ALW_DST_OVRD);
return hns_roce_cmq_send(hr_dev, &desc, 1);
}
@@ -1947,7 +1942,7 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
caps->num_mtpts = HNS_ROCE_V2_MAX_MTPT_NUM;
caps->num_pds = HNS_ROCE_V2_MAX_PD_NUM;
caps->num_qpc_timer = HNS_ROCE_V2_MAX_QPC_TIMER_NUM;
caps->num_cqc_timer = HNS_ROCE_V2_MAX_CQC_TIMER_NUM;
caps->cqc_timer_bt_num = HNS_ROCE_V2_MAX_CQC_TIMER_BT_NUM;
caps->max_qp_init_rdma = HNS_ROCE_V2_MAX_QP_INIT_RDMA;
caps->max_qp_dest_rdma = HNS_ROCE_V2_MAX_QP_DEST_RDMA;
@@ -2243,7 +2238,6 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
caps->max_rq_sg = roundup_pow_of_two(caps->max_rq_sg);
caps->max_extend_sg = le32_to_cpu(resp_a->max_extend_sg);
caps->num_qpc_timer = le16_to_cpu(resp_a->num_qpc_timer);
caps->num_cqc_timer = le16_to_cpu(resp_a->num_cqc_timer);
caps->max_srq_sges = le16_to_cpu(resp_a->max_srq_sges);
caps->max_srq_sges = roundup_pow_of_two(caps->max_srq_sges);
caps->num_aeq_vectors = resp_a->num_aeq_vectors;
@@ -2270,87 +2264,39 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
ctx_hop_num = resp_b->ctx_hop_num;
pbl_hop_num = resp_b->pbl_hop_num;
caps->num_pds = 1 << roce_get_field(resp_c->cap_flags_num_pds,
V2_QUERY_PF_CAPS_C_NUM_PDS_M,
V2_QUERY_PF_CAPS_C_NUM_PDS_S);
caps->flags = roce_get_field(resp_c->cap_flags_num_pds,
V2_QUERY_PF_CAPS_C_CAP_FLAGS_M,
V2_QUERY_PF_CAPS_C_CAP_FLAGS_S);
caps->num_pds = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_PDS);
caps->flags = hr_reg_read(resp_c, PF_CAPS_C_CAP_FLAGS);
caps->flags |= le16_to_cpu(resp_d->cap_flags_ex) <<
HNS_ROCE_CAP_FLAGS_EX_SHIFT;
caps->num_cqs = 1 << roce_get_field(resp_c->max_gid_num_cqs,
V2_QUERY_PF_CAPS_C_NUM_CQS_M,
V2_QUERY_PF_CAPS_C_NUM_CQS_S);
caps->gid_table_len[0] = roce_get_field(resp_c->max_gid_num_cqs,
V2_QUERY_PF_CAPS_C_MAX_GID_M,
V2_QUERY_PF_CAPS_C_MAX_GID_S);
caps->max_cqes = 1 << roce_get_field(resp_c->cq_depth,
V2_QUERY_PF_CAPS_C_CQ_DEPTH_M,
V2_QUERY_PF_CAPS_C_CQ_DEPTH_S);
caps->num_mtpts = 1 << roce_get_field(resp_c->num_mrws,
V2_QUERY_PF_CAPS_C_NUM_MRWS_M,
V2_QUERY_PF_CAPS_C_NUM_MRWS_S);
caps->num_qps = 1 << roce_get_field(resp_c->ord_num_qps,
V2_QUERY_PF_CAPS_C_NUM_QPS_M,
V2_QUERY_PF_CAPS_C_NUM_QPS_S);
caps->max_qp_init_rdma = roce_get_field(resp_c->ord_num_qps,
V2_QUERY_PF_CAPS_C_MAX_ORD_M,
V2_QUERY_PF_CAPS_C_MAX_ORD_S);
caps->num_cqs = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_CQS);
caps->gid_table_len[0] = hr_reg_read(resp_c, PF_CAPS_C_MAX_GID);
caps->max_cqes = 1 << hr_reg_read(resp_c, PF_CAPS_C_CQ_DEPTH);
caps->num_mtpts = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_MRWS);
caps->num_qps = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_QPS);
caps->max_qp_init_rdma = hr_reg_read(resp_c, PF_CAPS_C_MAX_ORD);
caps->max_qp_dest_rdma = caps->max_qp_init_rdma;
caps->max_wqes = 1 << le16_to_cpu(resp_c->sq_depth);
caps->num_srqs = 1 << roce_get_field(resp_d->wq_hop_num_max_srqs,
V2_QUERY_PF_CAPS_D_NUM_SRQS_M,
V2_QUERY_PF_CAPS_D_NUM_SRQS_S);
caps->cong_type = roce_get_field(resp_d->wq_hop_num_max_srqs,
V2_QUERY_PF_CAPS_D_CONG_TYPE_M,
V2_QUERY_PF_CAPS_D_CONG_TYPE_S);
caps->num_srqs = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_SRQS);
caps->cong_type = hr_reg_read(resp_d, PF_CAPS_D_CONG_TYPE);
caps->max_srq_wrs = 1 << le16_to_cpu(resp_d->srq_depth);
caps->ceqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_CEQ_DEPTH);
caps->num_comp_vectors = hr_reg_read(resp_d, PF_CAPS_D_NUM_CEQS);
caps->aeqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_AEQ_DEPTH);
caps->default_aeq_arm_st = hr_reg_read(resp_d, PF_CAPS_D_AEQ_ARM_ST);
caps->default_ceq_arm_st = hr_reg_read(resp_d, PF_CAPS_D_CEQ_ARM_ST);
caps->reserved_pds = hr_reg_read(resp_d, PF_CAPS_D_RSV_PDS);
caps->num_uars = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_UARS);
caps->reserved_qps = hr_reg_read(resp_d, PF_CAPS_D_RSV_QPS);
caps->reserved_uars = hr_reg_read(resp_d, PF_CAPS_D_RSV_UARS);
caps->ceqe_depth = 1 << roce_get_field(resp_d->num_ceqs_ceq_depth,
V2_QUERY_PF_CAPS_D_CEQ_DEPTH_M,
V2_QUERY_PF_CAPS_D_CEQ_DEPTH_S);
caps->num_comp_vectors = roce_get_field(resp_d->num_ceqs_ceq_depth,
V2_QUERY_PF_CAPS_D_NUM_CEQS_M,
V2_QUERY_PF_CAPS_D_NUM_CEQS_S);
caps->aeqe_depth = 1 << roce_get_field(resp_d->arm_st_aeq_depth,
V2_QUERY_PF_CAPS_D_AEQ_DEPTH_M,
V2_QUERY_PF_CAPS_D_AEQ_DEPTH_S);
caps->default_aeq_arm_st = roce_get_field(resp_d->arm_st_aeq_depth,
V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_M,
V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_S);
caps->default_ceq_arm_st = roce_get_field(resp_d->arm_st_aeq_depth,
V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_M,
V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_S);
caps->reserved_pds = roce_get_field(resp_d->num_uars_rsv_pds,
V2_QUERY_PF_CAPS_D_RSV_PDS_M,
V2_QUERY_PF_CAPS_D_RSV_PDS_S);
caps->num_uars = 1 << roce_get_field(resp_d->num_uars_rsv_pds,
V2_QUERY_PF_CAPS_D_NUM_UARS_M,
V2_QUERY_PF_CAPS_D_NUM_UARS_S);
caps->reserved_qps = roce_get_field(resp_d->rsv_uars_rsv_qps,
V2_QUERY_PF_CAPS_D_RSV_QPS_M,
V2_QUERY_PF_CAPS_D_RSV_QPS_S);
caps->reserved_uars = roce_get_field(resp_d->rsv_uars_rsv_qps,
V2_QUERY_PF_CAPS_D_RSV_UARS_M,
V2_QUERY_PF_CAPS_D_RSV_UARS_S);
caps->reserved_mrws = roce_get_field(resp_e->chunk_size_shift_rsv_mrws,
V2_QUERY_PF_CAPS_E_RSV_MRWS_M,
V2_QUERY_PF_CAPS_E_RSV_MRWS_S);
caps->chunk_sz = 1 << roce_get_field(resp_e->chunk_size_shift_rsv_mrws,
V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_M,
V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_S);
caps->reserved_cqs = roce_get_field(resp_e->rsv_cqs,
V2_QUERY_PF_CAPS_E_RSV_CQS_M,
V2_QUERY_PF_CAPS_E_RSV_CQS_S);
caps->reserved_srqs = roce_get_field(resp_e->rsv_srqs,
V2_QUERY_PF_CAPS_E_RSV_SRQS_M,
V2_QUERY_PF_CAPS_E_RSV_SRQS_S);
caps->reserved_lkey = roce_get_field(resp_e->rsv_lkey,
V2_QUERY_PF_CAPS_E_RSV_LKEYS_M,
V2_QUERY_PF_CAPS_E_RSV_LKEYS_S);
caps->reserved_mrws = hr_reg_read(resp_e, PF_CAPS_E_RSV_MRWS);
caps->chunk_sz = 1 << hr_reg_read(resp_e, PF_CAPS_E_CHUNK_SIZE_SHIFT);
caps->reserved_cqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_CQS);
caps->reserved_srqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_SRQS);
caps->reserved_lkey = hr_reg_read(resp_e, PF_CAPS_E_RSV_LKEYS);
caps->default_ceq_max_cnt = le16_to_cpu(resp_e->ceq_max_cnt);
caps->default_ceq_period = le16_to_cpu(resp_e->ceq_period);
caps->default_aeq_max_cnt = le16_to_cpu(resp_e->aeq_max_cnt);
@@ -2365,15 +2311,9 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
caps->cqe_hop_num = pbl_hop_num;
caps->srqwqe_hop_num = pbl_hop_num;
caps->idx_hop_num = pbl_hop_num;
caps->wqe_sq_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs,
V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_M,
V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_S);
caps->wqe_sge_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs,
V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_M,
V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_S);
caps->wqe_rq_hop_num = roce_get_field(resp_d->wq_hop_num_max_srqs,
V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M,
V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S);
caps->wqe_sq_hop_num = hr_reg_read(resp_d, PF_CAPS_D_SQWQE_HOP_NUM);
caps->wqe_sge_hop_num = hr_reg_read(resp_d, PF_CAPS_D_EX_SGE_HOP_NUM);
caps->wqe_rq_hop_num = hr_reg_read(resp_d, PF_CAPS_D_RQWQE_HOP_NUM);
return 0;
}
@@ -3000,6 +2940,9 @@ static int v2_wait_mbox_complete(struct hns_roce_dev *hr_dev, u32 timeout,
mb_st = (struct hns_roce_mbox_status *)desc.data;
end = msecs_to_jiffies(timeout) + jiffies;
while (v2_chk_mbox_is_avail(hr_dev, &busy)) {
if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR)
return -EIO;
status = 0;
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST,
true);
@@ -3103,10 +3046,8 @@ static int config_sgid_table(struct hns_roce_dev *hr_dev,
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false);
roce_set_field(sgid_tb->table_idx_rsv, CFG_SGID_TB_TABLE_IDX_M,
CFG_SGID_TB_TABLE_IDX_S, gid_index);
roce_set_field(sgid_tb->vf_sgid_type_rsv, CFG_SGID_TB_VF_SGID_TYPE_M,
CFG_SGID_TB_VF_SGID_TYPE_S, sgid_type);
hr_reg_write(sgid_tb, CFG_SGID_TB_TABLE_IDX, gid_index);
hr_reg_write(sgid_tb, CFG_SGID_TB_VF_SGID_TYPE, sgid_type);
copy_gid(&sgid_tb->vf_sgid_l, gid);
@@ -3141,19 +3082,14 @@ static int config_gmv_table(struct hns_roce_dev *hr_dev,
copy_gid(&tb_a->vf_sgid_l, gid);
roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_SGID_TYPE_M,
CFG_GMV_TB_VF_SGID_TYPE_S, sgid_type);
roce_set_bit(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_EN_S,
vlan_id < VLAN_CFI_MASK);
roce_set_field(tb_a->vf_sgid_type_vlan, CFG_GMV_TB_VF_VLAN_ID_M,
CFG_GMV_TB_VF_VLAN_ID_S, vlan_id);
hr_reg_write(tb_a, GMV_TB_A_VF_SGID_TYPE, sgid_type);
hr_reg_write(tb_a, GMV_TB_A_VF_VLAN_EN, vlan_id < VLAN_CFI_MASK);
hr_reg_write(tb_a, GMV_TB_A_VF_VLAN_ID, vlan_id);
tb_b->vf_smac_l = cpu_to_le32(*(u32 *)mac);
roce_set_field(tb_b->vf_smac_h, CFG_GMV_TB_SMAC_H_M,
CFG_GMV_TB_SMAC_H_S, *(u16 *)&mac[4]);
roce_set_field(tb_b->table_idx_rsv, CFG_GMV_TB_SGID_IDX_M,
CFG_GMV_TB_SGID_IDX_S, gid_index);
hr_reg_write(tb_b, GMV_TB_B_SMAC_H, *(u16 *)&mac[4]);
hr_reg_write(tb_b, GMV_TB_B_SGID_IDX, gid_index);
return hns_roce_cmq_send(hr_dev, desc, 2);
}
@@ -3202,10 +3138,8 @@ static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
reg_smac_l = *(u32 *)(&addr[0]);
reg_smac_h = *(u16 *)(&addr[4]);
roce_set_field(smac_tb->tb_idx_rsv, CFG_SMAC_TB_IDX_M,
CFG_SMAC_TB_IDX_S, phy_port);
roce_set_field(smac_tb->vf_smac_h_rsv, CFG_SMAC_TB_VF_SMAC_H_M,
CFG_SMAC_TB_VF_SMAC_H_S, reg_smac_h);
hr_reg_write(smac_tb, CFG_SMAC_TB_IDX, phy_port);
hr_reg_write(smac_tb, CFG_SMAC_TB_VF_SMAC_H, reg_smac_h);
smac_tb->vf_smac_l = cpu_to_le32(reg_smac_l);
return hns_roce_cmq_send(hr_dev, &desc, 1);
@@ -3234,21 +3168,15 @@ static int set_mtpt_pbl(struct hns_roce_dev *hr_dev,
mpt_entry->pbl_size = cpu_to_le32(mr->npages);
mpt_entry->pbl_ba_l = cpu_to_le32(pbl_ba >> 3);
roce_set_field(mpt_entry->byte_48_mode_ba,
V2_MPT_BYTE_48_PBL_BA_H_M, V2_MPT_BYTE_48_PBL_BA_H_S,
upper_32_bits(pbl_ba >> 3));
hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3));
mpt_entry->pa0_l = cpu_to_le32(lower_32_bits(pages[0]));
roce_set_field(mpt_entry->byte_56_pa0_h, V2_MPT_BYTE_56_PA0_H_M,
V2_MPT_BYTE_56_PA0_H_S, upper_32_bits(pages[0]));
hr_reg_write(mpt_entry, MPT_PA0_H, upper_32_bits(pages[0]));
mpt_entry->pa1_l = cpu_to_le32(lower_32_bits(pages[1]));
roce_set_field(mpt_entry->byte_64_buf_pa1, V2_MPT_BYTE_64_PA1_H_M,
V2_MPT_BYTE_64_PA1_H_S, upper_32_bits(pages[1]));
roce_set_field(mpt_entry->byte_64_buf_pa1,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
hr_reg_write(mpt_entry, MPT_PA1_H, upper_32_bits(pages[1]));
hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ,
to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
return 0;
}
@@ -3257,7 +3185,6 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev,
void *mb_buf, struct hns_roce_mr *mr)
{
struct hns_roce_v2_mpt_entry *mpt_entry;
int ret;
mpt_entry = mb_buf;
memset(mpt_entry, 0, sizeof(*mpt_entry));
@@ -3296,9 +3223,7 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev,
to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift));
hr_reg_enable(mpt_entry, MPT_INNER_PA_VLD);
ret = set_mtpt_pbl(hr_dev, mpt_entry, mr);
return ret;
return set_mtpt_pbl(hr_dev, mpt_entry, mr);
}
static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
@@ -3309,24 +3234,19 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev,
u32 mr_access_flags = mr->access;
int ret = 0;
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID);
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
V2_MPT_BYTE_4_PD_S, mr->pd);
hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID);
hr_reg_write(mpt_entry, MPT_PD, mr->pd);
if (flags & IB_MR_REREG_ACCESS) {
roce_set_bit(mpt_entry->byte_8_mw_cnt_en,
V2_MPT_BYTE_8_BIND_EN_S,
hr_reg_write(mpt_entry, MPT_BIND_EN,
(mr_access_flags & IB_ACCESS_MW_BIND ? 1 : 0));
roce_set_bit(mpt_entry->byte_8_mw_cnt_en,
V2_MPT_BYTE_8_ATOMIC_EN_S,
hr_reg_write(mpt_entry, MPT_ATOMIC_EN,
mr_access_flags & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S,
hr_reg_write(mpt_entry, MPT_RR_EN,
mr_access_flags & IB_ACCESS_REMOTE_READ ? 1 : 0);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S,
hr_reg_write(mpt_entry, MPT_RW_EN,
mr_access_flags & IB_ACCESS_REMOTE_WRITE ? 1 : 0);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S,
hr_reg_write(mpt_entry, MPT_LW_EN,
mr_access_flags & IB_ACCESS_LOCAL_WRITE ? 1 : 0);
}
@@ -3357,37 +3277,28 @@ static int hns_roce_v2_frmr_write_mtpt(struct hns_roce_dev *hr_dev,
return -ENOBUFS;
}
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M,
V2_MPT_BYTE_4_PBL_HOP_NUM_S, 1);
roce_set_field(mpt_entry->byte_4_pd_hop_st,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift));
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
V2_MPT_BYTE_4_PD_S, mr->pd);
hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_FREE);
hr_reg_write(mpt_entry, MPT_PD, mr->pd);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 1);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
hr_reg_enable(mpt_entry, MPT_RA_EN);
hr_reg_enable(mpt_entry, MPT_R_INV_EN);
hr_reg_enable(mpt_entry, MPT_L_INV_EN);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_FRE_S, 1);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 0);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
hr_reg_enable(mpt_entry, MPT_FRE);
hr_reg_clear(mpt_entry, MPT_MR_MW);
hr_reg_enable(mpt_entry, MPT_BPD);
hr_reg_clear(mpt_entry, MPT_PA);
hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, 1);
hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ,
to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift));
hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ,
to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
mpt_entry->pbl_size = cpu_to_le32(mr->npages);
mpt_entry->pbl_ba_l = cpu_to_le32(lower_32_bits(pbl_ba >> 3));
roce_set_field(mpt_entry->byte_48_mode_ba, V2_MPT_BYTE_48_PBL_BA_H_M,
V2_MPT_BYTE_48_PBL_BA_H_S,
upper_32_bits(pbl_ba >> 3));
roce_set_field(mpt_entry->byte_64_buf_pa1,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift));
hr_reg_write(mpt_entry, MPT_PBL_BA_H, upper_32_bits(pbl_ba >> 3));
return 0;
}
@@ -3399,36 +3310,29 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
mpt_entry = mb_buf;
memset(mpt_entry, 0, sizeof(*mpt_entry));
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M,
V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_FREE);
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
V2_MPT_BYTE_4_PD_S, mw->pdn);
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M,
V2_MPT_BYTE_4_PBL_HOP_NUM_S,
mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 :
mw->pbl_hop_num);
roce_set_field(mpt_entry->byte_4_pd_hop_st,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_FREE);
hr_reg_write(mpt_entry, MPT_PD, mw->pdn);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 1);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1);
roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S, 1);
hr_reg_enable(mpt_entry, MPT_R_INV_EN);
hr_reg_enable(mpt_entry, MPT_L_INV_EN);
hr_reg_enable(mpt_entry, MPT_LW_EN);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, 0);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_MR_MW_S, 1);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BPD_S, 1);
roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_BQP_S,
hr_reg_enable(mpt_entry, MPT_MR_MW);
hr_reg_enable(mpt_entry, MPT_BPD);
hr_reg_clear(mpt_entry, MPT_PA);
hr_reg_write(mpt_entry, MPT_BQP,
mw->ibmw.type == IB_MW_TYPE_1 ? 0 : 1);
roce_set_field(mpt_entry->byte_64_buf_pa1,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
mpt_entry->lkey = cpu_to_le32(mw->rkey);
hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM,
mw->pbl_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 :
mw->pbl_hop_num);
hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ,
mw->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
hr_reg_write(mpt_entry, MPT_PBL_BUF_PG_SZ,
mw->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
return 0;
}
@@ -4966,9 +4870,7 @@ static int hns_roce_v2_set_path(struct ib_qp *ibqp,
if (ret)
return ret;
if (gid_attr)
is_udp = (gid_attr->gid_type ==
IB_GID_TYPE_ROCE_UDP_ENCAP);
is_udp = (gid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP);
}
/* Only HIP08 needs to set the vlan_en bits in QPC */
@@ -5949,7 +5851,7 @@ static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
(eq->cons_index & (eq->entries - 1)) *
eq->eqe_size);
return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^
return (hr_reg_read(aeqe, AEQE_OWNER) ^
!!(eq->cons_index & eq->entries)) ? aeqe : NULL;
}
@@ -5969,15 +5871,9 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
*/
dma_rmb();
event_type = roce_get_field(aeqe->asyn,
HNS_ROCE_V2_AEQE_EVENT_TYPE_M,
HNS_ROCE_V2_AEQE_EVENT_TYPE_S);
sub_type = roce_get_field(aeqe->asyn,
HNS_ROCE_V2_AEQE_SUB_TYPE_M,
HNS_ROCE_V2_AEQE_SUB_TYPE_S);
queue_num = roce_get_field(aeqe->event.queue_event.num,
HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
event_type = hr_reg_read(aeqe, AEQE_EVENT_TYPE);
sub_type = hr_reg_read(aeqe, AEQE_SUB_TYPE);
queue_num = hr_reg_read(aeqe, AEQE_EVENT_QUEUE_NUM);
switch (event_type) {
case HNS_ROCE_EVENT_TYPE_PATH_MIG:
@@ -6037,8 +5933,8 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq)
(eq->cons_index & (eq->entries - 1)) *
eq->eqe_size);
return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^
(!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
return (hr_reg_read(ceqe, CEQE_OWNER) ^
!!(eq->cons_index & eq->entries)) ? ceqe : NULL;
}
static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
@@ -6054,8 +5950,7 @@ static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev,
*/
dma_rmb();
cqn = roce_get_field(ceqe->comp, HNS_ROCE_V2_CEQE_COMP_CQN_M,
HNS_ROCE_V2_CEQE_COMP_CQN_S);
cqn = hr_reg_read(ceqe, CEQE_CQN);
hns_roce_cq_completion(hr_dev, cqn);

View File

@@ -41,7 +41,7 @@
#define HNS_ROCE_V2_MAX_SRQ_WR 0x8000
#define HNS_ROCE_V2_MAX_SRQ_SGE 64
#define HNS_ROCE_V2_MAX_CQ_NUM 0x100000
#define HNS_ROCE_V2_MAX_CQC_TIMER_NUM 0x100
#define HNS_ROCE_V2_MAX_CQC_TIMER_BT_NUM 0x100
#define HNS_ROCE_V2_MAX_SRQ_NUM 0x100000
#define HNS_ROCE_V2_MAX_CQE_NUM 0x400000
#define HNS_ROCE_V2_MAX_RQ_SGE_NUM 64
@@ -303,33 +303,6 @@ struct hns_roce_v2_cq_context {
#define HNS_ROCE_V2_CQ_DEFAULT_BURST_NUM 0x0
#define HNS_ROCE_V2_CQ_DEFAULT_INTERVAL 0x0
#define V2_CQC_BYTE_4_ARM_ST_S 6
#define V2_CQC_BYTE_4_ARM_ST_M GENMASK(7, 6)
#define V2_CQC_BYTE_4_CEQN_S 15
#define V2_CQC_BYTE_4_CEQN_M GENMASK(23, 15)
#define V2_CQC_BYTE_8_CQN_S 0
#define V2_CQC_BYTE_8_CQN_M GENMASK(23, 0)
#define V2_CQC_BYTE_16_CQE_HOP_NUM_S 30
#define V2_CQC_BYTE_16_CQE_HOP_NUM_M GENMASK(31, 30)
#define V2_CQC_BYTE_28_CQ_PRODUCER_IDX_S 0
#define V2_CQC_BYTE_28_CQ_PRODUCER_IDX_M GENMASK(23, 0)
#define V2_CQC_BYTE_32_CQ_CONSUMER_IDX_S 0
#define V2_CQC_BYTE_32_CQ_CONSUMER_IDX_M GENMASK(23, 0)
#define V2_CQC_BYTE_52_CQE_CNT_S 0
#define V2_CQC_BYTE_52_CQE_CNT_M GENMASK(23, 0)
#define V2_CQC_BYTE_56_CQ_MAX_CNT_S 0
#define V2_CQC_BYTE_56_CQ_MAX_CNT_M GENMASK(15, 0)
#define V2_CQC_BYTE_56_CQ_PERIOD_S 16
#define V2_CQC_BYTE_56_CQ_PERIOD_M GENMASK(31, 16)
#define CQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_cq_context, h, l)
#define CQC_CQ_ST CQC_FIELD_LOC(1, 0)
@@ -788,12 +761,15 @@ struct hns_roce_v2_mpt_entry {
#define MPT_LKEY MPT_FIELD_LOC(223, 192)
#define MPT_VA MPT_FIELD_LOC(287, 224)
#define MPT_PBL_SIZE MPT_FIELD_LOC(319, 288)
#define MPT_PBL_BA MPT_FIELD_LOC(380, 320)
#define MPT_PBL_BA_L MPT_FIELD_LOC(351, 320)
#define MPT_PBL_BA_H MPT_FIELD_LOC(380, 352)
#define MPT_BLK_MODE MPT_FIELD_LOC(381, 381)
#define MPT_RSV0 MPT_FIELD_LOC(383, 382)
#define MPT_PA0 MPT_FIELD_LOC(441, 384)
#define MPT_PA0_L MPT_FIELD_LOC(415, 384)
#define MPT_PA0_H MPT_FIELD_LOC(441, 416)
#define MPT_BOUND_VA MPT_FIELD_LOC(447, 442)
#define MPT_PA1 MPT_FIELD_LOC(505, 448)
#define MPT_PA1_L MPT_FIELD_LOC(479, 448)
#define MPT_PA1_H MPT_FIELD_LOC(505, 480)
#define MPT_PERSIST_EN MPT_FIELD_LOC(506, 506)
#define MPT_RSV2 MPT_FIELD_LOC(507, 507)
#define MPT_PBL_BUF_PG_SZ MPT_FIELD_LOC(511, 508)
@@ -899,48 +875,24 @@ struct hns_roce_v2_ud_send_wqe {
u8 dgid[GID_LEN_V2];
};
#define V2_UD_SEND_WQE_BYTE_4_OPCODE_S 0
#define V2_UD_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0)
#define UD_SEND_WQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_ud_send_wqe, h, l)
#define V2_UD_SEND_WQE_BYTE_4_OWNER_S 7
#define V2_UD_SEND_WQE_BYTE_4_CQE_S 8
#define V2_UD_SEND_WQE_BYTE_4_SE_S 11
#define V2_UD_SEND_WQE_BYTE_16_PD_S 0
#define V2_UD_SEND_WQE_BYTE_16_PD_M GENMASK(23, 0)
#define V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S 24
#define V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M GENMASK(31, 24)
#define V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
#define V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0)
#define V2_UD_SEND_WQE_BYTE_24_UDPSPN_S 16
#define V2_UD_SEND_WQE_BYTE_24_UDPSPN_M GENMASK(31, 16)
#define V2_UD_SEND_WQE_BYTE_32_DQPN_S 0
#define V2_UD_SEND_WQE_BYTE_32_DQPN_M GENMASK(23, 0)
#define V2_UD_SEND_WQE_BYTE_36_VLAN_S 0
#define V2_UD_SEND_WQE_BYTE_36_VLAN_M GENMASK(15, 0)
#define V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_S 16
#define V2_UD_SEND_WQE_BYTE_36_HOPLIMIT_M GENMASK(23, 16)
#define V2_UD_SEND_WQE_BYTE_36_TCLASS_S 24
#define V2_UD_SEND_WQE_BYTE_36_TCLASS_M GENMASK(31, 24)
#define V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S 0
#define V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M GENMASK(19, 0)
#define V2_UD_SEND_WQE_BYTE_40_SL_S 20
#define V2_UD_SEND_WQE_BYTE_40_SL_M GENMASK(23, 20)
#define V2_UD_SEND_WQE_BYTE_40_UD_VLAN_EN_S 30
#define V2_UD_SEND_WQE_BYTE_40_LBI_S 31
#define UD_SEND_WQE_OPCODE UD_SEND_WQE_FIELD_LOC(4, 0)
#define UD_SEND_WQE_OWNER UD_SEND_WQE_FIELD_LOC(7, 7)
#define UD_SEND_WQE_CQE UD_SEND_WQE_FIELD_LOC(8, 8)
#define UD_SEND_WQE_SE UD_SEND_WQE_FIELD_LOC(11, 11)
#define UD_SEND_WQE_PD UD_SEND_WQE_FIELD_LOC(119, 96)
#define UD_SEND_WQE_SGE_NUM UD_SEND_WQE_FIELD_LOC(127, 120)
#define UD_SEND_WQE_MSG_START_SGE_IDX UD_SEND_WQE_FIELD_LOC(151, 128)
#define UD_SEND_WQE_UDPSPN UD_SEND_WQE_FIELD_LOC(191, 176)
#define UD_SEND_WQE_DQPN UD_SEND_WQE_FIELD_LOC(247, 224)
#define UD_SEND_WQE_VLAN UD_SEND_WQE_FIELD_LOC(271, 256)
#define UD_SEND_WQE_HOPLIMIT UD_SEND_WQE_FIELD_LOC(279, 272)
#define UD_SEND_WQE_TCLASS UD_SEND_WQE_FIELD_LOC(287, 280)
#define UD_SEND_WQE_FLOW_LABEL UD_SEND_WQE_FIELD_LOC(307, 288)
#define UD_SEND_WQE_SL UD_SEND_WQE_FIELD_LOC(311, 308)
#define UD_SEND_WQE_VLAN_EN UD_SEND_WQE_FIELD_LOC(318, 318)
#define UD_SEND_WQE_LBI UD_SEND_WQE_FIELD_LOC(319, 319)
struct hns_roce_v2_rc_send_wqe {
__le32 byte_4;
@@ -955,42 +907,23 @@ struct hns_roce_v2_rc_send_wqe {
__le64 va;
};
#define V2_RC_SEND_WQE_BYTE_4_OPCODE_S 0
#define V2_RC_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0)
#define RC_SEND_WQE_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_rc_send_wqe, h, l)
#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S 5
#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M GENMASK(6, 5)
#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S 13
#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M GENMASK(14, 13)
#define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S 15
#define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M GENMASK(30, 15)
#define V2_RC_SEND_WQE_BYTE_4_OWNER_S 7
#define V2_RC_SEND_WQE_BYTE_4_CQE_S 8
#define V2_RC_SEND_WQE_BYTE_4_FENCE_S 9
#define V2_RC_SEND_WQE_BYTE_4_SO_S 10
#define V2_RC_SEND_WQE_BYTE_4_SE_S 11
#define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12
#define V2_RC_SEND_WQE_BYTE_4_FLAG_S 31
#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0
#define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0)
#define V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S 24
#define V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M GENMASK(31, 24)
#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S 0
#define V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M GENMASK(23, 0)
#define V2_RC_SEND_WQE_BYTE_20_INL_TYPE_S 31
#define RC_SEND_WQE_OPCODE RC_SEND_WQE_FIELD_LOC(4, 0)
#define RC_SEND_WQE_DB_SL_L RC_SEND_WQE_FIELD_LOC(6, 5)
#define RC_SEND_WQE_DB_SL_H RC_SEND_WQE_FIELD_LOC(14, 13)
#define RC_SEND_WQE_OWNER RC_SEND_WQE_FIELD_LOC(7, 7)
#define RC_SEND_WQE_CQE RC_SEND_WQE_FIELD_LOC(8, 8)
#define RC_SEND_WQE_FENCE RC_SEND_WQE_FIELD_LOC(9, 9)
#define RC_SEND_WQE_SO RC_SEND_WQE_FIELD_LOC(10, 10)
#define RC_SEND_WQE_SE RC_SEND_WQE_FIELD_LOC(11, 11)
#define RC_SEND_WQE_INLINE RC_SEND_WQE_FIELD_LOC(12, 12)
#define RC_SEND_WQE_WQE_INDEX RC_SEND_WQE_FIELD_LOC(30, 15)
#define RC_SEND_WQE_FLAG RC_SEND_WQE_FIELD_LOC(31, 31)
#define RC_SEND_WQE_XRC_SRQN RC_SEND_WQE_FIELD_LOC(119, 96)
#define RC_SEND_WQE_SGE_NUM RC_SEND_WQE_FIELD_LOC(127, 120)
#define RC_SEND_WQE_MSG_START_SGE_IDX RC_SEND_WQE_FIELD_LOC(151, 128)
#define RC_SEND_WQE_INL_TYPE RC_SEND_WQE_FIELD_LOC(159, 159)
struct hns_roce_wqe_frmr_seg {
__le32 pbl_size;
@@ -1033,7 +966,10 @@ struct hns_roce_func_clear {
__le32 rsv[4];
};
#define FUNC_CLEAR_RST_FUN_DONE_S 0
#define FUNC_CLEAR_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_func_clear, h, l)
#define FUNC_CLEAR_RST_FUN_DONE FUNC_CLEAR_FIELD_LOC(32, 32)
/* Each physical function manages up to 248 virtual functions, it takes up to
* 100ms for each function to execute clear. If an abnormal reset occurs, it is
* executed twice at most, so it takes up to 249 * 2 * 100ms.
@@ -1112,12 +1048,12 @@ struct hns_roce_vf_switch {
__le32 resv3;
};
#define VF_SWITCH_DATA_FUN_ID_VF_ID_S 3
#define VF_SWITCH_DATA_FUN_ID_VF_ID_M GENMASK(10, 3)
#define VF_SWITCH_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_vf_switch, h, l)
#define VF_SWITCH_DATA_CFG_ALW_LPBK_S 1
#define VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S 2
#define VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S 3
#define VF_SWITCH_VF_ID VF_SWITCH_FIELD_LOC(42, 35)
#define VF_SWITCH_ALW_LPBK VF_SWITCH_FIELD_LOC(65, 65)
#define VF_SWITCH_ALW_LCL_LPBK VF_SWITCH_FIELD_LOC(66, 66)
#define VF_SWITCH_ALW_DST_OVRD VF_SWITCH_FIELD_LOC(67, 67)
struct hns_roce_post_mbox {
__le32 in_param_l;
@@ -1180,11 +1116,10 @@ struct hns_roce_cfg_sgid_tb {
__le32 vf_sgid_type_rsv;
};
#define CFG_SGID_TB_TABLE_IDX_S 0
#define CFG_SGID_TB_TABLE_IDX_M GENMASK(7, 0)
#define SGID_TB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_sgid_tb, h, l)
#define CFG_SGID_TB_VF_SGID_TYPE_S 0
#define CFG_SGID_TB_VF_SGID_TYPE_M GENMASK(1, 0)
#define CFG_SGID_TB_TABLE_IDX SGID_TB_FIELD_LOC(7, 0)
#define CFG_SGID_TB_VF_SGID_TYPE SGID_TB_FIELD_LOC(161, 160)
struct hns_roce_cfg_smac_tb {
__le32 tb_idx_rsv;
@@ -1192,11 +1127,11 @@ struct hns_roce_cfg_smac_tb {
__le32 vf_smac_h_rsv;
__le32 rsv[3];
};
#define CFG_SMAC_TB_IDX_S 0
#define CFG_SMAC_TB_IDX_M GENMASK(7, 0)
#define CFG_SMAC_TB_VF_SMAC_H_S 0
#define CFG_SMAC_TB_VF_SMAC_H_M GENMASK(15, 0)
#define SMAC_TB_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_smac_tb, h, l)
#define CFG_SMAC_TB_IDX SMAC_TB_FIELD_LOC(7, 0)
#define CFG_SMAC_TB_VF_SMAC_H SMAC_TB_FIELD_LOC(79, 64)
struct hns_roce_cfg_gmv_tb_a {
__le32 vf_sgid_l;
@@ -1207,16 +1142,11 @@ struct hns_roce_cfg_gmv_tb_a {
__le32 resv;
};
#define CFG_GMV_TB_SGID_IDX_S 0
#define CFG_GMV_TB_SGID_IDX_M GENMASK(7, 0)
#define GMV_TB_A_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_gmv_tb_a, h, l)
#define CFG_GMV_TB_VF_SGID_TYPE_S 0
#define CFG_GMV_TB_VF_SGID_TYPE_M GENMASK(1, 0)
#define CFG_GMV_TB_VF_VLAN_EN_S 2
#define CFG_GMV_TB_VF_VLAN_ID_S 16
#define CFG_GMV_TB_VF_VLAN_ID_M GENMASK(27, 16)
#define GMV_TB_A_VF_SGID_TYPE GMV_TB_A_FIELD_LOC(129, 128)
#define GMV_TB_A_VF_VLAN_EN GMV_TB_A_FIELD_LOC(130, 130)
#define GMV_TB_A_VF_VLAN_ID GMV_TB_A_FIELD_LOC(155, 144)
struct hns_roce_cfg_gmv_tb_b {
__le32 vf_smac_l;
@@ -1225,8 +1155,10 @@ struct hns_roce_cfg_gmv_tb_b {
__le32 resv[3];
};
#define CFG_GMV_TB_SMAC_H_S 0
#define CFG_GMV_TB_SMAC_H_M GENMASK(15, 0)
#define GMV_TB_B_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_cfg_gmv_tb_b, h, l)
#define GMV_TB_B_SMAC_H GMV_TB_B_FIELD_LOC(47, 32)
#define GMV_TB_B_SGID_IDX GMV_TB_B_FIELD_LOC(71, 64)
#define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 5
struct hns_roce_query_pf_caps_a {
@@ -1278,29 +1210,17 @@ struct hns_roce_query_pf_caps_c {
__le16 rq_depth;
};
#define V2_QUERY_PF_CAPS_C_NUM_PDS_S 0
#define V2_QUERY_PF_CAPS_C_NUM_PDS_M GENMASK(19, 0)
#define PF_CAPS_C_FIELD_LOC(h, l) \
FIELD_LOC(struct hns_roce_query_pf_caps_c, h, l)
#define V2_QUERY_PF_CAPS_C_CAP_FLAGS_S 20
#define V2_QUERY_PF_CAPS_C_CAP_FLAGS_M GENMASK(31, 20)
#define V2_QUERY_PF_CAPS_C_NUM_CQS_S 0
#define V2_QUERY_PF_CAPS_C_NUM_CQS_M GENMASK(19, 0)
#define V2_QUERY_PF_CAPS_C_MAX_GID_S 20
#define V2_QUERY_PF_CAPS_C_MAX_GID_M GENMASK(28, 20)
#define V2_QUERY_PF_CAPS_C_CQ_DEPTH_S 0
#define V2_QUERY_PF_CAPS_C_CQ_DEPTH_M GENMASK(22, 0)
#define V2_QUERY_PF_CAPS_C_NUM_MRWS_S 0
#define V2_QUERY_PF_CAPS_C_NUM_MRWS_M GENMASK(19, 0)
#define V2_QUERY_PF_CAPS_C_NUM_QPS_S 0
#define V2_QUERY_PF_CAPS_C_NUM_QPS_M GENMASK(19, 0)
#define V2_QUERY_PF_CAPS_C_MAX_ORD_S 20
#define V2_QUERY_PF_CAPS_C_MAX_ORD_M GENMASK(27, 20)
#define PF_CAPS_C_NUM_PDS PF_CAPS_C_FIELD_LOC(19, 0)
#define PF_CAPS_C_CAP_FLAGS PF_CAPS_C_FIELD_LOC(31, 20)
#define PF_CAPS_C_NUM_CQS PF_CAPS_C_FIELD_LOC(51, 32)
#define PF_CAPS_C_MAX_GID PF_CAPS_C_FIELD_LOC(60, 52)
#define PF_CAPS_C_CQ_DEPTH PF_CAPS_C_FIELD_LOC(86, 64)
#define PF_CAPS_C_NUM_MRWS PF_CAPS_C_FIELD_LOC(115, 96)
#define PF_CAPS_C_NUM_QPS PF_CAPS_C_FIELD_LOC(147, 128)
#define PF_CAPS_C_MAX_ORD PF_CAPS_C_FIELD_LOC(155, 148)
struct hns_roce_query_pf_caps_d {
__le32 wq_hop_num_max_srqs;
@@ -1311,20 +1231,26 @@ struct hns_roce_query_pf_caps_d {
__le32 num_uars_rsv_pds;
__le32 rsv_uars_rsv_qps;
};
#define V2_QUERY_PF_CAPS_D_NUM_SRQS_S 0
#define V2_QUERY_PF_CAPS_D_NUM_SRQS_M GENMASK(19, 0)
#define V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_S 20
#define V2_QUERY_PF_CAPS_D_RQWQE_HOP_NUM_M GENMASK(21, 20)
#define PF_CAPS_D_FIELD_LOC(h, l) \
FIELD_LOC(struct hns_roce_query_pf_caps_d, h, l)
#define V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_S 22
#define V2_QUERY_PF_CAPS_D_EX_SGE_HOP_NUM_M GENMASK(23, 22)
#define PF_CAPS_D_NUM_SRQS PF_CAPS_D_FIELD_LOC(19, 0)
#define PF_CAPS_D_RQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(21, 20)
#define PF_CAPS_D_EX_SGE_HOP_NUM PF_CAPS_D_FIELD_LOC(23, 22)
#define PF_CAPS_D_SQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(25, 24)
#define PF_CAPS_D_CONG_TYPE PF_CAPS_D_FIELD_LOC(29, 26)
#define PF_CAPS_D_CEQ_DEPTH PF_CAPS_D_FIELD_LOC(85, 64)
#define PF_CAPS_D_NUM_CEQS PF_CAPS_D_FIELD_LOC(95, 86)
#define PF_CAPS_D_AEQ_DEPTH PF_CAPS_D_FIELD_LOC(117, 96)
#define PF_CAPS_D_AEQ_ARM_ST PF_CAPS_D_FIELD_LOC(119, 118)
#define PF_CAPS_D_CEQ_ARM_ST PF_CAPS_D_FIELD_LOC(121, 120)
#define PF_CAPS_D_RSV_PDS PF_CAPS_D_FIELD_LOC(147, 128)
#define PF_CAPS_D_NUM_UARS PF_CAPS_D_FIELD_LOC(155, 148)
#define PF_CAPS_D_RSV_QPS PF_CAPS_D_FIELD_LOC(179, 160)
#define PF_CAPS_D_RSV_UARS PF_CAPS_D_FIELD_LOC(187, 180)
#define V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_S 24
#define V2_QUERY_PF_CAPS_D_SQWQE_HOP_NUM_M GENMASK(25, 24)
#define V2_QUERY_PF_CAPS_D_CONG_TYPE_S 26
#define V2_QUERY_PF_CAPS_D_CONG_TYPE_M GENMASK(29, 26)
#define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12
struct hns_roce_congestion_algorithm {
u8 alg_sel;
@@ -1333,33 +1259,6 @@ struct hns_roce_congestion_algorithm {
u8 wnd_mode_sel;
};
#define V2_QUERY_PF_CAPS_D_CEQ_DEPTH_S 0
#define V2_QUERY_PF_CAPS_D_CEQ_DEPTH_M GENMASK(21, 0)
#define V2_QUERY_PF_CAPS_D_NUM_CEQS_S 22
#define V2_QUERY_PF_CAPS_D_NUM_CEQS_M GENMASK(31, 22)
#define V2_QUERY_PF_CAPS_D_AEQ_DEPTH_S 0
#define V2_QUERY_PF_CAPS_D_AEQ_DEPTH_M GENMASK(21, 0)
#define V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_S 22
#define V2_QUERY_PF_CAPS_D_AEQ_ARM_ST_M GENMASK(23, 22)
#define V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_S 24
#define V2_QUERY_PF_CAPS_D_CEQ_ARM_ST_M GENMASK(25, 24)
#define V2_QUERY_PF_CAPS_D_RSV_PDS_S 0
#define V2_QUERY_PF_CAPS_D_RSV_PDS_M GENMASK(19, 0)
#define V2_QUERY_PF_CAPS_D_NUM_UARS_S 20
#define V2_QUERY_PF_CAPS_D_NUM_UARS_M GENMASK(27, 20)
#define V2_QUERY_PF_CAPS_D_RSV_QPS_S 0
#define V2_QUERY_PF_CAPS_D_RSV_QPS_M GENMASK(19, 0)
#define V2_QUERY_PF_CAPS_D_RSV_UARS_S 20
#define V2_QUERY_PF_CAPS_D_RSV_UARS_M GENMASK(27, 20)
struct hns_roce_query_pf_caps_e {
__le32 chunk_size_shift_rsv_mrws;
__le32 rsv_cqs;
@@ -1371,20 +1270,14 @@ struct hns_roce_query_pf_caps_e {
__le16 aeq_period;
};
#define V2_QUERY_PF_CAPS_E_RSV_MRWS_S 0
#define V2_QUERY_PF_CAPS_E_RSV_MRWS_M GENMASK(19, 0)
#define PF_CAPS_E_FIELD_LOC(h, l) \
FIELD_LOC(struct hns_roce_query_pf_caps_e, h, l)
#define V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_S 20
#define V2_QUERY_PF_CAPS_E_CHUNK_SIZE_SHIFT_M GENMASK(31, 20)
#define V2_QUERY_PF_CAPS_E_RSV_CQS_S 0
#define V2_QUERY_PF_CAPS_E_RSV_CQS_M GENMASK(19, 0)
#define V2_QUERY_PF_CAPS_E_RSV_SRQS_S 0
#define V2_QUERY_PF_CAPS_E_RSV_SRQS_M GENMASK(19, 0)
#define V2_QUERY_PF_CAPS_E_RSV_LKEYS_S 0
#define V2_QUERY_PF_CAPS_E_RSV_LKEYS_M GENMASK(19, 0)
#define PF_CAPS_E_RSV_MRWS PF_CAPS_E_FIELD_LOC(19, 0)
#define PF_CAPS_E_CHUNK_SIZE_SHIFT PF_CAPS_E_FIELD_LOC(31, 20)
#define PF_CAPS_E_RSV_CQS PF_CAPS_E_FIELD_LOC(51, 32)
#define PF_CAPS_E_RSV_SRQS PF_CAPS_E_FIELD_LOC(83, 64)
#define PF_CAPS_E_RSV_LKEYS PF_CAPS_E_FIELD_LOC(115, 96)
struct hns_roce_cmq_req {
__le32 data[6];
@@ -1485,9 +1378,6 @@ struct hns_roce_dip {
#define HNS_ROCE_EQ_INIT_CONS_IDX 0
#define HNS_ROCE_EQ_INIT_NXT_EQE_BA 0
#define HNS_ROCE_V2_CEQ_CEQE_OWNER_S 31
#define HNS_ROCE_V2_AEQ_AEQE_OWNER_S 31
#define HNS_ROCE_V2_COMP_EQE_NUM 0x1000
#define HNS_ROCE_V2_ASYNC_EQE_NUM 0x1000
@@ -1544,18 +1434,6 @@ struct hns_roce_eq_context {
#define EQC_NEX_EQE_BA_H EQC_FIELD_LOC(339, 320)
#define EQC_EQE_SIZE EQC_FIELD_LOC(341, 340)
#define HNS_ROCE_V2_CEQE_COMP_CQN_S 0
#define HNS_ROCE_V2_CEQE_COMP_CQN_M GENMASK(23, 0)
#define HNS_ROCE_V2_AEQE_EVENT_TYPE_S 0
#define HNS_ROCE_V2_AEQE_EVENT_TYPE_M GENMASK(7, 0)
#define HNS_ROCE_V2_AEQE_SUB_TYPE_S 8
#define HNS_ROCE_V2_AEQE_SUB_TYPE_M GENMASK(15, 8)
#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S 0
#define HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M GENMASK(23, 0)
#define MAX_SERVICE_LEVEL 0x7
struct hns_roce_wqe_atomic_seg {

View File

@@ -737,7 +737,7 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
ret = hns_roce_init_hem_table(hr_dev, &hr_dev->cqc_timer_table,
HEM_TYPE_CQC_TIMER,
hr_dev->caps.cqc_timer_entry_sz,
hr_dev->caps.num_cqc_timer, 1);
hr_dev->caps.cqc_timer_bt_num, 1);
if (ret) {
dev_err(dev,
"Failed to init CQC timer memory, aborting.\n");

View File

@@ -340,7 +340,6 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
struct hns_roce_mr *mr = to_hr_mr(ibmr);
int ret = 0;
if (hr_dev->hw->dereg_mr)
hr_dev->hw->dereg_mr(hr_dev);
@@ -348,7 +347,7 @@ int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
hns_roce_mr_free(hr_dev, mr);
kfree(mr);
return ret;
return 0;
}
struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,

View File

@@ -243,26 +243,6 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
return 0;
}
enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state)
{
switch (state) {
case IB_QPS_RESET:
return HNS_ROCE_QP_STATE_RST;
case IB_QPS_INIT:
return HNS_ROCE_QP_STATE_INIT;
case IB_QPS_RTR:
return HNS_ROCE_QP_STATE_RTR;
case IB_QPS_RTS:
return HNS_ROCE_QP_STATE_RTS;
case IB_QPS_SQD:
return HNS_ROCE_QP_STATE_SQD;
case IB_QPS_ERR:
return HNS_ROCE_QP_STATE_ERR;
default:
return HNS_ROCE_QP_NUM_STATE;
}
}
static void add_qp_to_list(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp,
struct ib_cq *send_cq, struct ib_cq *recv_cq)

View File

@@ -13,61 +13,40 @@ static int hns_roce_fill_cq(struct sk_buff *msg,
struct hns_roce_v2_cq_context *context)
{
if (rdma_nl_put_driver_u32(msg, "state",
roce_get_field(context->byte_4_pg_ceqn,
V2_CQC_BYTE_4_ARM_ST_M,
V2_CQC_BYTE_4_ARM_ST_S)))
hr_reg_read(context, CQC_ARM_ST)))
goto err;
if (rdma_nl_put_driver_u32(msg, "ceqn",
roce_get_field(context->byte_4_pg_ceqn,
V2_CQC_BYTE_4_CEQN_M,
V2_CQC_BYTE_4_CEQN_S)))
hr_reg_read(context, CQC_CEQN)))
goto err;
if (rdma_nl_put_driver_u32(msg, "cqn",
roce_get_field(context->byte_8_cqn,
V2_CQC_BYTE_8_CQN_M,
V2_CQC_BYTE_8_CQN_S)))
hr_reg_read(context, CQC_CQN)))
goto err;
if (rdma_nl_put_driver_u32(msg, "hopnum",
roce_get_field(context->byte_16_hop_addr,
V2_CQC_BYTE_16_CQE_HOP_NUM_M,
V2_CQC_BYTE_16_CQE_HOP_NUM_S)))
hr_reg_read(context, CQC_CQE_HOP_NUM)))
goto err;
if (rdma_nl_put_driver_u32(
msg, "pi",
roce_get_field(context->byte_28_cq_pi,
V2_CQC_BYTE_28_CQ_PRODUCER_IDX_M,
V2_CQC_BYTE_28_CQ_PRODUCER_IDX_S)))
if (rdma_nl_put_driver_u32(msg, "pi",
hr_reg_read(context, CQC_CQ_PRODUCER_IDX)))
goto err;
if (rdma_nl_put_driver_u32(
msg, "ci",
roce_get_field(context->byte_32_cq_ci,
V2_CQC_BYTE_32_CQ_CONSUMER_IDX_M,
V2_CQC_BYTE_32_CQ_CONSUMER_IDX_S)))
if (rdma_nl_put_driver_u32(msg, "ci",
hr_reg_read(context, CQC_CQ_CONSUMER_IDX)))
goto err;
if (rdma_nl_put_driver_u32(
msg, "coalesce",
roce_get_field(context->byte_56_cqe_period_maxcnt,
V2_CQC_BYTE_56_CQ_MAX_CNT_M,
V2_CQC_BYTE_56_CQ_MAX_CNT_S)))
if (rdma_nl_put_driver_u32(msg, "coalesce",
hr_reg_read(context, CQC_CQ_MAX_CNT)))
goto err;
if (rdma_nl_put_driver_u32(
msg, "period",
roce_get_field(context->byte_56_cqe_period_maxcnt,
V2_CQC_BYTE_56_CQ_PERIOD_M,
V2_CQC_BYTE_56_CQ_PERIOD_S)))
if (rdma_nl_put_driver_u32(msg, "period",
hr_reg_read(context, CQC_CQ_PERIOD)))
goto err;
if (rdma_nl_put_driver_u32(msg, "cnt",
roce_get_field(context->byte_52_cqe_cnt,
V2_CQC_BYTE_52_CQE_CNT_M,
V2_CQC_BYTE_52_CQE_CNT_S)))
hr_reg_read(context, CQC_CQE_CNT)))
goto err;
return 0;

View File

@@ -61,7 +61,7 @@ static void irdma_iwarp_ce_handler(struct irdma_sc_cq *iwcq)
struct irdma_cq *cq = iwcq->back_cq;
if (!cq->user_mode)
cq->armed = false;
atomic_set(&cq->armed, 0);
if (cq->ibcq.comp_handler)
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
@@ -1827,10 +1827,6 @@ int irdma_rt_init_hw(struct irdma_device *iwdev,
rf->rsrc_created = true;
}
iwdev->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY |
IB_DEVICE_MEM_WINDOW |
IB_DEVICE_MEM_MGT_EXTENSIONS;
if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_1)
irdma_alloc_set_mac(iwdev);
irdma_add_ip(iwdev);
@@ -2693,24 +2689,29 @@ void irdma_flush_wqes(struct irdma_qp *iwqp, u32 flush_mask)
info.sq = flush_mask & IRDMA_FLUSH_SQ;
info.rq = flush_mask & IRDMA_FLUSH_RQ;
if (flush_mask & IRDMA_REFLUSH) {
if (info.sq)
iwqp->sc_qp.flush_sq = false;
if (info.rq)
iwqp->sc_qp.flush_rq = false;
}
/* Generate userflush errors in CQE */
info.sq_major_code = IRDMA_FLUSH_MAJOR_ERR;
info.sq_minor_code = FLUSH_GENERAL_ERR;
info.rq_major_code = IRDMA_FLUSH_MAJOR_ERR;
info.rq_minor_code = FLUSH_GENERAL_ERR;
info.userflushcode = true;
if (flush_code) {
if (info.sq && iwqp->sc_qp.sq_flush_code)
info.sq_minor_code = flush_code;
if (info.rq && iwqp->sc_qp.rq_flush_code)
info.rq_minor_code = flush_code;
if (flush_mask & IRDMA_REFLUSH) {
if (info.sq)
iwqp->sc_qp.flush_sq = false;
if (info.rq)
iwqp->sc_qp.flush_rq = false;
} else {
if (flush_code) {
if (info.sq && iwqp->sc_qp.sq_flush_code)
info.sq_minor_code = flush_code;
if (info.rq && iwqp->sc_qp.rq_flush_code)
info.rq_minor_code = flush_code;
}
if (!iwqp->user_mode)
queue_delayed_work(iwqp->iwdev->cleanup_wq,
&iwqp->dwork_flush,
msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
}
/* Issue flush */

View File

@@ -338,7 +338,6 @@ struct irdma_device {
u32 roce_ackcreds;
u32 vendor_id;
u32 vendor_part_id;
u32 device_cap_flags;
u32 push_mode;
u32 rcv_wnd;
u16 mac_ip_table_idx;

View File

@@ -191,7 +191,6 @@ static void irdma_puda_dele_buf(struct irdma_sc_dev *dev,
static __le64 *irdma_puda_get_next_send_wqe(struct irdma_qp_uk *qp,
u32 *wqe_idx)
{
__le64 *wqe = NULL;
int ret_code = 0;
*wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring);
@@ -199,11 +198,9 @@ static __le64 *irdma_puda_get_next_send_wqe(struct irdma_qp_uk *qp,
qp->swqe_polarity = !qp->swqe_polarity;
IRDMA_RING_MOVE_HEAD(qp->sq_ring, ret_code);
if (ret_code)
return wqe;
return NULL;
wqe = qp->sq_base[*wqe_idx].elem;
return wqe;
return qp->sq_base[*wqe_idx].elem;
}
/**

View File

@@ -2495,3 +2495,150 @@ bool irdma_cq_empty(struct irdma_cq *iwcq)
return polarity != ukcq->polarity;
}
void irdma_remove_cmpls_list(struct irdma_cq *iwcq)
{
struct irdma_cmpl_gen *cmpl_node;
struct list_head *tmp_node, *list_node;
list_for_each_safe (list_node, tmp_node, &iwcq->cmpl_generated) {
cmpl_node = list_entry(list_node, struct irdma_cmpl_gen, list);
list_del(&cmpl_node->list);
kfree(cmpl_node);
}
}
int irdma_generated_cmpls(struct irdma_cq *iwcq, struct irdma_cq_poll_info *cq_poll_info)
{
struct irdma_cmpl_gen *cmpl;
if (list_empty(&iwcq->cmpl_generated))
return -ENOENT;
cmpl = list_first_entry_or_null(&iwcq->cmpl_generated, struct irdma_cmpl_gen, list);
list_del(&cmpl->list);
memcpy(cq_poll_info, &cmpl->cpi, sizeof(*cq_poll_info));
kfree(cmpl);
ibdev_dbg(iwcq->ibcq.device,
"VERBS: %s: Poll artificially generated completion for QP 0x%X, op %u, wr_id=0x%llx\n",
__func__, cq_poll_info->qp_id, cq_poll_info->op_type,
cq_poll_info->wr_id);
return 0;
}
/**
* irdma_set_cpi_common_values - fill in values for polling info struct
* @cpi: resulting structure of cq_poll_info type
* @qp: QPair
* @qp_num: id of the QP
*/
static void irdma_set_cpi_common_values(struct irdma_cq_poll_info *cpi,
struct irdma_qp_uk *qp, u32 qp_num)
{
cpi->comp_status = IRDMA_COMPL_STATUS_FLUSHED;
cpi->error = true;
cpi->major_err = IRDMA_FLUSH_MAJOR_ERR;
cpi->minor_err = FLUSH_GENERAL_ERR;
cpi->qp_handle = (irdma_qp_handle)(uintptr_t)qp;
cpi->qp_id = qp_num;
}
static inline void irdma_comp_handler(struct irdma_cq *cq)
{
if (!cq->ibcq.comp_handler)
return;
if (atomic_cmpxchg(&cq->armed, 1, 0))
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
void irdma_generate_flush_completions(struct irdma_qp *iwqp)
{
struct irdma_qp_uk *qp = &iwqp->sc_qp.qp_uk;
struct irdma_ring *sq_ring = &qp->sq_ring;
struct irdma_ring *rq_ring = &qp->rq_ring;
struct irdma_cmpl_gen *cmpl;
__le64 *sw_wqe;
u64 wqe_qword;
u32 wqe_idx;
bool compl_generated = false;
unsigned long flags1;
spin_lock_irqsave(&iwqp->iwscq->lock, flags1);
if (irdma_cq_empty(iwqp->iwscq)) {
unsigned long flags2;
spin_lock_irqsave(&iwqp->lock, flags2);
while (IRDMA_RING_MORE_WORK(*sq_ring)) {
cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC);
if (!cmpl) {
spin_unlock_irqrestore(&iwqp->lock, flags2);
spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1);
return;
}
wqe_idx = sq_ring->tail;
irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id);
cmpl->cpi.wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
sw_wqe = qp->sq_base[wqe_idx].elem;
get_64bit_val(sw_wqe, 24, &wqe_qword);
cmpl->cpi.op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, IRDMAQPSQ_OPCODE);
/* remove the SQ WR by moving SQ tail*/
IRDMA_RING_SET_TAIL(*sq_ring,
sq_ring->tail + qp->sq_wrtrk_array[sq_ring->tail].quanta);
ibdev_dbg(iwqp->iwscq->ibcq.device,
"DEV: %s: adding wr_id = 0x%llx SQ Completion to list qp_id=%d\n",
__func__, cmpl->cpi.wr_id, qp->qp_id);
list_add_tail(&cmpl->list, &iwqp->iwscq->cmpl_generated);
compl_generated = true;
}
spin_unlock_irqrestore(&iwqp->lock, flags2);
spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1);
if (compl_generated)
irdma_comp_handler(iwqp->iwrcq);
} else {
spin_unlock_irqrestore(&iwqp->iwscq->lock, flags1);
mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
}
spin_lock_irqsave(&iwqp->iwrcq->lock, flags1);
if (irdma_cq_empty(iwqp->iwrcq)) {
unsigned long flags2;
spin_lock_irqsave(&iwqp->lock, flags2);
while (IRDMA_RING_MORE_WORK(*rq_ring)) {
cmpl = kzalloc(sizeof(*cmpl), GFP_ATOMIC);
if (!cmpl) {
spin_unlock_irqrestore(&iwqp->lock, flags2);
spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1);
return;
}
wqe_idx = rq_ring->tail;
irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id);
cmpl->cpi.wr_id = qp->rq_wrid_array[wqe_idx];
cmpl->cpi.op_type = IRDMA_OP_TYPE_REC;
/* remove the RQ WR by moving RQ tail */
IRDMA_RING_SET_TAIL(*rq_ring, rq_ring->tail + 1);
ibdev_dbg(iwqp->iwrcq->ibcq.device,
"DEV: %s: adding wr_id = 0x%llx RQ Completion to list qp_id=%d, wqe_idx=%d\n",
__func__, cmpl->cpi.wr_id, qp->qp_id,
wqe_idx);
list_add_tail(&cmpl->list, &iwqp->iwrcq->cmpl_generated);
compl_generated = true;
}
spin_unlock_irqrestore(&iwqp->lock, flags2);
spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1);
if (compl_generated)
irdma_comp_handler(iwqp->iwrcq);
} else {
spin_unlock_irqrestore(&iwqp->iwrcq->lock, flags1);
mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
}
}

View File

@@ -25,7 +25,9 @@ static int irdma_query_device(struct ib_device *ibdev,
iwdev->netdev->dev_addr);
props->fw_ver = (u64)irdma_fw_major_ver(&rf->sc_dev) << 32 |
irdma_fw_minor_ver(&rf->sc_dev);
props->device_cap_flags = iwdev->device_cap_flags;
props->device_cap_flags = IB_DEVICE_MEM_WINDOW |
IB_DEVICE_MEM_MGT_EXTENSIONS;
props->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
props->vendor_id = pcidev->vendor;
props->vendor_part_id = pcidev->device;
@@ -533,6 +535,9 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
if (iwqp->iwarp_state == IRDMA_QP_STATE_RTS)
irdma_modify_qp_to_err(&iwqp->sc_qp);
if (!iwqp->user_mode)
cancel_delayed_work_sync(&iwqp->dwork_flush);
irdma_qp_rem_ref(&iwqp->ibqp);
wait_for_completion(&iwqp->free_qp);
irdma_free_lsmm_rsrc(iwqp);
@@ -788,6 +793,14 @@ static int irdma_validate_qp_attrs(struct ib_qp_init_attr *init_attr,
return 0;
}
static void irdma_flush_worker(struct work_struct *work)
{
struct delayed_work *dwork = to_delayed_work(work);
struct irdma_qp *iwqp = container_of(dwork, struct irdma_qp, dwork_flush);
irdma_generate_flush_completions(iwqp);
}
/**
* irdma_create_qp - create qp
* @ibqp: ptr of qp
@@ -907,6 +920,7 @@ static int irdma_create_qp(struct ib_qp *ibqp,
init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver;
irdma_setup_virt_qp(iwdev, iwqp, &init_info);
} else {
INIT_DELAYED_WORK(&iwqp->dwork_flush, irdma_flush_worker);
init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER;
err_code = irdma_setup_kmode_qp(iwdev, iwqp, &init_info, init_attr);
}
@@ -1398,11 +1412,11 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
}
if (iwqp->ibqp_state > IB_QPS_RTS &&
!iwqp->flush_issued) {
iwqp->flush_issued = 1;
spin_unlock_irqrestore(&iwqp->lock, flags);
irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ |
IRDMA_FLUSH_RQ |
IRDMA_FLUSH_WAIT);
iwqp->flush_issued = 1;
} else {
spin_unlock_irqrestore(&iwqp->lock, flags);
}
@@ -1755,6 +1769,8 @@ static int irdma_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
unsigned long flags;
spin_lock_irqsave(&iwcq->lock, flags);
if (!list_empty(&iwcq->cmpl_generated))
irdma_remove_cmpls_list(iwcq);
if (!list_empty(&iwcq->resize_list))
irdma_process_resize_list(iwcq, iwdev, NULL);
spin_unlock_irqrestore(&iwcq->lock, flags);
@@ -1959,6 +1975,7 @@ static int irdma_create_cq(struct ib_cq *ibcq,
cq->back_cq = iwcq;
spin_lock_init(&iwcq->lock);
INIT_LIST_HEAD(&iwcq->resize_list);
INIT_LIST_HEAD(&iwcq->cmpl_generated);
info.dev = dev;
ukinfo->cq_size = max(entries, 4);
ukinfo->cq_id = cq_num;
@@ -3044,15 +3061,12 @@ static int irdma_post_send(struct ib_qp *ibqp,
unsigned long flags;
bool inv_stag;
struct irdma_ah *ah;
bool reflush = false;
iwqp = to_iwqp(ibqp);
ukqp = &iwqp->sc_qp.qp_uk;
dev = &iwqp->iwdev->rf->sc_dev;
spin_lock_irqsave(&iwqp->lock, flags);
if (iwqp->flush_issued && ukqp->sq_flush_complete)
reflush = true;
while (ib_wr) {
memset(&info, 0, sizeof(info));
inv_stag = false;
@@ -3202,15 +3216,14 @@ static int irdma_post_send(struct ib_qp *ibqp,
ib_wr = ib_wr->next;
}
if (!iwqp->flush_issued && iwqp->hw_iwarp_state <= IRDMA_QP_STATE_RTS) {
irdma_uk_qp_post_wr(ukqp);
if (!iwqp->flush_issued) {
if (iwqp->hw_iwarp_state <= IRDMA_QP_STATE_RTS)
irdma_uk_qp_post_wr(ukqp);
spin_unlock_irqrestore(&iwqp->lock, flags);
} else if (reflush) {
ukqp->sq_flush_complete = false;
spin_unlock_irqrestore(&iwqp->lock, flags);
irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ | IRDMA_REFLUSH);
} else {
spin_unlock_irqrestore(&iwqp->lock, flags);
mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
}
if (err)
*bad_wr = ib_wr;
@@ -3233,14 +3246,11 @@ static int irdma_post_recv(struct ib_qp *ibqp,
struct irdma_post_rq_info post_recv = {};
unsigned long flags;
int err = 0;
bool reflush = false;
iwqp = to_iwqp(ibqp);
ukqp = &iwqp->sc_qp.qp_uk;
spin_lock_irqsave(&iwqp->lock, flags);
if (iwqp->flush_issued && ukqp->rq_flush_complete)
reflush = true;
while (ib_wr) {
post_recv.num_sges = ib_wr->num_sge;
post_recv.wr_id = ib_wr->wr_id;
@@ -3256,13 +3266,10 @@ static int irdma_post_recv(struct ib_qp *ibqp,
}
out:
if (reflush) {
ukqp->rq_flush_complete = false;
spin_unlock_irqrestore(&iwqp->lock, flags);
irdma_flush_wqes(iwqp, IRDMA_FLUSH_RQ | IRDMA_REFLUSH);
} else {
spin_unlock_irqrestore(&iwqp->lock, flags);
}
spin_unlock_irqrestore(&iwqp->lock, flags);
if (iwqp->flush_issued)
mod_delayed_work(iwqp->iwdev->cleanup_wq, &iwqp->dwork_flush,
msecs_to_jiffies(IRDMA_FLUSH_DELAY_MS));
if (err)
*bad_wr = ib_wr;
@@ -3474,6 +3481,11 @@ static int __irdma_poll_cq(struct irdma_cq *iwcq, int num_entries, struct ib_wc
/* check the current CQ for new cqes */
while (npolled < num_entries) {
ret = irdma_poll_one(ukcq, cur_cqe, entry + npolled);
if (ret == -ENOENT) {
ret = irdma_generated_cmpls(iwcq, cur_cqe);
if (!ret)
irdma_process_cqe(entry + npolled, cur_cqe);
}
if (!ret) {
++npolled;
cq_new_cqe = true;
@@ -3555,13 +3567,13 @@ static int irdma_req_notify_cq(struct ib_cq *ibcq,
if (iwcq->last_notify == IRDMA_CQ_COMPL_SOLICITED && notify_flags != IB_CQ_SOLICITED)
promo_event = true;
if (!iwcq->armed || promo_event) {
iwcq->armed = true;
if (!atomic_cmpxchg(&iwcq->armed, 0, 1) || promo_event) {
iwcq->last_notify = cq_notify;
irdma_uk_cq_request_notification(ukcq, cq_notify);
}
if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && !irdma_cq_empty(iwcq))
if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
(!irdma_cq_empty(iwcq) || !list_empty(&iwcq->cmpl_generated)))
ret = 1;
spin_unlock_irqrestore(&iwcq->lock, flags);

View File

@@ -4,6 +4,7 @@
#define IRDMA_VERBS_H
#define IRDMA_MAX_SAVED_PHY_PGADDR 4
#define IRDMA_FLUSH_DELAY_MS 20
#define IRDMA_PKEY_TBL_SZ 1
#define IRDMA_DEFAULT_PKEY 0xFFFF
@@ -115,7 +116,7 @@ struct irdma_cq {
u16 cq_size;
u16 cq_num;
bool user_mode;
bool armed;
atomic_t armed;
enum irdma_cmpl_notify last_notify;
u32 polled_cmpls;
u32 cq_mem_size;
@@ -126,6 +127,12 @@ struct irdma_cq {
struct irdma_pbl *iwpbl_shadow;
struct list_head resize_list;
struct irdma_cq_poll_info cur_cqe;
struct list_head cmpl_generated;
};
struct irdma_cmpl_gen {
struct list_head list;
struct irdma_cq_poll_info cpi;
};
struct disconn_work {
@@ -166,6 +173,7 @@ struct irdma_qp {
refcount_t refcnt;
struct iw_cm_id *cm_id;
struct irdma_cm_node *cm_node;
struct delayed_work dwork_flush;
struct ib_mr *lsmm_mr;
atomic_t hw_mod_qp_pend;
enum ib_qp_state ibqp_state;
@@ -229,4 +237,7 @@ int irdma_ib_register_device(struct irdma_device *iwdev);
void irdma_ib_unregister_device(struct irdma_device *iwdev);
void irdma_ib_dealloc_device(struct ib_device *ibdev);
void irdma_ib_qp_event(struct irdma_qp *iwqp, enum irdma_qp_event_type event);
void irdma_generate_flush_completions(struct irdma_qp *iwqp);
void irdma_remove_cmpls_list(struct irdma_cq *iwcq);
int irdma_generated_cmpls(struct irdma_cq *iwcq, struct irdma_cq_poll_info *cq_poll_info);
#endif /* IRDMA_VERBS_H */

View File

@@ -80,6 +80,7 @@ struct cm_req_msg {
union ib_gid primary_path_sgid;
};
static struct workqueue_struct *cm_wq;
static void set_local_comm_id(struct ib_mad *mad, u32 cm_id)
{
@@ -288,10 +289,10 @@ static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id)
/*make sure that there is no schedule inside the scheduled work.*/
if (!sriov->is_going_down && !id->scheduled_delete) {
id->scheduled_delete = 1;
schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
queue_delayed_work(cm_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
} else if (id->scheduled_delete) {
/* Adjust timeout if already scheduled */
mod_delayed_work(system_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
mod_delayed_work(cm_wq, &id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
}
spin_unlock_irqrestore(&sriov->going_down_lock, flags);
spin_unlock(&sriov->id_map_lock);
@@ -370,7 +371,7 @@ static int alloc_rej_tmout(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id, int sl
ret = xa_err(item);
else
/* If a retry, adjust delayed work */
mod_delayed_work(system_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT);
mod_delayed_work(cm_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT);
goto err_or_exists;
}
xa_unlock(&sriov->xa_rej_tmout);
@@ -393,7 +394,7 @@ static int alloc_rej_tmout(struct mlx4_ib_sriov *sriov, u32 rem_pv_cm_id, int sl
return xa_err(old);
}
schedule_delayed_work(&item->timeout, CM_CLEANUP_CACHE_TIMEOUT);
queue_delayed_work(cm_wq, &item->timeout, CM_CLEANUP_CACHE_TIMEOUT);
return 0;
@@ -500,7 +501,7 @@ static void rej_tmout_xa_cleanup(struct mlx4_ib_sriov *sriov, int slave)
xa_lock(&sriov->xa_rej_tmout);
xa_for_each(&sriov->xa_rej_tmout, id, item) {
if (slave < 0 || slave == item->slave) {
mod_delayed_work(system_wq, &item->timeout, 0);
mod_delayed_work(cm_wq, &item->timeout, 0);
flush_needed = true;
++cnt;
}
@@ -508,7 +509,7 @@ static void rej_tmout_xa_cleanup(struct mlx4_ib_sriov *sriov, int slave)
xa_unlock(&sriov->xa_rej_tmout);
if (flush_needed) {
flush_scheduled_work();
flush_workqueue(cm_wq);
pr_debug("Deleted %d entries in xarray for slave %d during cleanup\n",
cnt, slave);
}
@@ -540,7 +541,7 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
spin_unlock(&sriov->id_map_lock);
if (need_flush)
flush_scheduled_work(); /* make sure all timers were flushed */
flush_workqueue(cm_wq); /* make sure all timers were flushed */
/* now, remove all leftover entries from databases*/
spin_lock(&sriov->id_map_lock);
@@ -587,3 +588,17 @@ void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
rej_tmout_xa_cleanup(sriov, slave);
}
int mlx4_ib_cm_init(void)
{
cm_wq = alloc_workqueue("mlx4_ib_cm", 0, 0);
if (!cm_wq)
return -ENOMEM;
return 0;
}
void mlx4_ib_cm_destroy(void)
{
destroy_workqueue(cm_wq);
}

View File

@@ -479,8 +479,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_SYS_IMAGE_GUID |
IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
IB_DEVICE_RC_RNR_NAK_GEN;
props->kernel_cap_flags = IBK_BLOCK_MULTICAST_LOOPBACK;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
@@ -494,9 +494,9 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
if (dev->dev->caps.max_gso_sz &&
(dev->dev->rev_id != MLX4_IB_CARD_REV_A0) &&
(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH))
props->device_cap_flags |= IB_DEVICE_UD_TSO;
props->kernel_cap_flags |= IBK_UD_TSO;
if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
props->kernel_cap_flags |= IBK_LOCAL_DMA_LKEY;
if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
(dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
(dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
@@ -3307,10 +3307,14 @@ static int __init mlx4_ib_init(void)
if (!wq)
return -ENOMEM;
err = mlx4_ib_mcg_init();
err = mlx4_ib_cm_init();
if (err)
goto clean_wq;
err = mlx4_ib_mcg_init();
if (err)
goto clean_cm;
err = mlx4_register_interface(&mlx4_ib_interface);
if (err)
goto clean_mcg;
@@ -3320,6 +3324,9 @@ static int __init mlx4_ib_init(void)
clean_mcg:
mlx4_ib_mcg_destroy();
clean_cm:
mlx4_ib_cm_destroy();
clean_wq:
destroy_workqueue(wq);
return err;
@@ -3329,6 +3336,7 @@ static void __exit mlx4_ib_cleanup(void)
{
mlx4_unregister_interface(&mlx4_ib_interface);
mlx4_ib_mcg_destroy();
mlx4_ib_cm_destroy();
destroy_workqueue(wq);
}

View File

@@ -937,4 +937,7 @@ mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table)
int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
int *num_of_mtts);
int mlx4_ib_cm_init(void);
void mlx4_ib_cm_destroy(void);
#endif /* MLX4_IB_H */

View File

@@ -19,6 +19,7 @@ mlx5_ib-y := ah.o \
restrack.o \
srq.o \
srq_cmd.o \
umr.o \
wr.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o

View File

@@ -1095,11 +1095,6 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
if (is_egress) {
err = -EINVAL;
goto free;
}
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
struct mlx5_ib_mcounters *mcounters;

View File

@@ -41,6 +41,7 @@
#include "wr.h"
#include "restrack.h"
#include "counters.h"
#include "umr.h"
#include <rdma/uverbs_std_types.h>
#include <rdma/uverbs_ioctl.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
@@ -854,13 +855,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
IB_DEVICE_MEM_WINDOW_TYPE_2B;
props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
/* We support 'Gappy' memory registration too */
props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG;
props->kernel_cap_flags |= IBK_SG_GAPS_REG;
}
/* IB_WR_REG_MR always requires changing the entity size with UMR */
if (!MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
if (MLX5_CAP_GEN(mdev, sho)) {
props->device_cap_flags |= IB_DEVICE_INTEGRITY_HANDOVER;
props->kernel_cap_flags |= IBK_INTEGRITY_HANDOVER;
/* At this stage no support for signature handover */
props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 |
IB_PROT_T10DIF_TYPE_2 |
@@ -869,7 +870,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
IB_GUARD_T10DIF_CSUM;
}
if (MLX5_CAP_GEN(mdev, block_lb_mc))
props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
props->kernel_cap_flags |= IBK_BLOCK_MULTICAST_LOOPBACK;
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && raw_support) {
if (MLX5_CAP_ETH(mdev, csum_cap)) {
@@ -916,7 +917,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
props->device_cap_flags |= IB_DEVICE_UD_TSO;
props->kernel_cap_flags |= IBK_UD_TSO;
}
if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) &&
@@ -992,7 +993,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
if (dev->odp_caps.general_caps & IB_ODP_SUPPORT)
props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
props->kernel_cap_flags |= IBK_ON_DEMAND_PAGING;
props->odp_caps = dev->odp_caps;
if (!uhw) {
/* ODP for kernel QPs is not implemented for receive
@@ -1013,11 +1014,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
}
if (MLX5_CAP_GEN(mdev, cd))
props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
if (mlx5_core_is_vf(mdev))
props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;
props->kernel_cap_flags |= IBK_VIRTUAL_FUNCTION;
if (mlx5_ib_port_link_layer(ibdev, 1) ==
IB_LINK_LAYER_ETHERNET && raw_support) {
@@ -4008,12 +4006,7 @@ static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
if (err)
mlx5_ib_warn(dev, "mr cache cleanup failed\n");
if (dev->umrc.qp)
ib_destroy_qp(dev->umrc.qp);
if (dev->umrc.cq)
ib_free_cq(dev->umrc.cq);
if (dev->umrc.pd)
ib_dealloc_pd(dev->umrc.pd);
mlx5r_umr_resource_cleanup(dev);
}
static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
@@ -4021,112 +4014,19 @@ static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
ib_unregister_device(&dev->ib_dev);
}
enum {
MAX_UMR_WR = 128,
};
static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
{
struct ib_qp_init_attr *init_attr = NULL;
struct ib_qp_attr *attr = NULL;
struct ib_pd *pd;
struct ib_cq *cq;
struct ib_qp *qp;
int ret;
attr = kzalloc(sizeof(*attr), GFP_KERNEL);
init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
if (!attr || !init_attr) {
ret = -ENOMEM;
goto error_0;
}
ret = mlx5r_umr_resource_init(dev);
if (ret)
return ret;
pd = ib_alloc_pd(&dev->ib_dev, 0);
if (IS_ERR(pd)) {
mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
ret = PTR_ERR(pd);
goto error_0;
}
cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
if (IS_ERR(cq)) {
mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
ret = PTR_ERR(cq);
goto error_2;
}
init_attr->send_cq = cq;
init_attr->recv_cq = cq;
init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
init_attr->cap.max_send_wr = MAX_UMR_WR;
init_attr->cap.max_send_sge = 1;
init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
init_attr->port_num = 1;
qp = ib_create_qp(pd, init_attr);
if (IS_ERR(qp)) {
mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
ret = PTR_ERR(qp);
goto error_3;
}
attr->qp_state = IB_QPS_INIT;
attr->port_num = 1;
ret = ib_modify_qp(qp, attr,
IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT);
if (ret) {
mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
goto error_4;
}
memset(attr, 0, sizeof(*attr));
attr->qp_state = IB_QPS_RTR;
attr->path_mtu = IB_MTU_256;
ret = ib_modify_qp(qp, attr, IB_QP_STATE);
if (ret) {
mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
goto error_4;
}
memset(attr, 0, sizeof(*attr));
attr->qp_state = IB_QPS_RTS;
ret = ib_modify_qp(qp, attr, IB_QP_STATE);
if (ret) {
mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
goto error_4;
}
dev->umrc.qp = qp;
dev->umrc.cq = cq;
dev->umrc.pd = pd;
sema_init(&dev->umrc.sem, MAX_UMR_WR);
ret = mlx5_mr_cache_init(dev);
if (ret) {
mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
goto error_4;
mlx5r_umr_resource_cleanup(dev);
}
kfree(attr);
kfree(init_attr);
return 0;
error_4:
ib_destroy_qp(qp);
dev->umrc.qp = NULL;
error_3:
ib_free_cq(cq);
dev->umrc.cq = NULL;
error_2:
ib_dealloc_pd(pd);
dev->umrc.pd = NULL;
error_0:
kfree(attr);
kfree(init_attr);
return ret;
}

View File

@@ -291,16 +291,9 @@ struct mlx5_ib_flow_db {
};
/* Use macros here so that don't have to duplicate
* enum ib_send_flags and enum ib_qp_type for low-level driver
* enum ib_qp_type for low-level driver
*/
#define MLX5_IB_SEND_UMR_ENABLE_MR (IB_SEND_RESERVED_START << 0)
#define MLX5_IB_SEND_UMR_DISABLE_MR (IB_SEND_RESERVED_START << 1)
#define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 2)
#define MLX5_IB_SEND_UMR_UPDATE_XLT (IB_SEND_RESERVED_START << 3)
#define MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (IB_SEND_RESERVED_START << 4)
#define MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS IB_SEND_RESERVED_END
#define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1
/*
* IB_QPT_GSI creates the software wrapper around GSI, and MLX5_IB_QPT_HW_GSI
@@ -311,9 +304,6 @@ struct mlx5_ib_flow_db {
#define MLX5_IB_QPT_DCT IB_QPT_RESERVED4
#define MLX5_IB_WR_UMR IB_WR_RESERVED1
#define MLX5_IB_UMR_OCTOWORD 16
#define MLX5_IB_UMR_XLT_ALIGNMENT 64
#define MLX5_IB_UPD_XLT_ZAP BIT(0)
#define MLX5_IB_UPD_XLT_ENABLE BIT(1)
#define MLX5_IB_UPD_XLT_ATOMIC BIT(2)
@@ -539,24 +529,6 @@ struct mlx5_ib_cq_buf {
int nent;
};
struct mlx5_umr_wr {
struct ib_send_wr wr;
u64 virt_addr;
u64 offset;
struct ib_pd *pd;
unsigned int page_shift;
unsigned int xlt_size;
u64 length;
int access_flags;
u32 mkey;
u8 ignore_free_state:1;
};
static inline const struct mlx5_umr_wr *umr_wr(const struct ib_send_wr *wr)
{
return container_of(wr, struct mlx5_umr_wr, wr);
}
enum mlx5_ib_cq_pr_flags {
MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD = 1 << 0,
MLX5_IB_CQ_PR_FLAGS_REAL_TIME_TS = 1 << 1,
@@ -1291,9 +1263,6 @@ int mlx5_ib_advise_mr(struct ib_pd *pd,
struct uverbs_attr_bundle *attrs);
int mlx5_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata);
int mlx5_ib_dealloc_mw(struct ib_mw *mw);
int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
int page_shift, int flags);
int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags);
struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
int access_flags);
void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
@@ -1472,9 +1441,6 @@ static inline int is_qp1(enum ib_qp_type qp_type)
return qp_type == MLX5_IB_QPT_HW_GSI || qp_type == IB_QPT_GSI;
}
#define MLX5_MAX_UMR_SHIFT 16
#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
static inline u32 check_cq_create_flags(u32 flags)
{
/*
@@ -1546,59 +1512,6 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
struct mlx5_bfreg_info *bfregi, u32 bfregn,
bool dyn_bfreg);
static inline bool mlx5_ib_can_load_pas_with_umr(struct mlx5_ib_dev *dev,
size_t length)
{
/*
* umr_check_mkey_mask() rejects MLX5_MKEY_MASK_PAGE_SIZE which is
* always set if MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (aka
* MLX5_IB_UPD_XLT_ADDR and MLX5_IB_UPD_XLT_ENABLE) is set. Thus, a mkey
* can never be enabled without this capability. Simplify this weird
* quirky hardware by just saying it can't use PAS lists with UMR at
* all.
*/
if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
return false;
/*
* length is the size of the MR in bytes when mlx5_ib_update_xlt() is
* used.
*/
if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) &&
length >= MLX5_MAX_UMR_PAGES * PAGE_SIZE)
return false;
return true;
}
/*
* true if an existing MR can be reconfigured to new access_flags using UMR.
* Older HW cannot use UMR to update certain elements of the MKC. See
* umr_check_mkey_mask(), get_umr_update_access_mask() and umr_check_mkey_mask()
*/
static inline bool mlx5_ib_can_reconfig_with_umr(struct mlx5_ib_dev *dev,
unsigned int current_access_flags,
unsigned int target_access_flags)
{
unsigned int diffs = current_access_flags ^ target_access_flags;
if ((diffs & IB_ACCESS_REMOTE_ATOMIC) &&
MLX5_CAP_GEN(dev->mdev, atomic) &&
MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
return false;
if ((diffs & IB_ACCESS_RELAXED_ORDERING) &&
MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) &&
!MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
return false;
if ((diffs & IB_ACCESS_RELAXED_ORDERING) &&
MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) &&
!MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
return false;
return true;
}
static inline int mlx5r_store_odp_mkey(struct mlx5_ib_dev *dev,
struct mlx5_ib_mkey *mmkey)
{

View File

@@ -44,13 +44,7 @@
#include <rdma/ib_verbs.h>
#include "dm.h"
#include "mlx5_ib.h"
/*
* We can't use an array for xlt_emergency_page because dma_map_single doesn't
* work on kernel modules memory
*/
void *xlt_emergency_page;
static DEFINE_MUTEX(xlt_emergency_page_mutex);
#include "umr.h"
enum {
MAX_PENDING_REG_MR = 8,
@@ -128,11 +122,6 @@ mlx5_ib_create_mkey_cb(struct mlx5_ib_dev *dev,
static int mr_cache_max_order(struct mlx5_ib_dev *dev);
static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent);
static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
{
return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
}
static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)));
@@ -600,7 +589,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
struct mlx5_ib_mr *mr;
/* Matches access in alloc_cache_mr() */
if (!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags))
if (!mlx5r_umr_can_reconfig(dev, 0, access_flags))
return ERR_PTR(-EOPNOTSUPP);
spin_lock_irq(&ent->lock);
@@ -741,7 +730,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
if ((dev->mdev->profile.mask & MLX5_PROF_MASK_MR_CACHE) &&
!dev->is_rep && mlx5_core_is_pf(dev->mdev) &&
mlx5_ib_can_load_pas_with_umr(dev, 0))
mlx5r_umr_can_load_pas(dev, 0))
ent->limit = dev->mdev->profile.mr_cache[i].limit;
else
ent->limit = 0;
@@ -848,49 +837,6 @@ static int mr_cache_max_order(struct mlx5_ib_dev *dev)
return MLX5_MAX_UMR_SHIFT;
}
static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct mlx5_ib_umr_context *context =
container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
context->status = wc->status;
complete(&context->done);
}
static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
{
context->cqe.done = mlx5_ib_umr_done;
context->status = -1;
init_completion(&context->done);
}
static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
struct mlx5_umr_wr *umrwr)
{
struct umr_common *umrc = &dev->umrc;
const struct ib_send_wr *bad;
int err;
struct mlx5_ib_umr_context umr_context;
mlx5_ib_init_umr_context(&umr_context);
umrwr->wr.wr_cqe = &umr_context.cqe;
down(&umrc->sem);
err = ib_post_send(umrc->qp, &umrwr->wr, &bad);
if (err) {
mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
} else {
wait_for_completion(&umr_context.done);
if (umr_context.status != IB_WC_SUCCESS) {
mlx5_ib_warn(dev, "reg umr failed (%u)\n",
umr_context.status);
err = -EFAULT;
}
}
up(&umrc->sem);
return err;
}
static struct mlx5_cache_ent *mr_cache_ent_from_order(struct mlx5_ib_dev *dev,
unsigned int order)
{
@@ -949,7 +895,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
* cache then synchronously create an uncached one.
*/
if (!ent || ent->limit == 0 ||
!mlx5_ib_can_reconfig_with_umr(dev, 0, access_flags)) {
!mlx5r_umr_can_reconfig(dev, 0, access_flags)) {
mutex_lock(&dev->slow_path_mutex);
mr = reg_create(pd, umem, iova, access_flags, page_size, false);
mutex_unlock(&dev->slow_path_mutex);
@@ -968,289 +914,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
return mr;
}
#define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
MLX5_UMR_MTT_ALIGNMENT)
#define MLX5_SPARE_UMR_CHUNK 0x10000
/*
* Allocate a temporary buffer to hold the per-page information to transfer to
* HW. For efficiency this should be as large as it can be, but buffer
* allocation failure is not allowed, so try smaller sizes.
*/
static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
{
const size_t xlt_chunk_align =
MLX5_UMR_MTT_ALIGNMENT / ent_size;
size_t size;
void *res = NULL;
static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0);
/*
* MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the
* allocation can't trigger any kind of reclaim.
*/
might_sleep();
gfp_mask |= __GFP_ZERO | __GFP_NORETRY;
/*
* If the system already has a suitable high order page then just use
* that, but don't try hard to create one. This max is about 1M, so a
* free x86 huge page will satisfy it.
*/
size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align),
MLX5_MAX_UMR_CHUNK);
*nents = size / ent_size;
res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
get_order(size));
if (res)
return res;
if (size > MLX5_SPARE_UMR_CHUNK) {
size = MLX5_SPARE_UMR_CHUNK;
*nents = size / ent_size;
res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
get_order(size));
if (res)
return res;
}
*nents = PAGE_SIZE / ent_size;
res = (void *)__get_free_page(gfp_mask);
if (res)
return res;
mutex_lock(&xlt_emergency_page_mutex);
memset(xlt_emergency_page, 0, PAGE_SIZE);
return xlt_emergency_page;
}
static void mlx5_ib_free_xlt(void *xlt, size_t length)
{
if (xlt == xlt_emergency_page) {
mutex_unlock(&xlt_emergency_page_mutex);
return;
}
free_pages((unsigned long)xlt, get_order(length));
}
/*
* Create a MLX5_IB_SEND_UMR_UPDATE_XLT work request and XLT buffer ready for
* submission.
*/
static void *mlx5_ib_create_xlt_wr(struct mlx5_ib_mr *mr,
struct mlx5_umr_wr *wr, struct ib_sge *sg,
size_t nents, size_t ent_size,
unsigned int flags)
{
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
struct device *ddev = &dev->mdev->pdev->dev;
dma_addr_t dma;
void *xlt;
xlt = mlx5_ib_alloc_xlt(&nents, ent_size,
flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC :
GFP_KERNEL);
sg->length = nents * ent_size;
dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE);
if (dma_mapping_error(ddev, dma)) {
mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
mlx5_ib_free_xlt(xlt, sg->length);
return NULL;
}
sg->addr = dma;
sg->lkey = dev->umrc.pd->local_dma_lkey;
memset(wr, 0, sizeof(*wr));
wr->wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
wr->wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
wr->wr.sg_list = sg;
wr->wr.num_sge = 1;
wr->wr.opcode = MLX5_IB_WR_UMR;
wr->pd = mr->ibmr.pd;
wr->mkey = mr->mmkey.key;
wr->length = mr->ibmr.length;
wr->virt_addr = mr->ibmr.iova;
wr->access_flags = mr->access_flags;
wr->page_shift = mr->page_shift;
wr->xlt_size = sg->length;
return xlt;
}
static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
struct ib_sge *sg)
{
struct device *ddev = &dev->mdev->pdev->dev;
dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE);
mlx5_ib_free_xlt(xlt, sg->length);
}
static unsigned int xlt_wr_final_send_flags(unsigned int flags)
{
unsigned int res = 0;
if (flags & MLX5_IB_UPD_XLT_ENABLE)
res |= MLX5_IB_SEND_UMR_ENABLE_MR |
MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
if (flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS)
res |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
if (flags & MLX5_IB_UPD_XLT_ADDR)
res |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
return res;
}
int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
int page_shift, int flags)
{
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
struct device *ddev = &dev->mdev->pdev->dev;
void *xlt;
struct mlx5_umr_wr wr;
struct ib_sge sg;
int err = 0;
int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
? sizeof(struct mlx5_klm)
: sizeof(struct mlx5_mtt);
const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
const int page_mask = page_align - 1;
size_t pages_mapped = 0;
size_t pages_to_map = 0;
size_t pages_iter;
size_t size_to_map = 0;
size_t orig_sg_length;
if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
!umr_can_use_indirect_mkey(dev))
return -EPERM;
if (WARN_ON(!mr->umem->is_odp))
return -EINVAL;
/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
* so we need to align the offset and length accordingly
*/
if (idx & page_mask) {
npages += idx & page_mask;
idx &= ~page_mask;
}
pages_to_map = ALIGN(npages, page_align);
xlt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, npages, desc_size, flags);
if (!xlt)
return -ENOMEM;
pages_iter = sg.length / desc_size;
orig_sg_length = sg.length;
if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
pages_to_map = min_t(size_t, pages_to_map, max_pages);
}
wr.page_shift = page_shift;
for (pages_mapped = 0;
pages_mapped < pages_to_map && !err;
pages_mapped += pages_iter, idx += pages_iter) {
npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
size_to_map = npages * desc_size;
dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
dma_sync_single_for_device(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT);
if (pages_mapped + pages_iter >= pages_to_map)
wr.wr.send_flags |= xlt_wr_final_send_flags(flags);
wr.offset = idx * desc_size;
wr.xlt_size = sg.length;
err = mlx5_ib_post_send_wait(dev, &wr);
}
sg.length = orig_sg_length;
mlx5_ib_unmap_free_xlt(dev, xlt, &sg);
return err;
}
/*
* Send the DMA list to the HW for a normal MR using UMR.
* Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
* flag may be used.
*/
int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
{
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
struct device *ddev = &dev->mdev->pdev->dev;
struct ib_block_iter biter;
struct mlx5_mtt *cur_mtt;
struct mlx5_umr_wr wr;
size_t orig_sg_length;
struct mlx5_mtt *mtt;
size_t final_size;
struct ib_sge sg;
int err = 0;
if (WARN_ON(mr->umem->is_odp))
return -EINVAL;
mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg,
ib_umem_num_dma_blocks(mr->umem,
1 << mr->page_shift),
sizeof(*mtt), flags);
if (!mtt)
return -ENOMEM;
orig_sg_length = sg.length;
cur_mtt = mtt;
rdma_for_each_block (mr->umem->sgt_append.sgt.sgl, &biter,
mr->umem->sgt_append.sgt.nents,
BIT(mr->page_shift)) {
if (cur_mtt == (void *)mtt + sg.length) {
dma_sync_single_for_device(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
err = mlx5_ib_post_send_wait(dev, &wr);
if (err)
goto err;
dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
wr.offset += sg.length;
cur_mtt = mtt;
}
cur_mtt->ptag =
cpu_to_be64(rdma_block_iter_dma_address(&biter) |
MLX5_IB_MTT_PRESENT);
if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
cur_mtt->ptag = 0;
cur_mtt++;
}
final_size = (void *)cur_mtt - (void *)mtt;
sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT);
memset(cur_mtt, 0, sg.length - final_size);
wr.wr.send_flags |= xlt_wr_final_send_flags(flags);
wr.xlt_size = sg.length;
dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
err = mlx5_ib_post_send_wait(dev, &wr);
err:
sg.length = orig_sg_length;
mlx5_ib_unmap_free_xlt(dev, mtt, &sg);
return err;
}
/*
* If ibmr is NULL it will be allocated by reg_create.
* Else, the given ibmr will be used.
@@ -1441,7 +1104,7 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
bool xlt_with_umr;
int err;
xlt_with_umr = mlx5_ib_can_load_pas_with_umr(dev, umem->length);
xlt_with_umr = mlx5r_umr_can_load_pas(dev, umem->length);
if (xlt_with_umr) {
mr = alloc_cacheable_mr(pd, umem, iova, access_flags);
} else {
@@ -1467,7 +1130,7 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
* configured properly but left disabled. It is safe to go ahead
* and configure it again via UMR while enabling it.
*/
err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
err = mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE);
if (err) {
mlx5_ib_dereg_mr(&mr->ibmr, NULL);
return ERR_PTR(err);
@@ -1504,7 +1167,7 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length,
}
/* ODP requires xlt update via umr to work. */
if (!mlx5_ib_can_load_pas_with_umr(dev, length))
if (!mlx5r_umr_can_load_pas(dev, length))
return ERR_PTR(-EINVAL);
odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags,
@@ -1566,7 +1229,7 @@ static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach)
if (!umem_dmabuf->sgt)
return;
mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP);
mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP);
ib_umem_dmabuf_unmap_pages(umem_dmabuf);
}
@@ -1594,7 +1257,7 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
offset, virt_addr, length, fd, access_flags);
/* dmabuf requires xlt update via umr to work. */
if (!mlx5_ib_can_load_pas_with_umr(dev, length))
if (!mlx5r_umr_can_load_pas(dev, length))
return ERR_PTR(-EINVAL);
umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev, offset, length, fd,
@@ -1631,31 +1294,6 @@ err_dereg_mr:
return ERR_PTR(err);
}
/**
* revoke_mr - Fence all DMA on the MR
* @mr: The MR to fence
*
* Upon return the NIC will not be doing any DMA to the pages under the MR,
* and any DMA in progress will be completed. Failure of this function
* indicates the HW has failed catastrophically.
*/
static int revoke_mr(struct mlx5_ib_mr *mr)
{
struct mlx5_umr_wr umrwr = {};
if (mr_to_mdev(mr)->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
return 0;
umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
umrwr.wr.opcode = MLX5_IB_WR_UMR;
umrwr.pd = mr_to_mdev(mr)->umrc.pd;
umrwr.mkey = mr->mmkey.key;
umrwr.ignore_free_state = 1;
return mlx5_ib_post_send_wait(mr_to_mdev(mr), &umrwr);
}
/*
* True if the change in access flags can be done via UMR, only some access
* flags can be updated.
@@ -1669,32 +1307,8 @@ static bool can_use_umr_rereg_access(struct mlx5_ib_dev *dev,
if (diffs & ~(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
IB_ACCESS_REMOTE_READ | IB_ACCESS_RELAXED_ORDERING))
return false;
return mlx5_ib_can_reconfig_with_umr(dev, current_access_flags,
target_access_flags);
}
static int umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
int access_flags)
{
struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
struct mlx5_umr_wr umrwr = {
.wr = {
.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS,
.opcode = MLX5_IB_WR_UMR,
},
.mkey = mr->mmkey.key,
.pd = pd,
.access_flags = access_flags,
};
int err;
err = mlx5_ib_post_send_wait(dev, &umrwr);
if (err)
return err;
mr->access_flags = access_flags;
return 0;
return mlx5r_umr_can_reconfig(dev, current_access_flags,
target_access_flags);
}
static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
@@ -1707,7 +1321,7 @@ static bool can_use_umr_rereg_pas(struct mlx5_ib_mr *mr,
/* We only track the allocated sizes of MRs from the cache */
if (!mr->cache_ent)
return false;
if (!mlx5_ib_can_load_pas_with_umr(dev, new_umem->length))
if (!mlx5r_umr_can_load_pas(dev, new_umem->length))
return false;
*page_size =
@@ -1732,7 +1346,7 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
* with it. This ensure the change is atomic relative to any use of the
* MR.
*/
err = revoke_mr(mr);
err = mlx5r_umr_revoke_mr(mr);
if (err)
return err;
@@ -1750,7 +1364,7 @@ static int umr_rereg_pas(struct mlx5_ib_mr *mr, struct ib_pd *pd,
mr->ibmr.length = new_umem->length;
mr->page_shift = order_base_2(page_size);
mr->umem = new_umem;
err = mlx5_ib_update_mr_pas(mr, upd_flags);
err = mlx5r_umr_update_mr_pas(mr, upd_flags);
if (err) {
/*
* The MR is revoked at this point so there is no issue to free
@@ -1797,7 +1411,8 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
/* Fast path for PD/access change */
if (can_use_umr_rereg_access(dev, mr->access_flags,
new_access_flags)) {
err = umr_rereg_pd_access(mr, new_pd, new_access_flags);
err = mlx5r_umr_rereg_pd_access(mr, new_pd,
new_access_flags);
if (err)
return ERR_PTR(err);
return NULL;
@@ -1810,7 +1425,7 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
* Only one active MR can refer to a umem at one time, revoke
* the old MR before assigning the umem to the new one.
*/
err = revoke_mr(mr);
err = mlx5r_umr_revoke_mr(mr);
if (err)
return ERR_PTR(err);
umem = mr->umem;
@@ -1955,7 +1570,7 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
/* Stop DMA */
if (mr->cache_ent) {
if (revoke_mr(mr)) {
if (mlx5r_umr_revoke_mr(mr)) {
spin_lock_irq(&mr->cache_ent->lock);
mr->cache_ent->total_mrs--;
spin_unlock_irq(&mr->cache_ent->lock);

View File

@@ -38,6 +38,7 @@
#include "mlx5_ib.h"
#include "cmd.h"
#include "umr.h"
#include "qp.h"
#include <linux/mlx5/eq.h>
@@ -117,7 +118,7 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
*
* xa_store()
* mutex_lock(umem_mutex)
* mlx5_ib_update_xlt()
* mlx5r_umr_update_xlt()
* mutex_unlock(umem_mutex)
* destroy lkey
*
@@ -198,9 +199,9 @@ static void free_implicit_child_mr_work(struct work_struct *work)
mlx5r_deref_wait_odp_mkey(&mr->mmkey);
mutex_lock(&odp_imr->umem_mutex);
mlx5_ib_update_xlt(mr->parent, ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT,
1, 0,
MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC);
mlx5r_umr_update_xlt(mr->parent,
ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT, 1, 0,
MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC);
mutex_unlock(&odp_imr->umem_mutex);
mlx5_ib_dereg_mr(&mr->ibmr, NULL);
@@ -282,19 +283,19 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
u64 umr_offset = idx & umr_block_mask;
if (in_block && umr_offset == 0) {
mlx5_ib_update_xlt(mr, blk_start_idx,
idx - blk_start_idx, 0,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
mlx5r_umr_update_xlt(mr, blk_start_idx,
idx - blk_start_idx, 0,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
in_block = 0;
}
}
}
if (in_block)
mlx5_ib_update_xlt(mr, blk_start_idx,
idx - blk_start_idx + 1, 0,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
mlx5r_umr_update_xlt(mr, blk_start_idx,
idx - blk_start_idx + 1, 0,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
mlx5_update_odp_stats(mr, invalidations, invalidations);
@@ -323,8 +324,7 @@ static void internal_fill_odp_caps(struct mlx5_ib_dev *dev)
memset(caps, 0, sizeof(*caps));
if (!MLX5_CAP_GEN(dev->mdev, pg) ||
!mlx5_ib_can_load_pas_with_umr(dev, 0))
if (!MLX5_CAP_GEN(dev->mdev, pg) || !mlx5r_umr_can_load_pas(dev, 0))
return;
caps->general_caps = IB_ODP_SUPPORT;
@@ -442,11 +442,11 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
*/
refcount_set(&mr->mmkey.usecount, 2);
err = mlx5_ib_update_xlt(mr, 0,
MLX5_IMR_MTT_ENTRIES,
PAGE_SHIFT,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ENABLE);
err = mlx5r_umr_update_xlt(mr, 0,
MLX5_IMR_MTT_ENTRIES,
PAGE_SHIFT,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ENABLE);
if (err) {
ret = ERR_PTR(err);
goto out_mr;
@@ -487,8 +487,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
struct mlx5_ib_mr *imr;
int err;
if (!mlx5_ib_can_load_pas_with_umr(dev,
MLX5_IMR_MTT_ENTRIES * PAGE_SIZE))
if (!mlx5r_umr_can_load_pas(dev, MLX5_IMR_MTT_ENTRIES * PAGE_SIZE))
return ERR_PTR(-EOPNOTSUPP);
umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags);
@@ -510,16 +509,15 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
imr->ibmr.lkey = imr->mmkey.key;
imr->ibmr.rkey = imr->mmkey.key;
imr->ibmr.device = &dev->ib_dev;
imr->umem = &umem_odp->umem;
imr->is_odp_implicit = true;
xa_init(&imr->implicit_children);
err = mlx5_ib_update_xlt(imr, 0,
mlx5_imr_ksm_entries,
MLX5_KSM_PAGE_SHIFT,
MLX5_IB_UPD_XLT_INDIRECT |
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ENABLE);
err = mlx5r_umr_update_xlt(imr, 0,
mlx5_imr_ksm_entries,
MLX5_KSM_PAGE_SHIFT,
MLX5_IB_UPD_XLT_INDIRECT |
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ENABLE);
if (err)
goto out_mr;
@@ -582,7 +580,7 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
* No need to check whether the MTTs really belong to this MR, since
* ib_umem_odp_map_dma_and_lock already checks this.
*/
ret = mlx5_ib_update_xlt(mr, start_idx, np, page_shift, xlt_flags);
ret = mlx5r_umr_update_xlt(mr, start_idx, np, page_shift, xlt_flags);
mutex_unlock(&odp->umem_mutex);
if (ret < 0) {
@@ -680,9 +678,9 @@ out:
* next pagefault handler will see the new information.
*/
mutex_lock(&odp_imr->umem_mutex);
err = mlx5_ib_update_xlt(imr, upd_start_idx, upd_len, 0,
MLX5_IB_UPD_XLT_INDIRECT |
MLX5_IB_UPD_XLT_ATOMIC);
err = mlx5r_umr_update_xlt(imr, upd_start_idx, upd_len, 0,
MLX5_IB_UPD_XLT_INDIRECT |
MLX5_IB_UPD_XLT_ATOMIC);
mutex_unlock(&odp_imr->umem_mutex);
if (err) {
mlx5_ib_err(mr_to_mdev(imr), "Failed to update PAS\n");
@@ -716,7 +714,7 @@ static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt,
ib_umem_dmabuf_unmap_pages(umem_dmabuf);
err = -EINVAL;
} else {
err = mlx5_ib_update_mr_pas(mr, xlt_flags);
err = mlx5r_umr_update_mr_pas(mr, xlt_flags);
}
dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv);

View File

@@ -40,6 +40,7 @@
#include "ib_rep.h"
#include "counters.h"
#include "cmd.h"
#include "umr.h"
#include "qp.h"
#include "wr.h"

View File

@@ -0,0 +1,700 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
#include <rdma/ib_umem_odp.h>
#include "mlx5_ib.h"
#include "umr.h"
#include "wr.h"
/*
* We can't use an array for xlt_emergency_page because dma_map_single doesn't
* work on kernel modules memory
*/
void *xlt_emergency_page;
static DEFINE_MUTEX(xlt_emergency_page_mutex);
static __be64 get_umr_enable_mr_mask(void)
{
u64 result;
result = MLX5_MKEY_MASK_KEY |
MLX5_MKEY_MASK_FREE;
return cpu_to_be64(result);
}
static __be64 get_umr_disable_mr_mask(void)
{
u64 result;
result = MLX5_MKEY_MASK_FREE;
return cpu_to_be64(result);
}
static __be64 get_umr_update_translation_mask(void)
{
u64 result;
result = MLX5_MKEY_MASK_LEN |
MLX5_MKEY_MASK_PAGE_SIZE |
MLX5_MKEY_MASK_START_ADDR;
return cpu_to_be64(result);
}
static __be64 get_umr_update_access_mask(struct mlx5_ib_dev *dev)
{
u64 result;
result = MLX5_MKEY_MASK_LR |
MLX5_MKEY_MASK_LW |
MLX5_MKEY_MASK_RR |
MLX5_MKEY_MASK_RW;
if (MLX5_CAP_GEN(dev->mdev, atomic))
result |= MLX5_MKEY_MASK_A;
if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE;
if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ;
return cpu_to_be64(result);
}
static __be64 get_umr_update_pd_mask(void)
{
u64 result;
result = MLX5_MKEY_MASK_PD;
return cpu_to_be64(result);
}
static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
{
if (mask & MLX5_MKEY_MASK_PAGE_SIZE &&
MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
return -EPERM;
if (mask & MLX5_MKEY_MASK_A &&
MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
return -EPERM;
if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE &&
!MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
return -EPERM;
if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ &&
!MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
return -EPERM;
return 0;
}
enum {
MAX_UMR_WR = 128,
};
static int mlx5r_umr_qp_rst2rts(struct mlx5_ib_dev *dev, struct ib_qp *qp)
{
struct ib_qp_attr attr = {};
int ret;
attr.qp_state = IB_QPS_INIT;
attr.port_num = 1;
ret = ib_modify_qp(qp, &attr,
IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT);
if (ret) {
mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
return ret;
}
memset(&attr, 0, sizeof(attr));
attr.qp_state = IB_QPS_RTR;
ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
if (ret) {
mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
return ret;
}
memset(&attr, 0, sizeof(attr));
attr.qp_state = IB_QPS_RTS;
ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
if (ret) {
mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
return ret;
}
return 0;
}
int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
{
struct ib_qp_init_attr init_attr = {};
struct ib_pd *pd;
struct ib_cq *cq;
struct ib_qp *qp;
int ret;
pd = ib_alloc_pd(&dev->ib_dev, 0);
if (IS_ERR(pd)) {
mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
return PTR_ERR(pd);
}
cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
if (IS_ERR(cq)) {
mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
ret = PTR_ERR(cq);
goto destroy_pd;
}
init_attr.send_cq = cq;
init_attr.recv_cq = cq;
init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
init_attr.cap.max_send_wr = MAX_UMR_WR;
init_attr.cap.max_send_sge = 1;
init_attr.qp_type = MLX5_IB_QPT_REG_UMR;
init_attr.port_num = 1;
qp = ib_create_qp(pd, &init_attr);
if (IS_ERR(qp)) {
mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
ret = PTR_ERR(qp);
goto destroy_cq;
}
ret = mlx5r_umr_qp_rst2rts(dev, qp);
if (ret)
goto destroy_qp;
dev->umrc.qp = qp;
dev->umrc.cq = cq;
dev->umrc.pd = pd;
sema_init(&dev->umrc.sem, MAX_UMR_WR);
return 0;
destroy_qp:
ib_destroy_qp(qp);
destroy_cq:
ib_free_cq(cq);
destroy_pd:
ib_dealloc_pd(pd);
return ret;
}
void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev)
{
ib_destroy_qp(dev->umrc.qp);
ib_free_cq(dev->umrc.cq);
ib_dealloc_pd(dev->umrc.pd);
}
static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe,
struct mlx5r_umr_wqe *wqe, bool with_data)
{
unsigned int wqe_size =
with_data ? sizeof(struct mlx5r_umr_wqe) :
sizeof(struct mlx5r_umr_wqe) -
sizeof(struct mlx5_wqe_data_seg);
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_wqe_ctrl_seg *ctrl;
union {
struct ib_cqe *ib_cqe;
u64 wr_id;
} id;
void *cur_edge, *seg;
unsigned long flags;
unsigned int idx;
int size, err;
if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR))
return -EIO;
spin_lock_irqsave(&qp->sq.lock, flags);
err = mlx5r_begin_wqe(qp, &seg, &ctrl, &idx, &size, &cur_edge, 0,
cpu_to_be32(mkey), false, false);
if (WARN_ON(err))
goto out;
qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
mlx5r_memcpy_send_wqe(&qp->sq, &cur_edge, &seg, &size, wqe, wqe_size);
id.ib_cqe = cqe;
mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, id.wr_id, 0,
MLX5_FENCE_MODE_NONE, MLX5_OPCODE_UMR);
mlx5r_ring_db(qp, 1, ctrl);
out:
spin_unlock_irqrestore(&qp->sq.lock, flags);
return err;
}
static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct mlx5_ib_umr_context *context =
container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
context->status = wc->status;
complete(&context->done);
}
static inline void mlx5r_umr_init_context(struct mlx5r_umr_context *context)
{
context->cqe.done = mlx5r_umr_done;
init_completion(&context->done);
}
static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey,
struct mlx5r_umr_wqe *wqe, bool with_data)
{
struct umr_common *umrc = &dev->umrc;
struct mlx5r_umr_context umr_context;
int err;
err = umr_check_mkey_mask(dev, be64_to_cpu(wqe->ctrl_seg.mkey_mask));
if (WARN_ON(err))
return err;
mlx5r_umr_init_context(&umr_context);
down(&umrc->sem);
err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context.cqe, wqe,
with_data);
if (err)
mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
else {
wait_for_completion(&umr_context.done);
if (umr_context.status != IB_WC_SUCCESS) {
mlx5_ib_warn(dev, "reg umr failed (%u)\n",
umr_context.status);
err = -EFAULT;
}
}
up(&umrc->sem);
return err;
}
/**
* mlx5r_umr_revoke_mr - Fence all DMA on the MR
* @mr: The MR to fence
*
* Upon return the NIC will not be doing any DMA to the pages under the MR,
* and any DMA in progress will be completed. Failure of this function
* indicates the HW has failed catastrophically.
*/
int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr)
{
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
struct mlx5r_umr_wqe wqe = {};
if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
return 0;
wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
wqe.ctrl_seg.mkey_mask |= get_umr_disable_mr_mask();
wqe.ctrl_seg.flags |= MLX5_UMR_INLINE;
MLX5_SET(mkc, &wqe.mkey_seg, free, 1);
MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(dev->umrc.pd)->pdn);
MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
mlx5_mkey_variant(mr->mmkey.key));
return mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
}
static void mlx5r_umr_set_access_flags(struct mlx5_ib_dev *dev,
struct mlx5_mkey_seg *seg,
unsigned int access_flags)
{
MLX5_SET(mkc, seg, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
MLX5_SET(mkc, seg, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
MLX5_SET(mkc, seg, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
MLX5_SET(mkc, seg, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
MLX5_SET(mkc, seg, lr, 1);
MLX5_SET(mkc, seg, relaxed_ordering_write,
!!(access_flags & IB_ACCESS_RELAXED_ORDERING));
MLX5_SET(mkc, seg, relaxed_ordering_read,
!!(access_flags & IB_ACCESS_RELAXED_ORDERING));
}
int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
int access_flags)
{
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
struct mlx5r_umr_wqe wqe = {};
int err;
wqe.ctrl_seg.mkey_mask = get_umr_update_access_mask(dev);
wqe.ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
wqe.ctrl_seg.flags = MLX5_UMR_CHECK_FREE;
wqe.ctrl_seg.flags |= MLX5_UMR_INLINE;
mlx5r_umr_set_access_flags(dev, &wqe.mkey_seg, access_flags);
MLX5_SET(mkc, &wqe.mkey_seg, pd, to_mpd(pd)->pdn);
MLX5_SET(mkc, &wqe.mkey_seg, qpn, 0xffffff);
MLX5_SET(mkc, &wqe.mkey_seg, mkey_7_0,
mlx5_mkey_variant(mr->mmkey.key));
err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, false);
if (err)
return err;
mr->access_flags = access_flags;
return 0;
}
#define MLX5_MAX_UMR_CHUNK \
((1 << (MLX5_MAX_UMR_SHIFT + 4)) - MLX5_UMR_MTT_ALIGNMENT)
#define MLX5_SPARE_UMR_CHUNK 0x10000
/*
* Allocate a temporary buffer to hold the per-page information to transfer to
* HW. For efficiency this should be as large as it can be, but buffer
* allocation failure is not allowed, so try smaller sizes.
*/
static void *mlx5r_umr_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
{
const size_t xlt_chunk_align = MLX5_UMR_MTT_ALIGNMENT / ent_size;
size_t size;
void *res = NULL;
static_assert(PAGE_SIZE % MLX5_UMR_MTT_ALIGNMENT == 0);
/*
* MLX5_IB_UPD_XLT_ATOMIC doesn't signal an atomic context just that the
* allocation can't trigger any kind of reclaim.
*/
might_sleep();
gfp_mask |= __GFP_ZERO | __GFP_NORETRY;
/*
* If the system already has a suitable high order page then just use
* that, but don't try hard to create one. This max is about 1M, so a
* free x86 huge page will satisfy it.
*/
size = min_t(size_t, ent_size * ALIGN(*nents, xlt_chunk_align),
MLX5_MAX_UMR_CHUNK);
*nents = size / ent_size;
res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
get_order(size));
if (res)
return res;
if (size > MLX5_SPARE_UMR_CHUNK) {
size = MLX5_SPARE_UMR_CHUNK;
*nents = size / ent_size;
res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
get_order(size));
if (res)
return res;
}
*nents = PAGE_SIZE / ent_size;
res = (void *)__get_free_page(gfp_mask);
if (res)
return res;
mutex_lock(&xlt_emergency_page_mutex);
memset(xlt_emergency_page, 0, PAGE_SIZE);
return xlt_emergency_page;
}
static void mlx5r_umr_free_xlt(void *xlt, size_t length)
{
if (xlt == xlt_emergency_page) {
mutex_unlock(&xlt_emergency_page_mutex);
return;
}
free_pages((unsigned long)xlt, get_order(length));
}
static void mlx5r_umr_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt,
struct ib_sge *sg)
{
struct device *ddev = &dev->mdev->pdev->dev;
dma_unmap_single(ddev, sg->addr, sg->length, DMA_TO_DEVICE);
mlx5r_umr_free_xlt(xlt, sg->length);
}
/*
* Create an XLT buffer ready for submission.
*/
static void *mlx5r_umr_create_xlt(struct mlx5_ib_dev *dev, struct ib_sge *sg,
size_t nents, size_t ent_size,
unsigned int flags)
{
struct device *ddev = &dev->mdev->pdev->dev;
dma_addr_t dma;
void *xlt;
xlt = mlx5r_umr_alloc_xlt(&nents, ent_size,
flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC :
GFP_KERNEL);
sg->length = nents * ent_size;
dma = dma_map_single(ddev, xlt, sg->length, DMA_TO_DEVICE);
if (dma_mapping_error(ddev, dma)) {
mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
mlx5r_umr_free_xlt(xlt, sg->length);
return NULL;
}
sg->addr = dma;
sg->lkey = dev->umrc.pd->local_dma_lkey;
return xlt;
}
static void
mlx5r_umr_set_update_xlt_ctrl_seg(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
unsigned int flags, struct ib_sge *sg)
{
if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
/* fail if free */
ctrl_seg->flags = MLX5_UMR_CHECK_FREE;
else
/* fail if not free */
ctrl_seg->flags = MLX5_UMR_CHECK_NOT_FREE;
ctrl_seg->xlt_octowords =
cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
}
static void mlx5r_umr_set_update_xlt_mkey_seg(struct mlx5_ib_dev *dev,
struct mlx5_mkey_seg *mkey_seg,
struct mlx5_ib_mr *mr,
unsigned int page_shift)
{
mlx5r_umr_set_access_flags(dev, mkey_seg, mr->access_flags);
MLX5_SET(mkc, mkey_seg, pd, to_mpd(mr->ibmr.pd)->pdn);
MLX5_SET64(mkc, mkey_seg, start_addr, mr->ibmr.iova);
MLX5_SET64(mkc, mkey_seg, len, mr->ibmr.length);
MLX5_SET(mkc, mkey_seg, log_page_size, page_shift);
MLX5_SET(mkc, mkey_seg, qpn, 0xffffff);
MLX5_SET(mkc, mkey_seg, mkey_7_0, mlx5_mkey_variant(mr->mmkey.key));
}
static void
mlx5r_umr_set_update_xlt_data_seg(struct mlx5_wqe_data_seg *data_seg,
struct ib_sge *sg)
{
data_seg->byte_count = cpu_to_be32(sg->length);
data_seg->lkey = cpu_to_be32(sg->lkey);
data_seg->addr = cpu_to_be64(sg->addr);
}
static void mlx5r_umr_update_offset(struct mlx5_wqe_umr_ctrl_seg *ctrl_seg,
u64 offset)
{
u64 octo_offset = mlx5r_umr_get_xlt_octo(offset);
ctrl_seg->xlt_offset = cpu_to_be16(octo_offset & 0xffff);
ctrl_seg->xlt_offset_47_16 = cpu_to_be32(octo_offset >> 16);
ctrl_seg->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
}
static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev,
struct mlx5r_umr_wqe *wqe,
struct mlx5_ib_mr *mr, struct ib_sge *sg,
unsigned int flags)
{
bool update_pd_access, update_translation;
if (flags & MLX5_IB_UPD_XLT_ENABLE)
wqe->ctrl_seg.mkey_mask |= get_umr_enable_mr_mask();
update_pd_access = flags & MLX5_IB_UPD_XLT_ENABLE ||
flags & MLX5_IB_UPD_XLT_PD ||
flags & MLX5_IB_UPD_XLT_ACCESS;
if (update_pd_access) {
wqe->ctrl_seg.mkey_mask |= get_umr_update_access_mask(dev);
wqe->ctrl_seg.mkey_mask |= get_umr_update_pd_mask();
}
update_translation =
flags & MLX5_IB_UPD_XLT_ENABLE || flags & MLX5_IB_UPD_XLT_ADDR;
if (update_translation) {
wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask();
if (!mr->ibmr.length)
MLX5_SET(mkc, &wqe->mkey_seg, length64, 1);
}
wqe->ctrl_seg.xlt_octowords =
cpu_to_be16(mlx5r_umr_get_xlt_octo(sg->length));
wqe->data_seg.byte_count = cpu_to_be32(sg->length);
}
/*
* Send the DMA list to the HW for a normal MR using UMR.
* Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP
* flag may be used.
*/
int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags)
{
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
struct device *ddev = &dev->mdev->pdev->dev;
struct mlx5r_umr_wqe wqe = {};
struct ib_block_iter biter;
struct mlx5_mtt *cur_mtt;
size_t orig_sg_length;
struct mlx5_mtt *mtt;
size_t final_size;
struct ib_sge sg;
u64 offset = 0;
int err = 0;
if (WARN_ON(mr->umem->is_odp))
return -EINVAL;
mtt = mlx5r_umr_create_xlt(
dev, &sg, ib_umem_num_dma_blocks(mr->umem, 1 << mr->page_shift),
sizeof(*mtt), flags);
if (!mtt)
return -ENOMEM;
orig_sg_length = sg.length;
mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr,
mr->page_shift);
mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);
cur_mtt = mtt;
rdma_for_each_block(mr->umem->sgt_append.sgt.sgl, &biter,
mr->umem->sgt_append.sgt.nents,
BIT(mr->page_shift)) {
if (cur_mtt == (void *)mtt + sg.length) {
dma_sync_single_for_device(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe,
true);
if (err)
goto err;
dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
offset += sg.length;
mlx5r_umr_update_offset(&wqe.ctrl_seg, offset);
cur_mtt = mtt;
}
cur_mtt->ptag =
cpu_to_be64(rdma_block_iter_dma_address(&biter) |
MLX5_IB_MTT_PRESENT);
if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP))
cur_mtt->ptag = 0;
cur_mtt++;
}
final_size = (void *)cur_mtt - (void *)mtt;
sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT);
memset(cur_mtt, 0, sg.length - final_size);
mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);
dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE);
err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);
err:
sg.length = orig_sg_length;
mlx5r_umr_unmap_free_xlt(dev, mtt, &sg);
return err;
}
static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
{
return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
}
int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
int page_shift, int flags)
{
int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
? sizeof(struct mlx5_klm)
: sizeof(struct mlx5_mtt);
const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
struct mlx5_ib_dev *dev = mr_to_mdev(mr);
struct device *ddev = &dev->mdev->pdev->dev;
const int page_mask = page_align - 1;
struct mlx5r_umr_wqe wqe = {};
size_t pages_mapped = 0;
size_t pages_to_map = 0;
size_t size_to_map = 0;
size_t orig_sg_length;
size_t pages_iter;
struct ib_sge sg;
int err = 0;
void *xlt;
if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
!umr_can_use_indirect_mkey(dev))
return -EPERM;
if (WARN_ON(!mr->umem->is_odp))
return -EINVAL;
/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
* so we need to align the offset and length accordingly
*/
if (idx & page_mask) {
npages += idx & page_mask;
idx &= ~page_mask;
}
pages_to_map = ALIGN(npages, page_align);
xlt = mlx5r_umr_create_xlt(dev, &sg, npages, desc_size, flags);
if (!xlt)
return -ENOMEM;
pages_iter = sg.length / desc_size;
orig_sg_length = sg.length;
if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
size_t max_pages = ib_umem_odp_num_pages(odp) - idx;
pages_to_map = min_t(size_t, pages_to_map, max_pages);
}
mlx5r_umr_set_update_xlt_ctrl_seg(&wqe.ctrl_seg, flags, &sg);
mlx5r_umr_set_update_xlt_mkey_seg(dev, &wqe.mkey_seg, mr, page_shift);
mlx5r_umr_set_update_xlt_data_seg(&wqe.data_seg, &sg);
for (pages_mapped = 0;
pages_mapped < pages_to_map && !err;
pages_mapped += pages_iter, idx += pages_iter) {
npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
size_to_map = npages * desc_size;
dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
dma_sync_single_for_device(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT);
if (pages_mapped + pages_iter >= pages_to_map)
mlx5r_umr_final_update_xlt(dev, &wqe, mr, &sg, flags);
mlx5r_umr_update_offset(&wqe.ctrl_seg, idx * desc_size);
err = mlx5r_umr_post_send_wait(dev, mr->mmkey.key, &wqe, true);
}
sg.length = orig_sg_length;
mlx5r_umr_unmap_free_xlt(dev, xlt, &sg);
return err;
}

View File

@@ -0,0 +1,97 @@
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. */
#ifndef _MLX5_IB_UMR_H
#define _MLX5_IB_UMR_H
#include "mlx5_ib.h"
#define MLX5_MAX_UMR_SHIFT 16
#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
#define MLX5_IB_UMR_OCTOWORD 16
#define MLX5_IB_UMR_XLT_ALIGNMENT 64
int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev);
void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev);
static inline bool mlx5r_umr_can_load_pas(struct mlx5_ib_dev *dev,
size_t length)
{
/*
* umr_check_mkey_mask() rejects MLX5_MKEY_MASK_PAGE_SIZE which is
* always set if MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (aka
* MLX5_IB_UPD_XLT_ADDR and MLX5_IB_UPD_XLT_ENABLE) is set. Thus, a mkey
* can never be enabled without this capability. Simplify this weird
* quirky hardware by just saying it can't use PAS lists with UMR at
* all.
*/
if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
return false;
/*
* length is the size of the MR in bytes when mlx5_ib_update_xlt() is
* used.
*/
if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) &&
length >= MLX5_MAX_UMR_PAGES * PAGE_SIZE)
return false;
return true;
}
/*
* true if an existing MR can be reconfigured to new access_flags using UMR.
* Older HW cannot use UMR to update certain elements of the MKC. See
* get_umr_update_access_mask() and umr_check_mkey_mask()
*/
static inline bool mlx5r_umr_can_reconfig(struct mlx5_ib_dev *dev,
unsigned int current_access_flags,
unsigned int target_access_flags)
{
unsigned int diffs = current_access_flags ^ target_access_flags;
if ((diffs & IB_ACCESS_REMOTE_ATOMIC) &&
MLX5_CAP_GEN(dev->mdev, atomic) &&
MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
return false;
if ((diffs & IB_ACCESS_RELAXED_ORDERING) &&
MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write) &&
!MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
return false;
if ((diffs & IB_ACCESS_RELAXED_ORDERING) &&
MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read) &&
!MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
return false;
return true;
}
static inline u64 mlx5r_umr_get_xlt_octo(u64 bytes)
{
return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) /
MLX5_IB_UMR_OCTOWORD;
}
struct mlx5r_umr_context {
struct ib_cqe cqe;
enum ib_wc_status status;
struct completion done;
};
struct mlx5r_umr_wqe {
struct mlx5_wqe_umr_ctrl_seg ctrl_seg;
struct mlx5_mkey_seg mkey_seg;
struct mlx5_wqe_data_seg data_seg;
};
int mlx5r_umr_revoke_mr(struct mlx5_ib_mr *mr);
int mlx5r_umr_rereg_pd_access(struct mlx5_ib_mr *mr, struct ib_pd *pd,
int access_flags);
int mlx5r_umr_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags);
int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
int page_shift, int flags);
#endif /* _MLX5_IB_UMR_H */

View File

@@ -7,6 +7,7 @@
#include <linux/mlx5/qp.h>
#include <linux/mlx5/driver.h>
#include "wr.h"
#include "umr.h"
static const u32 mlx5_ib_opcode[] = {
[IB_WR_SEND] = MLX5_OPCODE_SEND,
@@ -25,58 +26,7 @@ static const u32 mlx5_ib_opcode[] = {
[MLX5_IB_WR_UMR] = MLX5_OPCODE_UMR,
};
/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the
* next nearby edge and get new address translation for current WQE position.
* @sq - SQ buffer.
* @seg: Current WQE position (16B aligned).
* @wqe_sz: Total current WQE size [16B].
* @cur_edge: Updated current edge.
*/
static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
u32 wqe_sz, void **cur_edge)
{
u32 idx;
if (likely(*seg != *cur_edge))
return;
idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1);
*cur_edge = get_sq_edge(sq, idx);
*seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx);
}
/* memcpy_send_wqe - copy data from src to WQE and update the relevant WQ's
* pointers. At the end @seg is aligned to 16B regardless the copied size.
* @sq - SQ buffer.
* @cur_edge: Updated current edge.
* @seg: Current WQE position (16B aligned).
* @wqe_sz: Total current WQE size [16B].
* @src: Pointer to copy from.
* @n: Number of bytes to copy.
*/
static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge,
void **seg, u32 *wqe_sz, const void *src,
size_t n)
{
while (likely(n)) {
size_t leftlen = *cur_edge - *seg;
size_t copysz = min_t(size_t, leftlen, n);
size_t stride;
memcpy(*seg, src, copysz);
n -= copysz;
src += copysz;
stride = !n ? ALIGN(copysz, 16) : copysz;
*seg += stride;
*wqe_sz += stride >> 4;
handle_post_send_edge(sq, seg, *wqe_sz, cur_edge);
}
}
static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq,
struct ib_cq *ib_cq)
int mlx5r_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
{
struct mlx5_ib_cq *cq;
unsigned int cur;
@@ -122,9 +72,9 @@ static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
eseg->mss = cpu_to_be16(ud_wr->mss);
eseg->inline_hdr.sz = cpu_to_be16(left);
/* memcpy_send_wqe should get a 16B align address. Hence, we
* first copy up to the current edge and then, if needed,
* continue to memcpy_send_wqe.
/* mlx5r_memcpy_send_wqe should get a 16B align address. Hence,
* we first copy up to the current edge and then, if needed,
* continue to mlx5r_memcpy_send_wqe.
*/
copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start,
left);
@@ -138,8 +88,8 @@ static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
left -= copysz;
pdata += copysz;
memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata,
left);
mlx5r_memcpy_send_wqe(&qp->sq, cur_edge, seg, size,
pdata, left);
}
return;
@@ -165,12 +115,6 @@ static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
dseg->addr = cpu_to_be64(sg->addr);
}
static u64 get_xlt_octo(u64 bytes)
{
return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) /
MLX5_IB_UMR_OCTOWORD;
}
static __be64 frwr_mkey_mask(bool atomic)
{
u64 result;
@@ -222,7 +166,7 @@ static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
memset(umr, 0, sizeof(*umr));
umr->flags = flags;
umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
umr->xlt_octowords = cpu_to_be16(mlx5r_umr_get_xlt_octo(size));
umr->mkey_mask = frwr_mkey_mask(atomic);
}
@@ -233,134 +177,6 @@ static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr)
umr->flags = MLX5_UMR_INLINE;
}
static __be64 get_umr_enable_mr_mask(void)
{
u64 result;
result = MLX5_MKEY_MASK_KEY |
MLX5_MKEY_MASK_FREE;
return cpu_to_be64(result);
}
static __be64 get_umr_disable_mr_mask(void)
{
u64 result;
result = MLX5_MKEY_MASK_FREE;
return cpu_to_be64(result);
}
static __be64 get_umr_update_translation_mask(void)
{
u64 result;
result = MLX5_MKEY_MASK_LEN |
MLX5_MKEY_MASK_PAGE_SIZE |
MLX5_MKEY_MASK_START_ADDR;
return cpu_to_be64(result);
}
static __be64 get_umr_update_access_mask(int atomic,
int relaxed_ordering_write,
int relaxed_ordering_read)
{
u64 result;
result = MLX5_MKEY_MASK_LR |
MLX5_MKEY_MASK_LW |
MLX5_MKEY_MASK_RR |
MLX5_MKEY_MASK_RW;
if (atomic)
result |= MLX5_MKEY_MASK_A;
if (relaxed_ordering_write)
result |= MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE;
if (relaxed_ordering_read)
result |= MLX5_MKEY_MASK_RELAXED_ORDERING_READ;
return cpu_to_be64(result);
}
static __be64 get_umr_update_pd_mask(void)
{
u64 result;
result = MLX5_MKEY_MASK_PD;
return cpu_to_be64(result);
}
static int umr_check_mkey_mask(struct mlx5_ib_dev *dev, u64 mask)
{
if (mask & MLX5_MKEY_MASK_PAGE_SIZE &&
MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled))
return -EPERM;
if (mask & MLX5_MKEY_MASK_A &&
MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled))
return -EPERM;
if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_WRITE &&
!MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
return -EPERM;
if (mask & MLX5_MKEY_MASK_RELAXED_ORDERING_READ &&
!MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
return -EPERM;
return 0;
}
static int set_reg_umr_segment(struct mlx5_ib_dev *dev,
struct mlx5_wqe_umr_ctrl_seg *umr,
const struct ib_send_wr *wr)
{
const struct mlx5_umr_wr *umrwr = umr_wr(wr);
memset(umr, 0, sizeof(*umr));
if (!umrwr->ignore_free_state) {
if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE)
/* fail if free */
umr->flags = MLX5_UMR_CHECK_FREE;
else
/* fail if not free */
umr->flags = MLX5_UMR_CHECK_NOT_FREE;
}
umr->xlt_octowords = cpu_to_be16(get_xlt_octo(umrwr->xlt_size));
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_XLT) {
u64 offset = get_xlt_octo(umrwr->offset);
umr->xlt_offset = cpu_to_be16(offset & 0xffff);
umr->xlt_offset_47_16 = cpu_to_be32(offset >> 16);
umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
}
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
umr->mkey_mask |= get_umr_update_translation_mask();
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) {
umr->mkey_mask |= get_umr_update_access_mask(
!!(MLX5_CAP_GEN(dev->mdev, atomic)),
!!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr)),
!!(MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr)));
umr->mkey_mask |= get_umr_update_pd_mask();
}
if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR)
umr->mkey_mask |= get_umr_enable_mr_mask();
if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
umr->mkey_mask |= get_umr_disable_mr_mask();
if (!wr->num_sge)
umr->flags |= MLX5_UMR_INLINE;
return umr_check_mkey_mask(dev, be64_to_cpu(umr->mkey_mask));
}
static u8 get_umr_flags(int acc)
{
return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX5_PERM_ATOMIC : 0) |
@@ -398,43 +214,6 @@ static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
seg->status = MLX5_MKEY_STATUS_FREE;
}
static void set_reg_mkey_segment(struct mlx5_ib_dev *dev,
struct mlx5_mkey_seg *seg,
const struct ib_send_wr *wr)
{
const struct mlx5_umr_wr *umrwr = umr_wr(wr);
memset(seg, 0, sizeof(*seg));
if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
MLX5_SET(mkc, seg, free, 1);
MLX5_SET(mkc, seg, a,
!!(umrwr->access_flags & IB_ACCESS_REMOTE_ATOMIC));
MLX5_SET(mkc, seg, rw,
!!(umrwr->access_flags & IB_ACCESS_REMOTE_WRITE));
MLX5_SET(mkc, seg, rr, !!(umrwr->access_flags & IB_ACCESS_REMOTE_READ));
MLX5_SET(mkc, seg, lw, !!(umrwr->access_flags & IB_ACCESS_LOCAL_WRITE));
MLX5_SET(mkc, seg, lr, 1);
if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write_umr))
MLX5_SET(mkc, seg, relaxed_ordering_write,
!!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read_umr))
MLX5_SET(mkc, seg, relaxed_ordering_read,
!!(umrwr->access_flags & IB_ACCESS_RELAXED_ORDERING));
if (umrwr->pd)
MLX5_SET(mkc, seg, pd, to_mpd(umrwr->pd)->pdn);
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION &&
!umrwr->length)
MLX5_SET(mkc, seg, length64, 1);
MLX5_SET64(mkc, seg, start_addr, umrwr->virt_addr);
MLX5_SET64(mkc, seg, len, umrwr->length);
MLX5_SET(mkc, seg, log_page_size, umrwr->page_shift);
MLX5_SET(mkc, seg, qpn, 0xffffff);
MLX5_SET(mkc, seg, mkey_7_0, mlx5_mkey_variant(umrwr->mkey));
}
static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
struct mlx5_ib_mr *mr,
struct mlx5_ib_pd *pd)
@@ -760,7 +539,7 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
MLX5_MKEY_BSF_EN | pdn);
seg->len = cpu_to_be64(length);
seg->xlt_oct_size = cpu_to_be32(get_xlt_octo(size));
seg->xlt_oct_size = cpu_to_be32(mlx5r_umr_get_xlt_octo(size));
seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
}
@@ -770,7 +549,7 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
memset(umr, 0, sizeof(*umr));
umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE;
umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
umr->xlt_octowords = cpu_to_be16(mlx5r_umr_get_xlt_octo(size));
umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE);
umr->mkey_mask = sig_mkey_mask();
}
@@ -870,7 +649,7 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
* Relaxed Ordering is set implicitly in mlx5_set_umr_free_mkey() and
* kernel ULPs are not aware of it, so we don't set it here.
*/
if (!mlx5_ib_can_reconfig_with_umr(dev, 0, wr->access)) {
if (!mlx5r_umr_can_reconfig(dev, 0, wr->access)) {
mlx5_ib_warn(
to_mdev(qp->ibqp.device),
"Fast update for MR access flags is not possible\n");
@@ -899,8 +678,8 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
if (umr_inline) {
memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs,
mr_list_size);
mlx5r_memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs,
mr_list_size);
*size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4);
} else {
set_reg_data_seg(*seg, mr, pd);
@@ -942,23 +721,22 @@ static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16)
}
}
static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
struct mlx5_wqe_ctrl_seg **ctrl,
const struct ib_send_wr *wr, unsigned int *idx,
int *size, void **cur_edge, int nreq,
bool send_signaled, bool solicited)
int mlx5r_begin_wqe(struct mlx5_ib_qp *qp, void **seg,
struct mlx5_wqe_ctrl_seg **ctrl, unsigned int *idx,
int *size, void **cur_edge, int nreq, __be32 general_id,
bool send_signaled, bool solicited)
{
if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
if (unlikely(mlx5r_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
return -ENOMEM;
*idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
*seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx);
*ctrl = *seg;
*(uint32_t *)(*seg + 8) = 0;
(*ctrl)->imm = send_ieth(wr);
(*ctrl)->general_id = general_id;
(*ctrl)->fm_ce_se = qp->sq_signal_bits |
(send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) |
(solicited ? MLX5_WQE_CTRL_SOLICITED : 0);
(send_signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0) |
(solicited ? MLX5_WQE_CTRL_SOLICITED : 0);
*seg += sizeof(**ctrl);
*size = sizeof(**ctrl) / 16;
@@ -972,16 +750,14 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
const struct ib_send_wr *wr, unsigned int *idx, int *size,
void **cur_edge, int nreq)
{
return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq,
wr->send_flags & IB_SEND_SIGNALED,
wr->send_flags & IB_SEND_SOLICITED);
return mlx5r_begin_wqe(qp, seg, ctrl, idx, size, cur_edge, nreq,
send_ieth(wr), wr->send_flags & IB_SEND_SIGNALED,
wr->send_flags & IB_SEND_SOLICITED);
}
static void finish_wqe(struct mlx5_ib_qp *qp,
struct mlx5_wqe_ctrl_seg *ctrl,
void *seg, u8 size, void *cur_edge,
unsigned int idx, u64 wr_id, int nreq, u8 fence,
u32 mlx5_opcode)
void mlx5r_finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl,
void *seg, u8 size, void *cur_edge, unsigned int idx,
u64 wr_id, int nreq, u8 fence, u32 mlx5_opcode)
{
u8 opmod = 0;
@@ -1045,8 +821,8 @@ static int handle_psv(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
/*
* SET_PSV WQEs are not signaled and solicited on error.
*/
err = __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq,
false, true);
err = mlx5r_begin_wqe(qp, seg, ctrl, idx, size, cur_edge, nreq,
send_ieth(wr), false, true);
if (unlikely(err)) {
mlx5_ib_warn(dev, "\n");
err = -ENOMEM;
@@ -1057,8 +833,8 @@ static int handle_psv(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
mlx5_ib_warn(dev, "\n");
goto out;
}
finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq,
next_fence, MLX5_OPCODE_SET_PSV);
mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id,
nreq, next_fence, MLX5_OPCODE_SET_PSV);
out:
return err;
@@ -1098,8 +874,8 @@ static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev,
if (unlikely(err))
goto out;
finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id,
nreq, fence, MLX5_OPCODE_UMR);
mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx,
wr->wr_id, nreq, fence, MLX5_OPCODE_UMR);
err = begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq);
if (unlikely(err)) {
@@ -1130,8 +906,8 @@ static int handle_reg_mr_integrity(struct mlx5_ib_dev *dev,
mlx5_ib_warn(dev, "\n");
goto out;
}
finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id, nreq,
fence, MLX5_OPCODE_UMR);
mlx5r_finish_wqe(qp, *ctrl, *seg, *size, *cur_edge, *idx, wr->wr_id,
nreq, fence, MLX5_OPCODE_UMR);
sig_attrs = mr->ibmr.sig_attrs;
err = handle_psv(dev, qp, wr, ctrl, seg, size, cur_edge, idx, nreq,
@@ -1246,33 +1022,30 @@ static void handle_qpt_ud(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
}
}
static int handle_qpt_reg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
const struct ib_send_wr *wr,
struct mlx5_wqe_ctrl_seg **ctrl, void **seg,
int *size, void **cur_edge, unsigned int idx)
void mlx5r_ring_db(struct mlx5_ib_qp *qp, unsigned int nreq,
struct mlx5_wqe_ctrl_seg *ctrl)
{
int err = 0;
struct mlx5_bf *bf = &qp->bf;
if (unlikely(wr->opcode != MLX5_IB_WR_UMR)) {
err = -EINVAL;
mlx5_ib_warn(dev, "bad opcode %d\n", wr->opcode);
goto out;
}
qp->sq.head += nreq;
qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
(*ctrl)->imm = cpu_to_be32(umr_wr(wr)->mkey);
err = set_reg_umr_segment(dev, *seg, wr);
if (unlikely(err))
goto out;
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
set_reg_mkey_segment(dev, *seg, wr);
*seg += sizeof(struct mlx5_mkey_seg);
*size += sizeof(struct mlx5_mkey_seg) / 16;
handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
out:
return err;
/* Make sure that descriptors are written before
* updating doorbell record and ringing the doorbell
*/
wmb();
qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
/* Make sure doorbell record is visible to the HCA before
* we hit doorbell.
*/
wmb();
mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset);
/* Make sure doorbells don't leak out of SQ spinlock
* and reach the HCA out of order.
*/
bf->offset ^= bf->buf_size;
}
int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
@@ -1283,7 +1056,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_wqe_xrc_seg *xrc;
struct mlx5_bf *bf;
void *cur_edge;
int size;
unsigned long flags;
@@ -1305,8 +1077,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
if (qp->type == IB_QPT_GSI)
return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
bf = &qp->bf;
spin_lock_irqsave(&qp->sq.lock, flags);
for (nreq = 0; wr; nreq++, wr = wr->next) {
@@ -1384,12 +1154,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
case IB_QPT_UD:
handle_qpt_ud(qp, wr, &seg, &size, &cur_edge);
break;
case MLX5_IB_QPT_REG_UMR:
err = handle_qpt_reg_umr(dev, qp, wr, &ctrl, &seg,
&size, &cur_edge, idx);
if (unlikely(err))
goto out;
break;
default:
break;
@@ -1418,35 +1182,16 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
}
qp->next_fence = next_fence;
finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq,
fence, mlx5_ib_opcode[wr->opcode]);
mlx5r_finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id,
nreq, fence, mlx5_ib_opcode[wr->opcode]);
skip_psv:
if (0)
dump_wqe(qp, idx, size);
}
out:
if (likely(nreq)) {
qp->sq.head += nreq;
/* Make sure that descriptors are written before
* updating doorbell record and ringing the doorbell
*/
wmb();
qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
/* Make sure doorbell record is visible to the HCA before
* we hit doorbell.
*/
wmb();
mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset);
/* Make sure doorbells don't leak out of SQ spinlock
* and reach the HCA out of order.
*/
bf->offset ^= bf->buf_size;
}
if (likely(nreq))
mlx5r_ring_db(qp, nreq, ctrl);
spin_unlock_irqrestore(&qp->sq.lock, flags);
@@ -1486,7 +1231,7 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
for (nreq = 0; wr; nreq++, wr = wr->next) {
if (mlx5_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
if (mlx5r_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
err = -ENOMEM;
*bad_wr = wr;
goto out;

View File

@@ -41,6 +41,66 @@ static inline void *get_sq_edge(struct mlx5_ib_wq *sq, u32 idx)
return fragment_end + MLX5_SEND_WQE_BB;
}
/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the
* next nearby edge and get new address translation for current WQE position.
* @sq: SQ buffer.
* @seg: Current WQE position (16B aligned).
* @wqe_sz: Total current WQE size [16B].
* @cur_edge: Updated current edge.
*/
static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
u32 wqe_sz, void **cur_edge)
{
u32 idx;
if (likely(*seg != *cur_edge))
return;
idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1);
*cur_edge = get_sq_edge(sq, idx);
*seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx);
}
/* mlx5r_memcpy_send_wqe - copy data from src to WQE and update the relevant
* WQ's pointers. At the end @seg is aligned to 16B regardless the copied size.
* @sq: SQ buffer.
* @cur_edge: Updated current edge.
* @seg: Current WQE position (16B aligned).
* @wqe_sz: Total current WQE size [16B].
* @src: Pointer to copy from.
* @n: Number of bytes to copy.
*/
static inline void mlx5r_memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge,
void **seg, u32 *wqe_sz,
const void *src, size_t n)
{
while (likely(n)) {
size_t leftlen = *cur_edge - *seg;
size_t copysz = min_t(size_t, leftlen, n);
size_t stride;
memcpy(*seg, src, copysz);
n -= copysz;
src += copysz;
stride = !n ? ALIGN(copysz, 16) : copysz;
*seg += stride;
*wqe_sz += stride >> 4;
handle_post_send_edge(sq, seg, *wqe_sz, cur_edge);
}
}
int mlx5r_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq);
int mlx5r_begin_wqe(struct mlx5_ib_qp *qp, void **seg,
struct mlx5_wqe_ctrl_seg **ctrl, unsigned int *idx,
int *size, void **cur_edge, int nreq, __be32 general_id,
bool send_signaled, bool solicited);
void mlx5r_finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl,
void *seg, u8 size, void *cur_edge, unsigned int idx,
u64 wr_id, int nreq, u8 fence, u32 mlx5_opcode);
void mlx5r_ring_db(struct mlx5_ib_qp *qp, unsigned int nreq,
struct mlx5_wqe_ctrl_seg *ctrl);
int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr, bool drain);
int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,

View File

@@ -90,8 +90,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_SHUTDOWN_PORT |
IB_DEVICE_SYS_IMAGE_GUID |
IB_DEVICE_LOCAL_DMA_LKEY |
IB_DEVICE_MEM_MGT_EXTENSIONS;
attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
attr->max_send_sge = dev->attr.max_send_sge;
attr->max_recv_sge = dev->attr.max_recv_sge;
attr->max_sge_rd = dev->attr.max_rdma_sge;

View File

@@ -500,7 +500,6 @@ static void qedr_sync_free_irqs(struct qedr_dev *dev)
if (dev->int_info.msix_cnt) {
idx = i * dev->num_hwfns + dev->affin_hwfn_idx;
vector = dev->int_info.msix[idx].vector;
synchronize_irq(vector);
free_irq(vector, &dev->cnq_array[i]);
}
}

View File

@@ -134,7 +134,8 @@ int qedr_query_device(struct ib_device *ibdev,
attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
IB_DEVICE_MEM_MGT_EXTENSIONS;
attr->kernel_cap_flags = IBK_LOCAL_DMA_LKEY;
if (!rdma_protocol_iwarp(&dev->ibdev, 1))
attr->device_cap_flags |= IB_DEVICE_XRC;

View File

@@ -678,7 +678,7 @@ struct qib_pportdata {
/* Observers. Not to be taken lightly, possibly not to ship. */
/*
* If a diag read or write is to (bottom <= offset <= top),
* the "hoook" is called, allowing, e.g. shadows to be
* the "hook" is called, allowing, e.g. shadows to be
* updated in sync with the driver. struct diag_observer
* is the "visible" part.
*/

View File

@@ -534,6 +534,11 @@ static int usnic_ib_pci_probe(struct pci_dev *pdev,
struct usnic_ib_vf *vf;
enum usnic_vnic_res_type res_type;
if (!device_iommu_mapped(&pdev->dev)) {
usnic_err("IOMMU required but not present or enabled. USNIC QPs will not function w/o enabling IOMMU\n");
return -EPERM;
}
vf = kzalloc(sizeof(*vf), GFP_KERNEL);
if (!vf)
return -ENOMEM;
@@ -642,12 +647,6 @@ static int __init usnic_ib_init(void)
printk_once(KERN_INFO "%s", usnic_version);
err = usnic_uiom_init(DRV_NAME);
if (err) {
usnic_err("Unable to initialize umem with err %d\n", err);
return err;
}
err = pci_register_driver(&usnic_ib_pci_driver);
if (err) {
usnic_err("Unable to register with PCI\n");

View File

@@ -305,7 +305,8 @@ int usnic_ib_query_device(struct ib_device *ibdev,
props->max_qp = qp_per_vf *
kref_read(&us_ibdev->vf_cnt);
props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
IB_DEVICE_SYS_IMAGE_GUID;
props->kernel_cap_flags = IBK_BLOCK_MULTICAST_LOOPBACK;
props->max_cq = us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ] *
kref_read(&us_ibdev->vf_cnt);
props->max_pd = USNIC_UIOM_MAX_PD_CNT;
@@ -442,7 +443,7 @@ int usnic_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
struct usnic_ib_pd *pd = to_upd(ibpd);
pd->umem_pd = usnic_uiom_alloc_pd();
pd->umem_pd = usnic_uiom_alloc_pd(ibpd->device->dev.parent);
if (IS_ERR(pd->umem_pd))
return PTR_ERR(pd->umem_pd);
@@ -706,4 +707,3 @@ int usnic_ib_mmap(struct ib_ucontext *context,
usnic_err("No VF %u found\n", vfid);
return -EINVAL;
}

View File

@@ -40,7 +40,6 @@
#include <linux/iommu.h>
#include <linux/workqueue.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <rdma/ib_verbs.h>
#include "usnic_log.h"
@@ -439,7 +438,7 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr)
__usnic_uiom_release_tail(uiomr);
}
struct usnic_uiom_pd *usnic_uiom_alloc_pd(void)
struct usnic_uiom_pd *usnic_uiom_alloc_pd(struct device *dev)
{
struct usnic_uiom_pd *pd;
void *domain;
@@ -448,7 +447,7 @@ struct usnic_uiom_pd *usnic_uiom_alloc_pd(void)
if (!pd)
return ERR_PTR(-ENOMEM);
pd->domain = domain = iommu_domain_alloc(&pci_bus_type);
pd->domain = domain = iommu_domain_alloc(dev->bus);
if (!domain) {
usnic_err("Failed to allocate IOMMU domain");
kfree(pd);
@@ -556,13 +555,3 @@ void usnic_uiom_free_dev_list(struct device **devs)
{
kfree(devs);
}
int usnic_uiom_init(char *drv_name)
{
if (!iommu_present(&pci_bus_type)) {
usnic_err("IOMMU required but not present or enabled. USNIC QPs will not function w/o enabling IOMMU\n");
return -EPERM;
}
return 0;
}

View File

@@ -80,7 +80,7 @@ struct usnic_uiom_chunk {
struct scatterlist page_list[];
};
struct usnic_uiom_pd *usnic_uiom_alloc_pd(void);
struct usnic_uiom_pd *usnic_uiom_alloc_pd(struct device *dev);
void usnic_uiom_dealloc_pd(struct usnic_uiom_pd *pd);
int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev);
void usnic_uiom_detach_dev_from_pd(struct usnic_uiom_pd *pd,
@@ -91,5 +91,4 @@ struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd,
unsigned long addr, size_t size,
int access, int dmasync);
void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr);
int usnic_uiom_init(char *drv_name);
#endif /* USNIC_UIOM_H_ */

View File

@@ -2775,7 +2775,7 @@ void rvt_qp_iter(struct rvt_dev_info *rdi,
EXPORT_SYMBOL(rvt_qp_iter);
/*
* This should be called with s_lock held.
* This should be called with s_lock and r_lock held.
*/
void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
enum ib_wc_status status)
@@ -3134,7 +3134,9 @@ send_comp:
rvp->n_loop_pkts++;
flush_send:
sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
spin_lock(&sqp->r_lock);
rvt_send_complete(sqp, wqe, send_status);
spin_unlock(&sqp->r_lock);
if (local_ops) {
atomic_dec(&sqp->local_ops_pending);
local_ops = 0;
@@ -3188,7 +3190,9 @@ serr:
spin_unlock_irqrestore(&qp->r_lock, flags);
serr_no_r_lock:
spin_lock_irqsave(&sqp->s_lock, flags);
spin_lock(&sqp->r_lock);
rvt_send_complete(sqp, wqe, send_status);
spin_unlock(&sqp->r_lock);
if (sqp->ibqp.qp_type == IB_QPT_RC) {
int lastwqe;

View File

@@ -46,6 +46,7 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
rxe->attr.max_qp = RXE_MAX_QP;
rxe->attr.max_qp_wr = RXE_MAX_QP_WR;
rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS;
rxe->attr.kernel_cap_flags = IBK_ALLOW_USER_UNREG;
rxe->attr.max_send_sge = RXE_MAX_SGE;
rxe->attr.max_recv_sge = RXE_MAX_SGE;
rxe->attr.max_sge_rd = RXE_MAX_SGE_RD;

View File

@@ -562,7 +562,8 @@ int rxe_completer(void *arg)
enum comp_state state;
int ret = 0;
rxe_get(qp);
if (!rxe_get(qp))
return -EAGAIN;
if (!qp->valid || qp->req.state == QP_STATE_ERROR ||
qp->req.state == QP_STATE_RESET) {

View File

@@ -37,7 +37,7 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited);
void rxe_cq_disable(struct rxe_cq *cq);
void rxe_cq_cleanup(struct rxe_pool_elem *arg);
void rxe_cq_cleanup(struct rxe_pool_elem *elem);
/* rxe_mcast.c */
struct rxe_mcg *rxe_lookup_mcg(struct rxe_dev *rxe, union ib_gid *mgid);
@@ -81,7 +81,7 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey);
int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
int rxe_mr_set_page(struct ib_mr *ibmr, u64 addr);
int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
void rxe_mr_cleanup(struct rxe_pool_elem *arg);
void rxe_mr_cleanup(struct rxe_pool_elem *elem);
/* rxe_mw.c */
int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata);
@@ -89,7 +89,7 @@ int rxe_dealloc_mw(struct ib_mw *ibmw);
int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
int rxe_invalidate_mw(struct rxe_qp *qp, u32 rkey);
struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey);
void rxe_mw_cleanup(struct rxe_pool_elem *arg);
void rxe_mw_cleanup(struct rxe_pool_elem *elem);
/* rxe_net.c */
struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
@@ -114,7 +114,6 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr,
int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask);
void rxe_qp_error(struct rxe_qp *qp);
int rxe_qp_chk_destroy(struct rxe_qp *qp);
void rxe_qp_destroy(struct rxe_qp *qp);
void rxe_qp_cleanup(struct rxe_pool_elem *elem);
static inline int qp_num(struct rxe_qp *qp)
@@ -159,18 +158,16 @@ void retransmit_timer(struct timer_list *t);
void rnr_nak_timer(struct timer_list *t);
/* rxe_srq.c */
#define IB_SRQ_INIT_MASK (~IB_SRQ_LIMIT)
int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask);
int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init);
int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_init_attr *init, struct ib_udata *udata,
struct rxe_create_srq_resp __user *uresp);
int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask);
int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
struct rxe_modify_srq_cmd *ucmd, struct ib_udata *udata);
void rxe_srq_cleanup(struct rxe_pool_elem *elem);
void rxe_dealloc(struct ib_device *ib_dev);

View File

@@ -206,8 +206,10 @@ static struct rxe_mcg *rxe_get_mcg(struct rxe_dev *rxe, union ib_gid *mgid)
/* speculative alloc of new mcg */
mcg = kzalloc(sizeof(*mcg), GFP_KERNEL);
if (!mcg)
return ERR_PTR(-ENOMEM);
if (!mcg) {
err = -ENOMEM;
goto err_dec;
}
spin_lock_bh(&rxe->mcg_lock);
/* re-check to see if someone else just added it */

View File

@@ -683,14 +683,10 @@ int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
struct rxe_mr *mr = to_rmr(ibmr);
if (atomic_read(&mr->num_mw) > 0) {
pr_warn("%s: Attempt to deregister an MR while bound to MWs\n",
__func__);
/* See IBA 10.6.7.2.6 */
if (atomic_read(&mr->num_mw) > 0)
return -EINVAL;
}
mr->state = RXE_MR_STATE_INVALID;
rxe_put(mr_pd(mr));
rxe_put(mr);
return 0;
@@ -700,6 +696,8 @@ void rxe_mr_cleanup(struct rxe_pool_elem *elem)
{
struct rxe_mr *mr = container_of(elem, typeof(*mr), elem);
rxe_put(mr_pd(mr));
ib_umem_release(mr->umem);
if (mr->cur_map_set)

View File

@@ -3,6 +3,14 @@
* Copyright (c) 2020 Hewlett Packard Enterprise, Inc. All rights reserved.
*/
/*
* The rdma_rxe driver supports type 1 or type 2B memory windows.
* Type 1 MWs are created by ibv_alloc_mw() verbs calls and bound by
* ibv_bind_mw() calls. Type 2 MWs are also created by ibv_alloc_mw()
* but bound by bind_mw work requests. The ibv_bind_mw() call is converted
* by libibverbs to a bind_mw work request.
*/
#include "rxe.h"
int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
@@ -28,40 +36,11 @@ int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
return 0;
}
static void rxe_do_dealloc_mw(struct rxe_mw *mw)
{
if (mw->mr) {
struct rxe_mr *mr = mw->mr;
mw->mr = NULL;
atomic_dec(&mr->num_mw);
rxe_put(mr);
}
if (mw->qp) {
struct rxe_qp *qp = mw->qp;
mw->qp = NULL;
rxe_put(qp);
}
mw->access = 0;
mw->addr = 0;
mw->length = 0;
mw->state = RXE_MW_STATE_INVALID;
}
int rxe_dealloc_mw(struct ib_mw *ibmw)
{
struct rxe_mw *mw = to_rmw(ibmw);
struct rxe_pd *pd = to_rpd(ibmw->pd);
spin_lock_bh(&mw->lock);
rxe_do_dealloc_mw(mw);
spin_unlock_bh(&mw->lock);
rxe_put(mw);
rxe_put(pd);
return 0;
}
@@ -328,3 +307,31 @@ struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey)
return mw;
}
void rxe_mw_cleanup(struct rxe_pool_elem *elem)
{
struct rxe_mw *mw = container_of(elem, typeof(*mw), elem);
struct rxe_pd *pd = to_rpd(mw->ibmw.pd);
rxe_put(pd);
if (mw->mr) {
struct rxe_mr *mr = mw->mr;
mw->mr = NULL;
atomic_dec(&mr->num_mw);
rxe_put(mr);
}
if (mw->qp) {
struct rxe_qp *qp = mw->qp;
mw->qp = NULL;
rxe_put(qp);
}
mw->access = 0;
mw->addr = 0;
mw->length = 0;
mw->state = RXE_MW_STATE_INVALID;
}

View File

@@ -29,7 +29,6 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = {
[IB_WR_SEND] = {
.name = "IB_WR_SEND",
.mask = {
[IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK,
@@ -39,7 +38,6 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = {
[IB_WR_SEND_WITH_IMM] = {
.name = "IB_WR_SEND_WITH_IMM",
.mask = {
[IB_QPT_SMI] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_GSI] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_RC] = WR_INLINE_MASK | WR_SEND_MASK,
[IB_QPT_UC] = WR_INLINE_MASK | WR_SEND_MASK,

View File

@@ -50,9 +50,7 @@ enum rxe_device_param {
| IB_DEVICE_RC_RNR_NAK_GEN
| IB_DEVICE_SRQ_RESIZE
| IB_DEVICE_MEM_MGT_EXTENSIONS
| IB_DEVICE_ALLOW_USER_UNREG
| IB_DEVICE_MEM_WINDOW
| IB_DEVICE_MEM_WINDOW_TYPE_2A
| IB_DEVICE_MEM_WINDOW_TYPE_2B,
RXE_MAX_SGE = 32,
RXE_MAX_WQE_SIZE = sizeof(struct rxe_send_wqe) +

View File

@@ -13,7 +13,6 @@ static const struct rxe_type_info {
size_t size;
size_t elem_offset;
void (*cleanup)(struct rxe_pool_elem *elem);
enum rxe_pool_flags flags;
u32 min_index;
u32 max_index;
u32 max_elem;
@@ -46,6 +45,7 @@ static const struct rxe_type_info {
.name = "srq",
.size = sizeof(struct rxe_srq),
.elem_offset = offsetof(struct rxe_srq, elem),
.cleanup = rxe_srq_cleanup,
.min_index = RXE_MIN_SRQ_INDEX,
.max_index = RXE_MAX_SRQ_INDEX,
.max_elem = RXE_MAX_SRQ_INDEX - RXE_MIN_SRQ_INDEX + 1,
@@ -73,7 +73,6 @@ static const struct rxe_type_info {
.size = sizeof(struct rxe_mr),
.elem_offset = offsetof(struct rxe_mr, elem),
.cleanup = rxe_mr_cleanup,
.flags = RXE_POOL_ALLOC,
.min_index = RXE_MIN_MR_INDEX,
.max_index = RXE_MAX_MR_INDEX,
.max_elem = RXE_MAX_MR_INDEX - RXE_MIN_MR_INDEX + 1,
@@ -82,6 +81,7 @@ static const struct rxe_type_info {
.name = "mw",
.size = sizeof(struct rxe_mw),
.elem_offset = offsetof(struct rxe_mw, elem),
.cleanup = rxe_mw_cleanup,
.min_index = RXE_MIN_MW_INDEX,
.max_index = RXE_MAX_MW_INDEX,
.max_elem = RXE_MAX_MW_INDEX - RXE_MIN_MW_INDEX + 1,
@@ -101,7 +101,6 @@ void rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool,
pool->max_elem = info->max_elem;
pool->elem_size = ALIGN(info->size, RXE_POOL_ALIGN);
pool->elem_offset = info->elem_offset;
pool->flags = info->flags;
pool->cleanup = info->cleanup;
atomic_set(&pool->num_elem, 0);
@@ -122,7 +121,7 @@ void *rxe_alloc(struct rxe_pool *pool)
void *obj;
int err;
if (WARN_ON(!(pool->flags & RXE_POOL_ALLOC)))
if (WARN_ON(!(pool->type == RXE_TYPE_MR)))
return NULL;
if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
@@ -156,7 +155,7 @@ int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem)
{
int err;
if (WARN_ON(pool->flags & RXE_POOL_ALLOC))
if (WARN_ON(pool->type == RXE_TYPE_MR))
return -EINVAL;
if (atomic_inc_return(&pool->num_elem) > pool->max_elem)
@@ -206,7 +205,7 @@ static void rxe_elem_release(struct kref *kref)
if (pool->cleanup)
pool->cleanup(elem);
if (pool->flags & RXE_POOL_ALLOC)
if (pool->type == RXE_TYPE_MR)
kfree(elem->obj);
atomic_dec(&pool->num_elem);

View File

@@ -7,10 +7,6 @@
#ifndef RXE_POOL_H
#define RXE_POOL_H
enum rxe_pool_flags {
RXE_POOL_ALLOC = BIT(1),
};
enum rxe_elem_type {
RXE_TYPE_UC,
RXE_TYPE_PD,
@@ -35,7 +31,6 @@ struct rxe_pool {
struct rxe_dev *rxe;
const char *name;
void (*cleanup)(struct rxe_pool_elem *elem);
enum rxe_pool_flags flags;
enum rxe_elem_type type;
unsigned int max_elem;

View File

@@ -63,7 +63,6 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
int port_num = init->port_num;
switch (init->qp_type) {
case IB_QPT_SMI:
case IB_QPT_GSI:
case IB_QPT_RC:
case IB_QPT_UC:
@@ -81,7 +80,7 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
if (rxe_qp_chk_cap(rxe, cap, !!init->srq))
goto err1;
if (init->qp_type == IB_QPT_SMI || init->qp_type == IB_QPT_GSI) {
if (init->qp_type == IB_QPT_GSI) {
if (!rdma_is_port_valid(&rxe->ib_dev, port_num)) {
pr_warn("invalid port = %d\n", port_num);
goto err1;
@@ -89,11 +88,6 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
port = &rxe->port;
if (init->qp_type == IB_QPT_SMI && port->qp_smi_index) {
pr_warn("SMI QP exists for port %d\n", port_num);
goto err1;
}
if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) {
pr_warn("GSI QP exists for port %d\n", port_num);
goto err1;
@@ -167,12 +161,6 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
port = &rxe->port;
switch (init->qp_type) {
case IB_QPT_SMI:
qp->ibqp.qp_num = 0;
port->qp_smi_index = qpn;
qp->attr.port_num = init->port_num;
break;
case IB_QPT_GSI:
qp->ibqp.qp_num = 1;
port->qp_gsi_index = qpn;
@@ -334,6 +322,9 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
qp->scq = scq;
qp->srq = srq;
atomic_inc(&rcq->num_wq);
atomic_inc(&scq->num_wq);
rxe_qp_init_misc(rxe, qp, init);
err = rxe_qp_init_req(rxe, qp, init, udata, uresp);
@@ -353,6 +344,9 @@ err2:
rxe_queue_cleanup(qp->sq.queue);
qp->sq.queue = NULL;
err1:
atomic_dec(&rcq->num_wq);
atomic_dec(&scq->num_wq);
qp->pd = NULL;
qp->rcq = NULL;
qp->scq = NULL;
@@ -777,9 +771,11 @@ int rxe_qp_chk_destroy(struct rxe_qp *qp)
return 0;
}
/* called by the destroy qp verb */
void rxe_qp_destroy(struct rxe_qp *qp)
/* called when the last reference to the qp is dropped */
static void rxe_qp_do_cleanup(struct work_struct *work)
{
struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work);
qp->valid = 0;
qp->qp_timeout_jiffies = 0;
rxe_cleanup_task(&qp->resp.task);
@@ -798,12 +794,6 @@ void rxe_qp_destroy(struct rxe_qp *qp)
__rxe_do_task(&qp->comp.task);
__rxe_do_task(&qp->req.task);
}
}
/* called when the last reference to the qp is dropped */
static void rxe_qp_do_cleanup(struct work_struct *work)
{
struct rxe_qp *qp = container_of(work, typeof(*qp), cleanup_work.work);
if (qp->sq.queue)
rxe_queue_cleanup(qp->sq.queue);
@@ -814,10 +804,14 @@ static void rxe_qp_do_cleanup(struct work_struct *work)
if (qp->rq.queue)
rxe_queue_cleanup(qp->rq.queue);
atomic_dec(&qp->scq->num_wq);
if (qp->scq)
rxe_put(qp->scq);
atomic_dec(&qp->rcq->num_wq);
if (qp->rcq)
rxe_put(qp->rcq);
if (qp->pd)
rxe_put(qp->pd);

View File

@@ -34,7 +34,6 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
}
break;
case IB_QPT_UD:
case IB_QPT_SMI:
case IB_QPT_GSI:
if (unlikely(pkt_type != IB_OPCODE_UD)) {
pr_warn_ratelimited("bad qp type\n");

View File

@@ -33,8 +33,6 @@ static inline void retry_first_write_send(struct rxe_qp *qp,
} else {
advance_dma_data(&wqe->dma, to_send);
}
if (mask & WR_WRITE_MASK)
wqe->iova += qp->mtu;
}
}
@@ -308,7 +306,6 @@ static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
case IB_QPT_UC:
return next_opcode_uc(qp, opcode, fits);
case IB_QPT_SMI:
case IB_QPT_UD:
case IB_QPT_GSI:
switch (opcode) {
@@ -414,8 +411,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
if (pkt->mask & RXE_ATMETH_MASK) {
atmeth_set_va(pkt, wqe->iova);
if (opcode == IB_OPCODE_RC_COMPARE_SWAP ||
opcode == IB_OPCODE_RD_COMPARE_SWAP) {
if (opcode == IB_OPCODE_RC_COMPARE_SWAP) {
atmeth_set_swap_add(pkt, ibwr->wr.atomic.swap);
atmeth_set_comp(pkt, ibwr->wr.atomic.compare_add);
} else {
@@ -437,7 +433,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
static int finish_packet(struct rxe_qp *qp, struct rxe_av *av,
struct rxe_send_wqe *wqe, struct rxe_pkt_info *pkt,
struct sk_buff *skb, u32 paylen)
struct sk_buff *skb, u32 payload)
{
int err;
@@ -449,19 +445,19 @@ static int finish_packet(struct rxe_qp *qp, struct rxe_av *av,
if (wqe->wr.send_flags & IB_SEND_INLINE) {
u8 *tmp = &wqe->dma.inline_data[wqe->dma.sge_offset];
memcpy(payload_addr(pkt), tmp, paylen);
memcpy(payload_addr(pkt), tmp, payload);
wqe->dma.resid -= paylen;
wqe->dma.sge_offset += paylen;
wqe->dma.resid -= payload;
wqe->dma.sge_offset += payload;
} else {
err = copy_data(qp->pd, 0, &wqe->dma,
payload_addr(pkt), paylen,
payload_addr(pkt), payload,
RXE_FROM_MR_OBJ);
if (err)
return err;
}
if (bth_pad(pkt)) {
u8 *pad = payload_addr(pkt) + paylen;
u8 *pad = payload_addr(pkt) + payload;
memset(pad, 0, bth_pad(pkt));
}
@@ -527,8 +523,7 @@ static void rollback_state(struct rxe_send_wqe *wqe,
qp->req.psn = rollback_psn;
}
static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
struct rxe_pkt_info *pkt)
static void update_state(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
qp->req.opcode = pkt->opcode;
@@ -611,7 +606,8 @@ int rxe_requester(void *arg)
struct rxe_ah *ah;
struct rxe_av *av;
rxe_get(qp);
if (!rxe_get(qp))
return -EAGAIN;
next_wqe:
if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))
@@ -661,7 +657,7 @@ next_wqe:
opcode = next_opcode(qp, wqe, wqe->wr.opcode);
if (unlikely(opcode < 0)) {
wqe->status = IB_WC_LOC_QP_OP_ERR;
goto exit;
goto err;
}
mask = rxe_opcode[opcode].mask;
@@ -755,7 +751,7 @@ next_wqe:
goto err;
}
update_state(qp, wqe, &pkt);
update_state(qp, &pkt);
goto next_wqe;

View File

@@ -277,7 +277,6 @@ static enum resp_states check_op_valid(struct rxe_qp *qp,
break;
case IB_QPT_UD:
case IB_QPT_SMI:
case IB_QPT_GSI:
break;
@@ -577,8 +576,7 @@ static enum resp_states process_atomic(struct rxe_qp *qp,
qp->resp.atomic_orig = *vaddr;
if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP ||
pkt->opcode == IB_OPCODE_RD_COMPARE_SWAP) {
if (pkt->opcode == IB_OPCODE_RC_COMPARE_SWAP) {
if (*vaddr == atmeth_comp(pkt))
*vaddr = atmeth_swap_add(pkt);
} else {
@@ -834,7 +832,6 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
if (pkt->mask & RXE_SEND_MASK) {
if (qp_type(qp) == IB_QPT_UD ||
qp_type(qp) == IB_QPT_SMI ||
qp_type(qp) == IB_QPT_GSI) {
if (skb->protocol == htons(ETH_P_IP)) {
memset(&hdr.reserved, 0,
@@ -1265,7 +1262,8 @@ int rxe_responder(void *arg)
struct rxe_pkt_info *pkt = NULL;
int ret = 0;
rxe_get(qp);
if (!rxe_get(qp))
return -EAGAIN;
qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;

View File

@@ -6,64 +6,34 @@
#include <linux/vmalloc.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask)
int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init)
{
if (srq && srq->error) {
pr_warn("srq in error state\n");
struct ib_srq_attr *attr = &init->attr;
if (attr->max_wr > rxe->attr.max_srq_wr) {
pr_warn("max_wr(%d) > max_srq_wr(%d)\n",
attr->max_wr, rxe->attr.max_srq_wr);
goto err1;
}
if (mask & IB_SRQ_MAX_WR) {
if (attr->max_wr > rxe->attr.max_srq_wr) {
pr_warn("max_wr(%d) > max_srq_wr(%d)\n",
attr->max_wr, rxe->attr.max_srq_wr);
goto err1;
}
if (attr->max_wr <= 0) {
pr_warn("max_wr(%d) <= 0\n", attr->max_wr);
goto err1;
}
if (srq && srq->limit && (attr->max_wr < srq->limit)) {
pr_warn("max_wr (%d) < srq->limit (%d)\n",
attr->max_wr, srq->limit);
goto err1;
}
if (attr->max_wr < RXE_MIN_SRQ_WR)
attr->max_wr = RXE_MIN_SRQ_WR;
if (attr->max_wr <= 0) {
pr_warn("max_wr(%d) <= 0\n", attr->max_wr);
goto err1;
}
if (mask & IB_SRQ_LIMIT) {
if (attr->srq_limit > rxe->attr.max_srq_wr) {
pr_warn("srq_limit(%d) > max_srq_wr(%d)\n",
attr->srq_limit, rxe->attr.max_srq_wr);
goto err1;
}
if (attr->max_wr < RXE_MIN_SRQ_WR)
attr->max_wr = RXE_MIN_SRQ_WR;
if (srq && (attr->srq_limit > srq->rq.queue->buf->index_mask)) {
pr_warn("srq_limit (%d) > cur limit(%d)\n",
attr->srq_limit,
srq->rq.queue->buf->index_mask);
goto err1;
}
if (attr->max_sge > rxe->attr.max_srq_sge) {
pr_warn("max_sge(%d) > max_srq_sge(%d)\n",
attr->max_sge, rxe->attr.max_srq_sge);
goto err1;
}
if (mask == IB_SRQ_INIT_MASK) {
if (attr->max_sge > rxe->attr.max_srq_sge) {
pr_warn("max_sge(%d) > max_srq_sge(%d)\n",
attr->max_sge, rxe->attr.max_srq_sge);
goto err1;
}
if (attr->max_sge < RXE_MIN_SRQ_SGE)
attr->max_sge = RXE_MIN_SRQ_SGE;
}
if (attr->max_sge < RXE_MIN_SRQ_SGE)
attr->max_sge = RXE_MIN_SRQ_SGE;
return 0;
@@ -93,8 +63,7 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
spin_lock_init(&srq->rq.consumer_lock);
type = QUEUE_TYPE_FROM_CLIENT;
q = rxe_queue_init(rxe, &srq->rq.max_wr,
srq_wqe_size, type);
q = rxe_queue_init(rxe, &srq->rq.max_wr, srq_wqe_size, type);
if (!q) {
pr_warn("unable to allocate queue for srq\n");
return -ENOMEM;
@@ -121,6 +90,57 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
return 0;
}
int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask)
{
if (srq->error) {
pr_warn("srq in error state\n");
goto err1;
}
if (mask & IB_SRQ_MAX_WR) {
if (attr->max_wr > rxe->attr.max_srq_wr) {
pr_warn("max_wr(%d) > max_srq_wr(%d)\n",
attr->max_wr, rxe->attr.max_srq_wr);
goto err1;
}
if (attr->max_wr <= 0) {
pr_warn("max_wr(%d) <= 0\n", attr->max_wr);
goto err1;
}
if (srq->limit && (attr->max_wr < srq->limit)) {
pr_warn("max_wr (%d) < srq->limit (%d)\n",
attr->max_wr, srq->limit);
goto err1;
}
if (attr->max_wr < RXE_MIN_SRQ_WR)
attr->max_wr = RXE_MIN_SRQ_WR;
}
if (mask & IB_SRQ_LIMIT) {
if (attr->srq_limit > rxe->attr.max_srq_wr) {
pr_warn("srq_limit(%d) > max_srq_wr(%d)\n",
attr->srq_limit, rxe->attr.max_srq_wr);
goto err1;
}
if (attr->srq_limit > srq->rq.queue->buf->index_mask) {
pr_warn("srq_limit (%d) > cur limit(%d)\n",
attr->srq_limit,
srq->rq.queue->buf->index_mask);
goto err1;
}
}
return 0;
err1:
return -EINVAL;
}
int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
struct rxe_modify_srq_cmd *ucmd, struct ib_udata *udata)
@@ -154,3 +174,14 @@ err2:
srq->rq.queue = NULL;
return err;
}
void rxe_srq_cleanup(struct rxe_pool_elem *elem)
{
struct rxe_srq *srq = container_of(elem, typeof(*srq), elem);
if (srq->pd)
rxe_put(srq->pd);
if (srq->rq.queue)
rxe_queue_cleanup(srq->rq.queue);
}

View File

@@ -7,8 +7,8 @@
#include <linux/dma-mapping.h>
#include <net/addrconf.h>
#include <rdma/uverbs_ioctl.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
#include "rxe_hw_counters.h"
@@ -286,36 +286,34 @@ static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
struct rxe_srq *srq = to_rsrq(ibsrq);
struct rxe_create_srq_resp __user *uresp = NULL;
if (init->srq_type != IB_SRQT_BASIC)
return -EOPNOTSUPP;
if (udata) {
if (udata->outlen < sizeof(*uresp))
return -EINVAL;
uresp = udata->outbuf;
}
err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK);
if (init->srq_type != IB_SRQT_BASIC)
return -EOPNOTSUPP;
err = rxe_srq_chk_init(rxe, init);
if (err)
goto err1;
return err;
err = rxe_add_to_pool(&rxe->srq_pool, srq);
if (err)
goto err1;
return err;
rxe_get(pd);
srq->pd = pd;
err = rxe_srq_from_init(rxe, srq, init, udata, uresp);
if (err)
goto err2;
goto err_put;
return 0;
err2:
rxe_put(pd);
err_put:
rxe_put(srq);
err1:
return err;
}
@@ -339,16 +337,12 @@ static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
err = rxe_srq_chk_attr(rxe, srq, attr, mask);
if (err)
goto err1;
return err;
err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata);
if (err)
goto err1;
return err;
return 0;
err1:
return err;
}
static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
@@ -368,10 +362,6 @@ static int rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
{
struct rxe_srq *srq = to_rsrq(ibsrq);
if (srq->rq.queue)
rxe_queue_cleanup(srq->rq.queue);
rxe_put(srq->pd);
rxe_put(srq);
return 0;
}
@@ -495,7 +485,6 @@ static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
if (ret)
return ret;
rxe_qp_destroy(qp);
rxe_put(qp);
return 0;
}
@@ -536,7 +525,6 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
wr->send_flags = ibwr->send_flags;
if (qp_type(qp) == IB_QPT_UD ||
qp_type(qp) == IB_QPT_SMI ||
qp_type(qp) == IB_QPT_GSI) {
struct ib_ah *ibah = ud_wr(ibwr)->ah;
@@ -807,6 +795,12 @@ static int rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
{
struct rxe_cq *cq = to_rcq(ibcq);
/* See IBA C11-17: The CI shall return an error if this Verb is
* invoked while a Work Queue is still associated with the CQ.
*/
if (atomic_read(&cq->num_wq))
return -EINVAL;
rxe_cq_disable(cq);
rxe_put(cq);

View File

@@ -67,6 +67,7 @@ struct rxe_cq {
bool is_dying;
bool is_user;
struct tasklet_struct comp_task;
atomic_t num_wq;
};
enum wqe_state {
@@ -373,7 +374,6 @@ struct rxe_port {
spinlock_t port_lock; /* guard port */
unsigned int mtu_cap;
/* special QPs */
u32 qp_smi_index;
u32 qp_gsi_index;
};
@@ -394,7 +394,6 @@ struct rxe_dev {
struct rxe_pool cq_pool;
struct rxe_pool mr_pool;
struct rxe_pool mw_pool;
struct rxe_pool mc_grp_pool;
/* multicast support */
spinlock_t mcg_lock;

View File

@@ -119,6 +119,7 @@ static int siw_dev_qualified(struct net_device *netdev)
* <linux/if_arp.h> for type identifiers.
*/
if (netdev->type == ARPHRD_ETHER || netdev->type == ARPHRD_IEEE802 ||
netdev->type == ARPHRD_NONE ||
(netdev->type == ARPHRD_LOOPBACK && loopback_enabled))
return 1;
@@ -315,12 +316,12 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
sdev->netdev = netdev;
if (netdev->type != ARPHRD_LOOPBACK) {
if (netdev->type != ARPHRD_LOOPBACK && netdev->type != ARPHRD_NONE) {
addrconf_addr_eui48((unsigned char *)&base_dev->node_guid,
netdev->dev_addr);
} else {
/*
* The loopback device does not have a HW address,
* This device does not have a HW address,
* but connection mangagement lib expects gid != 0
*/
size_t len = min_t(size_t, strlen(base_dev->name), 6);

View File

@@ -132,8 +132,8 @@ int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr,
/* Revisit atomic caps if RFC 7306 gets supported */
attr->atomic_cap = 0;
attr->device_cap_flags =
IB_DEVICE_MEM_MGT_EXTENSIONS | IB_DEVICE_ALLOW_USER_UNREG;
attr->device_cap_flags = IB_DEVICE_MEM_MGT_EXTENSIONS;
attr->kernel_cap_flags = IBK_ALLOW_USER_UNREG;
attr->max_cq = sdev->attrs.max_cq;
attr->max_cqe = sdev->attrs.max_cqe;
attr->max_fast_reg_page_list_len = SIW_MAX_SGE_PBL;

View File

@@ -411,6 +411,7 @@ struct ipoib_dev_priv {
struct dentry *path_dentry;
#endif
u64 hca_caps;
u64 kernel_caps;
struct ipoib_ethtool_st ethtool;
unsigned int max_send_sge;
const struct net_device_ops *rn_ops;

View File

@@ -1850,11 +1850,12 @@ static void ipoib_parent_unregister_pre(struct net_device *ndev)
static void ipoib_set_dev_features(struct ipoib_dev_priv *priv)
{
priv->hca_caps = priv->ca->attrs.device_cap_flags;
priv->kernel_caps = priv->ca->attrs.kernel_cap_flags;
if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
if (priv->hca_caps & IB_DEVICE_UD_TSO)
if (priv->kernel_caps & IBK_UD_TSO)
priv->dev->hw_features |= NETIF_F_TSO;
priv->dev->features |= priv->dev->hw_features;
@@ -2201,7 +2202,7 @@ int ipoib_intf_init(struct ib_device *hca, u32 port, const char *name,
priv->rn_ops = dev->netdev_ops;
if (hca->attrs.device_cap_flags & IB_DEVICE_VIRTUAL_FUNCTION)
if (hca->attrs.kernel_cap_flags & IBK_VIRTUAL_FUNCTION)
dev->netdev_ops = &ipoib_netdev_ops_vf;
else
dev->netdev_ops = &ipoib_netdev_ops_pf;

View File

@@ -197,16 +197,16 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
init_attr.send_cq = priv->send_cq;
init_attr.recv_cq = priv->recv_cq;
if (priv->hca_caps & IB_DEVICE_UD_TSO)
if (priv->kernel_caps & IBK_UD_TSO)
init_attr.create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK)
if (priv->kernel_caps & IBK_BLOCK_MULTICAST_LOOPBACK)
init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
if (priv->hca_caps & IB_DEVICE_MANAGED_FLOW_STEERING)
init_attr.create_flags |= IB_QP_CREATE_NETIF_QP;
if (priv->hca_caps & IB_DEVICE_RDMA_NETDEV_OPA)
if (priv->kernel_caps & IBK_RDMA_NETDEV_OPA)
init_attr.create_flags |= IB_QP_CREATE_NETDEV_USE;
priv->qp = ib_create_qp(priv->pd, &init_attr);

View File

@@ -650,7 +650,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
SHOST_DIX_GUARD_CRC);
}
if (!(ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
if (!(ib_dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG))
shost->virt_boundary_mask = SZ_4K - 1;
if (iscsi_host_add(shost, ib_dev->dev.parent)) {

View File

@@ -363,7 +363,7 @@ struct iser_fr_pool {
* @cq: Connection completion queue
* @cq_size: The number of max outstanding completions
* @device: reference to iser device
* @fr_pool: connection fast registration poool
* @fr_pool: connection fast registration pool
* @pi_support: Indicate device T10-PI support
* @reg_cqe: completion handler
*/

View File

@@ -115,7 +115,7 @@ iser_create_fastreg_desc(struct iser_device *device,
if (!desc)
return ERR_PTR(-ENOMEM);
if (ib_dev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
if (ib_dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)
mr_type = IB_MR_TYPE_SG_GAPS;
else
mr_type = IB_MR_TYPE_MEM_REG;
@@ -517,7 +517,7 @@ static void iser_calc_scsi_params(struct iser_conn *iser_conn,
* (head and tail) for a single page worth data, so one additional
* entry is required.
*/
if (attr->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
if (attr->kernel_cap_flags & IBK_SG_GAPS_REG)
reserved_mr_pages = 0;
else
reserved_mr_pages = 1;
@@ -562,8 +562,8 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
/* connection T10-PI support */
if (iser_pi_enable) {
if (!(device->ib_device->attrs.device_cap_flags &
IB_DEVICE_INTEGRITY_HANDOVER)) {
if (!(device->ib_device->attrs.kernel_cap_flags &
IBK_INTEGRITY_HANDOVER)) {
iser_warn("T10-PI requested but not supported on %s, "
"continue without T10-PI\n",
dev_name(&ib_conn->device->ib_device->dev));

View File

@@ -42,6 +42,7 @@ MODULE_PARM_DESC(sg_tablesize,
static DEFINE_MUTEX(device_list_mutex);
static LIST_HEAD(device_list);
static struct workqueue_struct *isert_login_wq;
static struct workqueue_struct *isert_comp_wq;
static struct workqueue_struct *isert_release_wq;
@@ -230,7 +231,7 @@ isert_create_device_ib_res(struct isert_device *device)
}
/* Check signature cap */
if (ib_dev->attrs.device_cap_flags & IB_DEVICE_INTEGRITY_HANDOVER)
if (ib_dev->attrs.kernel_cap_flags & IBK_INTEGRITY_HANDOVER)
device->pi_capable = true;
else
device->pi_capable = false;
@@ -1017,7 +1018,7 @@ isert_rx_login_req(struct isert_conn *isert_conn)
complete(&isert_conn->login_comp);
return;
}
schedule_delayed_work(&conn->login_work, 0);
queue_delayed_work(isert_login_wq, &conn->login_work, 0);
}
static struct iscsit_cmd
@@ -2348,9 +2349,9 @@ isert_get_login_rx(struct iscsit_conn *conn, struct iscsi_login *login)
/*
* For login requests after the first PDU, isert_rx_login_req() will
* kick schedule_delayed_work(&conn->login_work) as the packet is
* received, which turns this callback from iscsi_target_do_login_rx()
* into a NOP.
* kick queue_delayed_work(isert_login_wq, &conn->login_work) as
* the packet is received, which turns this callback from
* iscsi_target_do_login_rx() into a NOP.
*/
if (!login->first_request)
return 0;
@@ -2606,20 +2607,23 @@ static struct iscsit_transport iser_target_transport = {
static int __init isert_init(void)
{
int ret;
isert_login_wq = alloc_workqueue("isert_login_wq", 0, 0);
if (!isert_login_wq) {
isert_err("Unable to allocate isert_login_wq\n");
return -ENOMEM;
}
isert_comp_wq = alloc_workqueue("isert_comp_wq",
WQ_UNBOUND | WQ_HIGHPRI, 0);
if (!isert_comp_wq) {
isert_err("Unable to allocate isert_comp_wq\n");
return -ENOMEM;
goto destroy_login_wq;
}
isert_release_wq = alloc_workqueue("isert_release_wq", WQ_UNBOUND,
WQ_UNBOUND_MAX_ACTIVE);
if (!isert_release_wq) {
isert_err("Unable to allocate isert_release_wq\n");
ret = -ENOMEM;
goto destroy_comp_wq;
}
@@ -2630,17 +2634,20 @@ static int __init isert_init(void)
destroy_comp_wq:
destroy_workqueue(isert_comp_wq);
destroy_login_wq:
destroy_workqueue(isert_login_wq);
return ret;
return -ENOMEM;
}
static void __exit isert_exit(void)
{
flush_scheduled_work();
flush_workqueue(isert_login_wq);
destroy_workqueue(isert_release_wq);
destroy_workqueue(isert_comp_wq);
iscsit_unregister_transport(&iser_target_transport);
isert_info("iSER_TARGET[0] - Released iser_target_transport\n");
destroy_workqueue(isert_login_wq);
}
MODULE_DESCRIPTION("iSER-Target for mainline target infrastructure");

View File

@@ -2785,7 +2785,7 @@ static void free_clt(struct rtrs_clt_sess *clt)
/**
* rtrs_clt_open() - Open a path to an RTRS server
* @ops: holds the link event callback and the private pointer.
* @sessname: name of the session
* @pathname: name of the path to an RTRS server
* @paths: Paths to be established defined by their src and dst addresses
* @paths_num: Number of elements in the @paths array
* @port: port to be used by the RTRS session

Some files were not shown because too many files have changed in this diff Show More